def _solve_task(self, identifier, description, user, password):
    """
    Solve the given verification task.

    :param identifier: Verification task identifier.
    :param description: Verification task description dictionary.
    :param user: User name.
    :param password: Password.
    :return: Future object.
    """
    # Submit command
    self.logger.info("Submit the task {0}".format(identifier))
    task = self.__tasks[identifier]
    try:
        return self.wi.submit(run=task.run,
                              limits=task.run.limits,
                              cpu_model=task.run.cpu_model,
                              result_files_pattern='output/**',
                              priority=task.run.priority,
                              user_pwd=task.run.user_pwd(user, password),
                              revision=task.run.branch + ':' + task.run.revision,
                              meta_information=json.dumps({'Verification tasks produced by Klever': None}))
    except Exception as err:
        raise schedulers.SchedulerException(str(err))
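
# --- Illustrative sketch (not part of the scheduler) ---
# A hypothetical caller of _solve_task(). The only assumption is that the
# object returned by the VerifierCloud web client blocks in result() until
# the run finishes, which is how _process_task_result() below uses it.
def example_wait_for_cloud_task(scheduler, identifier, description, user, password):
    future = scheduler._solve_task(identifier, description, user, password)
    # Blocks until the solution archive bytes are available;
    # raises if the remote run failed.
    return future.result()
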
def _execute(logfile, process):
    """
    Common implementation for running a multiprocessing process and waiting until it terminates.

    :param logfile: Path to the file to append log messages to.
    :param process: multiprocessing.Process object.
    :raise SchedulerException: Raised if the process cannot be executed or if its exit code cannot be determined.
    """
    def log(msg):
        """This avoids the problem of loggers being killed in subprocesses."""
        if os.path.isfile(logfile):
            with open(logfile, 'a') as fp:
                print(msg, file=fp)
        else:
            print(msg)

    log("Future task {!r}: Going to start a new process which will start native scheduler client"
        .format(process.name))
    process.start()
    log("Future task {!r}: get pid of the started process.".format(process.name))
    if process.pid:
        log("Future task {!r}: the pid is {!r}.".format(process.name, process.pid))
        # Initialize j so the log statement below is safe even if the process
        # has already terminated and the loop body never runs
        j = None
        while process.is_alive():
            j = process.join(5)
            if j is not None:
                break
        log("Future task {!r}: join method returned {!r}.".format(process.name, str(j)))
        log("Future task {!r}: process {!r} joined, going to check its exit code"
            .format(process.name, process.pid))
        ec = process.exitcode
        log("Future task {!r}: exit code of the process {!r} is {!r}".format(process.name, process.pid, str(ec)))
        if ec is not None:
            return str(ec)
        else:
            error_msg = 'Cannot determine exit code of process {!r}'.format(process.pid)
            raise schedulers.SchedulerException(error_msg)
    else:
        raise schedulers.SchedulerException("Cannot launch process to run a job or a task")
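
# --- Illustrative sketch (not part of the scheduler) ---
# A minimal way to drive _execute(): wrap a worker in a multiprocessing
# Process and let _execute() start it, join it and report its exit code.
# The worker body and the log file path are assumptions for this example.
def _example_worker():
    pass  # stand-in for the native scheduler client entry point

if __name__ == '__main__':
    example_process = multiprocessing.Process(target=_example_worker, name='example task')
    # _execute() returns the exit code as a string, e.g. '0' on success.
    print(_execute('/tmp/example-client.log', example_process))
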
def _process_task_result(self, identifier, future, description):
    """
    Process the result and send it to the server.

    :param identifier: Task identifier string.
    :param future: Future object.
    :param description: Verification task description dictionary.
    :return: Status of the task after solution: FINISHED.
    :raise SchedulerException: In case of ERROR status.
    """
    run = self.__tasks[identifier]
    self.__drop_task(identifier)

    task_work_dir = os.path.join(self.work_dir, "tasks", identifier)
    solution_file = os.path.join(task_work_dir, "solution.zip")
    self.logger.debug("Save solution to the disk as {}".format(solution_file))
    try:
        result = future.result()
    except Exception as err:
        error_msg = "Task {} has been finished but no data has been received: {}".format(identifier, err)
        self.logger.warning(error_msg)
        raise schedulers.SchedulerException(error_msg)

    # Save the result
    with open(solution_file, 'wb') as sa:
        sa.write(result)

    # Unpack results
    task_solution_dir = os.path.join(task_work_dir, "solution")
    self.logger.debug("Make directory for the solution to extract {0}".format(task_solution_dir))
    os.makedirs(task_solution_dir.encode("utf-8"), exist_ok=True)
    self.logger.debug("Extract results from {} to {}".format(solution_file, task_solution_dir))
    shutil.unpack_archive(solution_file, task_solution_dir)

    # Process results and convert RunExec output to the result description.
    # TODO: what will happen if there are several input files?
    # Simulate BenchExec behaviour when a single input file is provided.
    os.makedirs(os.path.join(task_solution_dir, "output", "benchmark.logfiles").encode("utf-8"), exist_ok=True)
    shutil.move(os.path.join(task_solution_dir, 'output.log'),
                os.path.join(task_solution_dir, "output", "benchmark.logfiles",
                             "{}.log".format(os.path.basename(run.run.sourcefiles[0]))))

    try:
        solution_identifier, solution_description = self.__extract_description(task_solution_dir)
        self.logger.debug("Successfully extracted solution {} for task {}".format(solution_identifier, identifier))
    except Exception as err:
        self.logger.warning("Cannot extract results from a solution: {}".format(err))
        raise

    # Make a fake BenchExec XML report
    self.__make_fake_benchexec(solution_description,
                               os.path.join(task_work_dir, 'solution', 'output', "benchmark.results.xml"))

    # Add actual restrictions
    solution_description['resource limits'] = description["resource limits"]

    # Make an archive
    solution_archive = os.path.join(task_work_dir, "solution")
    self.logger.debug("Make archive {}.zip with a solution of the task {}".format(solution_archive, identifier))
    shutil.make_archive(solution_archive, 'zip', task_solution_dir)
    solution_archive += ".zip"

    # Push the result
    self.logger.debug("Upload solution archive {} of the task {} to the verification gateway"
                      .format(solution_archive, identifier))
    try:
        utils.submit_task_results(self.logger, self.server, self.scheduler_type(), identifier,
                                  solution_description, os.path.join(task_work_dir, "solution"))
    except Exception as err:
        error_msg = "Cannot submit solution results of task {}: {}".format(identifier, err)
        self.logger.warning(error_msg)
        raise schedulers.SchedulerException(error_msg)

    if "keep working directory" not in self.conf["scheduler"] or \
            not self.conf["scheduler"]["keep working directory"]:
        self.logger.debug("Clean task working directory {} for {}".format(task_work_dir, identifier))
        shutil.rmtree(task_work_dir)

    self.logger.debug("Task {} has been processed successfully".format(identifier))
    return "FINISHED"
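
# --- Illustrative sketch (not part of the scheduler) ---
# Layout of the task working directory after _process_task_result(), inferred
# from the code above; <src> stands for os.path.basename(run.run.sourcefiles[0]):
#
#   tasks/<id>/solution.zip                                  - archive received from the future,
#                                                              later rebuilt and pushed to the gateway
#   tasks/<id>/solution/output/benchmark.logfiles/<src>.log  - output.log moved to mimic BenchExec
#   tasks/<id>/solution/output/benchmark.results.xml         - fake BenchExec report
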
def _prepare_task(self, identifier, description):
    """
    Prepare a working directory before starting the solution.

    :param identifier: Verification task identifier.
    :param description: Dictionary with the task description.
    :raise SchedulerException: If the task cannot be scheduled or its preparation failed.
    """
    # Prepare the working directory
    task_work_dir = os.path.join(self.work_dir, "tasks", identifier)
    task_data_dir = os.path.join(task_work_dir, "data")
    job_id = description['job id']
    self.logger.debug("Make directory for the task to solve {!r}".format(task_data_dir))
    os.makedirs(task_data_dir.encode("utf-8"), exist_ok=True)

    # This method can be called several times to adjust resource limitations, but we should avoid extra downloads
    # from the server
    if identifier not in self.__tasks:
        archive = os.path.join(task_work_dir, "task.zip")
        self.logger.debug("Pull archive {!r} from the verification gateway".format(archive))
        ret = self.server.pull_task(identifier, archive)
        if not ret:
            self.logger.info("Seems that the task data cannot be downloaded because of a legitimate reason, "
                             "so we have nothing to do here")
            os._exit(1)
        self.logger.debug("Unpack archive {!r} to {!r}".format(archive, task_data_dir))
        shutil.unpack_archive(archive, task_data_dir)

    # Update the description
    description.update(self.__get_credentials(job_id))

    # TODO: Add more exception handling to make the code more reliable
    with open(os.path.join(self.work_dir, "tasks", identifier, "task.json"), "w", encoding="utf-8") as fp:
        json.dump(description, fp, ensure_ascii=False, sort_keys=True, indent=4)

    # Prepare the command to submit
    self.logger.debug("Prepare arguments of the task {!r}".format(identifier))
    task_data_dir = os.path.join(self.work_dir, "tasks", identifier, "data")
    try:
        assert description["priority"] in ["LOW", "IDLE"]
        run = Run(task_data_dir, description)
    except Exception as err:
        raise schedulers.SchedulerException(
            'Cannot prepare the task description on the basis of the given benchmark.xml: {}'.format(err))

    self.__track_task(job_id, run, identifier)
    return True
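
# --- Illustrative sketch (not part of the scheduler) ---
# On-disk layout produced by _prepare_task(), inferred from the code above:
#
#   <work_dir>/tasks/<identifier>/task.zip   - archive pulled from the verification gateway
#   <work_dir>/tasks/<identifier>/data/      - unpacked task data handed over to Run()
#   <work_dir>/tasks/<identifier>/task.json  - description updated with job credentials
#
# Repeated calls (e.g. to adjust resource limits) rewrite task.json but skip
# the download once the identifier is already tracked in self.__tasks.
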
def _postprocess_solution(self, identifier, future, mode):
    """
    Mark resources as released and clean the working directory.

    :param identifier: A job or task identifier.
    :param future: Future object.
    :param mode: 'task' or 'job'.
    :raise SchedulerException: Raised if an exception occurred during the solution or if results are inconsistent.
    """
    if mode == 'task':
        subdir = 'tasks'
        if identifier in self._task_processes:
            del self._task_processes[identifier]
    else:
        subdir = 'jobs'
        if identifier in self._job_processes:
            del self._job_processes[identifier]
    # Mark resources as released
    del self._reserved[subdir][identifier]

    # Include logs into total scheduler logs
    work_dir = os.path.join(self.work_dir, subdir, identifier)

    # Release resources
    if "keep working directory" in self.conf["scheduler"] and self.conf["scheduler"]["keep working directory"] and \
            os.path.isdir(work_dir):
        reserved_space = utils.dir_size(work_dir)
    else:
        reserved_space = 0

    self.logger.debug('Yielding result of a future object of {} {}'.format(mode, identifier))
    try:
        if future:
            self._manager.release_resources(identifier, self._node_name, True if mode == 'job' else False,
                                            reserved_space)

            result = future.result()
            self.logger.info(f'Future processor of {mode} {identifier} returned {result}')

            logfile = "{}/client-log.log".format(work_dir)
            if os.path.isfile(logfile):
                with open(logfile, mode='r', encoding="utf8") as f:
                    self.logger.debug("Scheduler client log: {}".format(f.read()))
            else:
                self.logger.warning("Cannot find Scheduler client file with logs: {!r}".format(logfile))

            errors_file = "{}/client-critical.log".format(work_dir)
            if os.path.isfile(errors_file):
                with open(errors_file, mode='r', encoding="utf8") as f:
                    errors = f.readlines()
                if self.conf["scheduler"].get("ignore BenchExec warnings"):
                    for msg in list(errors):
                        match = re.search(r'WARNING - (.*)', msg)
                        if match and (self.conf["scheduler"]["ignore BenchExec warnings"] is True or
                                      (isinstance(self.conf["scheduler"]["ignore BenchExec warnings"], list) and
                                       any(True for t in self.conf["scheduler"]["ignore BenchExec warnings"]
                                           if t in msg))):
                            errors.remove(msg)
                        elif re.search(r'benchexec(.*) outputted to STDERR', msg):
                            errors.remove(msg)
            else:
                errors = []

            if errors:
                error_msg = errors[-1]
            else:
                error_msg = None
                try:
                    result = int(result)
                except ValueError:
                    error_msg = f'Cannot cast {result} to integer'
                else:
                    if result != 0:
                        error_msg = "Exited with exit code: {}".format(result)
            if error_msg:
                self.logger.warning(error_msg)
                raise schedulers.SchedulerException(error_msg)
        else:
            self.logger.debug("Seems that {} {} has not been started".format(mode, identifier))
    except Exception as err:
        error_msg = "Execution of {} {} terminated with an exception: {}".format(mode, identifier, str(err))
        raise schedulers.SchedulerException(error_msg)
    finally:
        # Clean the working directory
        if "keep working directory" not in self.conf["scheduler"] or \
                not self.conf["scheduler"]["keep working directory"]:
            self.logger.debug("Clean task working directory {} for {}".format(work_dir, identifier))
            shutil.rmtree(work_dir)
    return "FINISHED"
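
# --- Illustrative sketch (not part of the scheduler) ---
# The two accepted forms of the "ignore BenchExec warnings" option, inferred
# from the checks in _postprocess_solution() above; the substrings below are
# example values only:
example_scheduler_conf = {
    "scheduler": {
        # True drops every 'WARNING - ...' line of client-critical.log:
        "ignore BenchExec warnings": True,
        # A list drops only warnings that contain one of the substrings:
        # "ignore BenchExec warnings": ["container", "cgroups"],
    }
}
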
def _prepare_solution(self, identifier, configuration, mode='task'):
    """
    Generate a working directory, configuration files and a multiprocessing Process object to be ready to just
    run it.

    :param identifier: Job or task identifier.
    :param configuration: A dictionary with a configuration or description.
    :param mode: 'task' or 'job'.
    :raise SchedulerException: Raised if the preparation fails and the task or job cannot be scheduled.
    """
    self.logger.info("Going to prepare execution of the {} {}".format(mode, identifier))
    node_status = self._manager.node_info(self._node_name)

    if mode == 'task':
        subdir = 'tasks'
        client_conf = self._get_task_configuration()
        self._manager.check_resources(configuration, job=False)
    else:
        subdir = 'jobs'
        client_conf = self._job_conf_prototype.copy()
        self._manager.check_resources(configuration, job=True)
    args = [self._client_bin, mode]

    self._create_work_dir(subdir, identifier)
    client_conf["Klever Bridge"] = self.conf["Klever Bridge"]
    client_conf["identifier"] = identifier
    work_dir = os.path.join(self.work_dir, subdir, identifier)
    file_name = os.path.join(work_dir, 'client.json')
    args.extend(['--file', file_name])
    self._reserved[subdir][identifier] = dict()

    # Check the disk space limitation
    if "keep working directory" in self.conf["scheduler"] and self.conf["scheduler"]["keep working directory"] and \
            'disk memory size' in configuration["resource limits"] and \
            configuration["resource limits"]['disk memory size']:
        current_space = int(utils.get_output('du -bs {} | cut -f1'.format(work_dir)))
        if current_space > configuration["resource limits"]['disk memory size']:
            raise schedulers.SchedulerException(
                "Clean manually the existing working directory of {} since its size on the disk is {}B which is "
                "greater than the allowed limitation of {}B".format(
                    os.path.abspath(work_dir), current_space,
                    configuration["resource limits"]['disk memory size']))

    if configuration["resource limits"].get("CPU time"):
        # This is an emergency timer in case something hangs
        timeout = int((configuration["resource limits"]["CPU time"] * 1.5) / 100)
    else:
        timeout = None
    process = multiprocessing.Process(None, self._process_starter, identifier, [timeout, args])

    if mode == 'task':
        client_conf["Klever Bridge"] = self.conf["Klever Bridge"]
        client_conf["identifier"] = identifier
        client_conf["common"]["working directory"] = work_dir
        for name in ("verifier", "upload verifier input files"):
            client_conf[name] = configuration[name]

        # Speculative flag
        if configuration.get('speculative'):
            client_conf["speculative"] = True

        # Check versions of verification tools
        if client_conf['verifier']['name'] not in client_conf['client']['verification tools']:
            raise schedulers.SchedulerException(
                'Use another verification tool or install and then specify verifier {!r} with its versions at {!r}'
                .format(client_conf['verifier']['name'], self.conf["scheduler"]["task client configuration"]))
        if 'version' not in client_conf['verifier']:
            raise schedulers.SchedulerException(
                'Cannot find any given {!r} version in the task description'
                .format(client_conf['verifier']['name']))
        if client_conf['verifier']['version'] not in \
                client_conf['client']['verification tools'][client_conf['verifier']['name']]:
            raise schedulers.SchedulerException(
                'Use another version of {!r} or install the given version {!r} and specify it at the scheduler '
                'client configuration {!r}'.format(
                    client_conf['verifier']['name'], client_conf['verifier']['version'],
                    self.conf["scheduler"]["task client configuration"]))

        self._task_processes[identifier] = process
    else:
        klever_core_conf = configuration.copy()
        del klever_core_conf["resource limits"]
        klever_core_conf["Klever Bridge"] = self.conf["Klever Bridge"]
        klever_core_conf["working directory"] = "klever-core-work-dir"
        self._reserved["jobs"][identifier]["configuration"] = klever_core_conf

        client_conf["common"]["working directory"] = work_dir
        client_conf["Klever Core conf"] = self._reserved["jobs"][identifier]["configuration"]

        self._job_processes[identifier] = process

    client_conf["resource limits"] = configuration["resource limits"]
    # Add particular cores
    if "resource limits" not in client_conf:
        client_conf["resource limits"] = {}
    client_conf["resource limits"]["CPU cores"] = \
        self._get_virtual_cores(int(node_status["available CPU number"]),
                                int(node_status["reserved CPU number"]),
                                int(configuration["resource limits"]["number of CPU cores"]))
    if mode != "task":
        if len(client_conf["resource limits"]["CPU cores"]) == 0:
            data = utils.extract_cpu_cores_info()
            client_conf["Klever Core conf"]["task resource limits"]["CPU Virtual cores"] = \
                sum(len(data[a]) for a in data)
        else:
            client_conf["Klever Core conf"]["task resource limits"]["CPU Virtual cores"] = \
                len(client_conf["resource limits"]["CPU cores"])

        # Save the Klever Core configuration to the default configuration file
        with open(os.path.join(work_dir, "core.json"), "w", encoding="utf8") as fh:
            json.dump(client_conf["Klever Core conf"], fh, ensure_ascii=False, sort_keys=True, indent=4)

    with open(file_name, 'w', encoding="utf8") as fp:
        json.dump(client_conf, fp, ensure_ascii=False, sort_keys=True, indent=4)
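
# --- Illustrative sketch (not part of the scheduler) ---
# Shape of the client.json that _prepare_solution() writes for a task,
# restricted to keys set in the code above; all values are placeholders:
example_client_conf = {
    "Klever Bridge": "<copied from the scheduler configuration>",
    "identifier": "<task identifier>",
    "common": {"working directory": "<work dir>/tasks/<task identifier>"},
    "verifier": {"name": "<tool name>", "version": "<tool version>"},
    "upload verifier input files": False,
    "resource limits": {
        # 'CPU cores' holds virtual cores chosen by _get_virtual_cores()
        "CPU cores": [0, 1],
    },
}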