Example #1
    def _postprocess_solution(self, identifier, future, mode):
        """
        Mark resources as released, clean the working directory.

        :param identifier: A job or task identifier.
        :param future: Future object with the solution result, or None if it was never started.
        :param mode: 'task' or 'job'.
        :raise SchedulerException: Raised if an exception occurred during the solution or if results are inconsistent.
        """
        if mode == 'task':
            subdir = 'tasks'
            del self._task_processes[identifier]
        else:
            subdir = 'jobs'
            del self._job_processes[identifier]
        # Mark resources as released
        del self._reserved[subdir][identifier]

        # Include logs into total scheduler logs
        work_dir = os.path.join(self.work_dir, subdir, identifier)

        # Release resources
        if "keep working directory" in self.conf["scheduler"] and self.conf[
                "scheduler"]["keep working directory"]:
            reserved_space = utils.dir_size(work_dir)
        else:
            reserved_space = 0

        self.logger.debug('Yielding result of a future object of {} {}'.format(
            mode, identifier))
        try:
            if future:
                self._manager.release_resources(
                    identifier, self._node_name,
                    mode == 'job', reserved_space)
                result = future.result()
                if result != 0:
                    msg = "Work has been interrupted"
                    self.logger.warning(msg)
                    raise schedulers.SchedulerException(msg)
            else:
                self.logger.debug(
                    "Seems that {} {} has not been started".format(
                        mode, identifier))
        except Exception as err:
            error_msg = "Execution of {} {} terminated with an exception: {}".format(
                mode, identifier, err)
            self.logger.warning(error_msg)
            raise schedulers.SchedulerException(error_msg)
        finally:
            # Clean working directory
            if "keep working directory" not in self.conf["scheduler"] or \
                    not self.conf["scheduler"]["keep working directory"]:
                self.logger.debug(
                    "Clean task working directory {} for {}".format(
                        work_dir, identifier))
                shutil.rmtree(work_dir)

        return "FINISHED"
Example #2
    def _solve_task(self, identifier, description, user, password):
        """
        Solve given verification task.

        :param identifier: Verification task identifier.
        :param description: Verification task description dictionary.
        :param user: User name.
        :param password: Password.
        :return: Return Future object.
        """
        # Submit command
        self.logger.info("Submit the task {0}".format(identifier))
        task = self.__tasks[identifier]
        try:
            return self.wi.submit(
                run=task.run,
                limits=task.run.limits,
                cpu_model=task.run.cpu_model,
                result_files_pattern='output/**',
                priority=task.run.priority,
                user_pwd=task.run.user_pwd(user, password),
                revision=task.run.branch + ':' + task.run.revision,
                meta_information=json.dumps(
                    {'Verification tasks produced by Klever': None}))
        except Exception as err:
            raise schedulers.SchedulerException(str(err))
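
The try/except here is an error-normalization idiom: whatever the web interface raises, the caller only ever sees a SchedulerException carrying the original message. A hedged sketch of the same idiom, with a hypothetical backend object standing in for self.wi:

    class SchedulerException(Exception):
        pass

    def submit(backend, **kwargs):
        """Normalize any backend failure into a SchedulerException."""
        try:
            # backend.submit is a hypothetical stand-in for self.wi.submit
            return backend.submit(**kwargs)  # returns a Future-like object
        except Exception as err:
            # 'from err' keeps the original traceback available for debugging
            raise SchedulerException(str(err)) from err
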
Example #3
    def _execute(logfile, process):
        """
        Common implementation for running a multiprocessing process and waiting until it terminates.

        :param logfile: Path to a log file to append messages to.
        :param process: multiprocessing.Process object.
        :raise SchedulerException: Raised if the process cannot be executed or if its exit code cannot be determined.
        """
        def log(msg):
            """This avoids killing problem of logging loggers."""
            if os.path.isfile(logfile):
                with open(logfile, 'a') as fp:
                    print(msg, file=fp)
            else:
                print(msg)

        log("Future task {!r}: Going to start a new process which will start native scheduler client"
            .format(process.name))
        process.start()
        log("Future task {!r}: get pid of the started process.".format(
            process.name))
        if process.pid:
            log("Future task {!r}: the pid is {!r}.".format(
                process.name, process.pid))
            # join() always returns None, so poll is_alive() to detect termination
            j = None
            while process.is_alive():
                j = process.join(5)
            log("Future task {!r}: join method returned {!r}.".format(
                process.name, str(j)))
            log("Future task {!r}: process {!r} joined, going to check its exit code"
                .format(process.name, process.pid))
            ec = process.exitcode
            log("Future task {!r}: exit code of the process {!r} is {!r}".
                format(process.name, process.pid, str(ec)))
            if ec is not None:
                return str(ec)
            else:
                error_msg = 'Cannot determine exit code of process {!r}'.format(
                    process.pid)
                raise schedulers.SchedulerException(error_msg)
        else:
            raise schedulers.SchedulerException(
                "Cannot launch process to run a job or a task")
Example #4
    def _process_task_result(self, identifier, future, description):
        """
        Process result and send results to the server.

        :param identifier: Task identifier string.
        :param future: Future object.
        :param description: Verification task description dictionary.
        :return: status of the task after solution: FINISHED.
        :raise SchedulerException: in case of ERROR status.
        """
        run = self.__tasks[identifier]
        self.__drop_task(identifier)

        task_work_dir = os.path.join(self.work_dir, "tasks", identifier)
        solution_file = os.path.join(task_work_dir, "solution.zip")
        self.logger.debug(
            "Save solution to the disk as {}".format(solution_file))
        try:
            result = future.result()
        except Exception as err:
            error_msg = "Task {} has been finished but no data has been received: {}".format(
                identifier, err)
            self.logger.warning(error_msg)
            raise schedulers.SchedulerException(error_msg)

        # Save result
        with open(solution_file, 'wb') as sa:
            sa.write(result)

        # Unpack results
        task_solution_dir = os.path.join(task_work_dir, "solution")
        self.logger.debug(
            "Make directory {0} to extract the solution into".format(
                task_solution_dir))
        os.makedirs(task_solution_dir.encode("utf-8"), exist_ok=True)
        self.logger.debug("Extract results from {} to {}".format(
            solution_file, task_solution_dir))
        shutil.unpack_archive(solution_file, task_solution_dir)
        # Process results and convert RunExec output to result description
        # TODO: what will happen if there will be several input files?
        # Simulate BenchExec behaviour when one input file is provided.
        os.makedirs(os.path.join(task_solution_dir, "output",
                                 "benchmark.logfiles").encode("utf-8"),
                    exist_ok=True)
        shutil.move(
            os.path.join(task_solution_dir, 'output.log'),
            os.path.join(
                task_solution_dir, "output", "benchmark.logfiles",
                "{}.log".format(os.path.basename(run.run.sourcefiles[0]))))

        try:
            solution_identifier, solution_description = self.__extract_description(
                task_solution_dir)
            self.logger.debug(
                "Successfully extracted solution {} for task {}".format(
                    solution_identifier, identifier))
        except Exception as err:
            self.logger.warning(
                "Cannot extract results from a solution: {}".format(err))
            raise err

        # Make fake BenchExec XML report
        self.__make_fake_benchexec(
            solution_description,
            os.path.join(task_work_dir, 'solution', 'output',
                         "benchmark.results.xml"))

        # Add actual restrictions
        solution_description['resource limits'] = description[
            "resource limits"]

        # Make archive
        solution_archive = os.path.join(task_work_dir, "solution")
        self.logger.debug(
            "Make archive {}.zip with a solution of the task {}".format(
                solution_archive, identifier))
        shutil.make_archive(solution_archive, 'zip', task_solution_dir)
        solution_archive += ".zip"

        # Push result
        self.logger.debug(
            "Upload solution archive {} of the task {} to the verification gateway"
            .format(solution_archive, identifier))
        try:
            utils.submit_task_results(self.logger, self.server,
                                      self.scheduler_type(), identifier,
                                      solution_description,
                                      os.path.join(task_work_dir, "solution"))
        except Exception as err:
            error_msg = "Cannot submit solution results of task {}: {}".format(
                identifier, err)
            self.logger.warning(error_msg)
            raise schedulers.SchedulerException(error_msg)

        if "keep working directory" not in self.conf["scheduler"] or \
                not self.conf["scheduler"]["keep working directory"]:
            self.logger.debug("Clean task working directory {} for {}".format(
                task_work_dir, identifier))
            shutil.rmtree(task_work_dir)

        self.logger.debug(
            "Task {} has been processed successfully".format(identifier))
        return "FINISHED"
Example #5
    def _prepare_task(self, identifier, description):
        """
        Prepare a working directory before starting the solution.

        :param identifier: Verification task identifier.
        :param description: Dictionary with task description.
        :raise SchedulerException: If a task cannot be scheduled or preparation failed.
        """
        # Prepare working directory
        task_work_dir = os.path.join(self.work_dir, "tasks", identifier)
        task_data_dir = os.path.join(task_work_dir, "data")
        job_id = description['job id']

        self.logger.debug(
            "Make directory for the task to solve {!r}".format(task_data_dir))
        os.makedirs(task_data_dir.encode("utf-8"), exist_ok=True)

        # This method can be called several times to adjust resource limitations but we should avoid extra downloads
        # from the server
        if identifier not in self.__tasks:
            archive = os.path.join(task_work_dir, "task.zip")
            self.logger.debug(
                "Pull from the verification gateway archive {!r}".format(
                    archive))
            ret = self.server.pull_task(identifier, archive)
            if not ret:
                self.logger.info(
                    "It seems that the task data cannot be downloaded for a legitimate reason, "
                    "so there is nothing to do here")
                os._exit(1)
            self.logger.debug("Unpack archive {!r} to {!r}".format(
                archive, task_data_dir))
            shutil.unpack_archive(archive, task_data_dir)

            # Update description
            description.update(self.__get_credentials(job_id))

        # TODO: Add more exceptions handling to make code more reliable
        with open(os.path.join(self.work_dir, "tasks", identifier, "task.json"),
                  "w",
                  encoding="utf-8") as fp:
            json.dump(description,
                      fp,
                      ensure_ascii=False,
                      sort_keys=True,
                      indent=4)

        # Prepare command to submit
        self.logger.debug(
            "Prepare arguments of the task {!r}".format(identifier))
        task_data_dir = os.path.join(self.work_dir, "tasks", identifier,
                                     "data")
        try:
            assert description["priority"] in ["LOW", "IDLE"]
            run = Run(task_data_dir, description)
        except Exception as err:
            raise schedulers.SchedulerException(
                'Cannot prepare task description on the basis of the given benchmark.xml: {}'
                .format(err))

        self.__track_task(job_id, run, identifier)
        return True
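
The key property of this method is idempotency: it may be called several times for the same task (e.g., to adjust resource limits), so the expensive download is guarded by the __tasks cache while the description file is rewritten on every call. A compact sketch of that idiom, with a hypothetical pull callback standing in for server.pull_task and a plain set standing in for the cache:

    import json
    import os

    _prepared = set()

    def prepare(identifier, description, work_dir, pull):
        task_dir = os.path.join(work_dir, 'tasks', identifier)
        os.makedirs(os.path.join(task_dir, 'data'), exist_ok=True)

        if identifier not in _prepared:
            # Expensive: download the task archive only once per identifier
            pull(identifier, os.path.join(task_dir, 'task.zip'))
            _prepared.add(identifier)

        # Cheap and idempotent: always refresh the description on disk
        with open(os.path.join(task_dir, 'task.json'), 'w', encoding='utf-8') as fp:
            json.dump(description, fp, ensure_ascii=False, sort_keys=True, indent=4)

    # Illustrative usage:
    # prepare('task-1', {'priority': 'LOW'}, '/tmp/scheduler',
    #         lambda ident, path: open(path, 'wb').close())
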
Example #6
    def _postprocess_solution(self, identifier, future, mode):
        """
        Mark resources as released, clean the working directory.

        :param identifier: A job or task identifier.
        :param future: Future object with the solution result, or None if it was never started.
        :param mode: 'task' or 'job'.
        :raise SchedulerException: Raised if an exception occurred during the solution or if results are inconsistent.
        """
        if mode == 'task':
            subdir = 'tasks'
            if identifier in self._task_processes:
                del self._task_processes[identifier]
        else:
            subdir = 'jobs'
            if identifier in self._job_processes:
                del self._job_processes[identifier]
        # Mark resources as released
        del self._reserved[subdir][identifier]

        # Include logs into total scheduler logs
        work_dir = os.path.join(self.work_dir, subdir, identifier)

        # Release resources
        if "keep working directory" in self.conf["scheduler"] and self.conf["scheduler"]["keep working directory"] and \
                os.path.isdir(work_dir):
            reserved_space = utils.dir_size(work_dir)
        else:
            reserved_space = 0

        self.logger.debug('Yielding result of a future object of {} {}'.format(
            mode, identifier))
        try:
            if future:
                self._manager.release_resources(
                    identifier, self._node_name,
                    mode == 'job', reserved_space)

                result = future.result()
                self.logger.info(
                    f'Future processor of {mode} {identifier} returned {result}'
                )

                logfile = "{}/client-log.log".format(work_dir)
                if os.path.isfile(logfile):
                    with open(logfile, mode='r', encoding="utf8") as f:
                        self.logger.debug("Scheduler client log: {}".format(
                            f.read()))
                else:
                    self.logger.warning(
                        "Cannot find Scheduler client file with logs: {!r}".
                        format(logfile))

                errors_file = "{}/client-critical.log".format(work_dir)
                if os.path.isfile(errors_file):
                    with open(errors_file, mode='r', encoding="utf8") as f:
                        errors = f.readlines()
                    if self.conf["scheduler"].get("ignore BenchExec warnings"):
                        for msg in list(errors):
                            match = re.search(r'WARNING - (.*)', msg)
                            if match and (
                                    self.conf["scheduler"]
                                ["ignore BenchExec warnings"] is True or
                                (isinstance(
                                    self.conf["scheduler"]
                                    ["ignore BenchExec warnings"], list)
                                 and any(True for t in self.conf["scheduler"]
                                         ["ignore BenchExec warnings"]
                                         if t in msg))):
                                errors.remove(msg)
                            elif re.search(
                                    r'benchexec(.*) outputted to STDERR', msg):
                                errors.remove(msg)
                else:
                    errors = []

                if errors:
                    error_msg = errors[-1]
                else:
                    error_msg = None
                    try:
                        result = int(result)
                    except ValueError:
                        error_msg = f'Cannot cast {result} to integer'
                    else:
                        if result != 0:
                            error_msg = "Exited with exit code: {}".format(
                                result)

                if error_msg:
                    self.logger.warning(error_msg)
                    raise schedulers.SchedulerException(error_msg)
            else:
                self.logger.debug(
                    "Seems that {} {} has not been started".format(
                        mode, identifier))
        except Exception as err:
            error_msg = "Execution of {} {} terminated with an exception: {}".format(
                mode, identifier, str(err))
            raise schedulers.SchedulerException(error_msg)
        finally:
            # Clean working directory
            if "keep working directory" not in self.conf["scheduler"] or \
                    not self.conf["scheduler"]["keep working directory"]:
                self.logger.debug(
                    "Clean task working directory {} for {}".format(
                        work_dir, identifier))
                shutil.rmtree(work_dir)

        return "FINISHED"
Example #7
    def _prepare_solution(self, identifier, configuration, mode='task'):
        """
        Generate a working directory, configuration files and a multiprocessing Process object so that it is ready to run.

        :param identifier: Job or task identifier.
        :param configuration: A dictionary with a configuration or description.
        :param mode: 'task' or 'job'.
        :raise SchedulerException: Raised if the preparation fails and the task or job cannot be scheduled.
        """
        self.logger.info("Going to prepare execution of the {} {}".format(
            mode, identifier))
        node_status = self._manager.node_info(self._node_name)

        if mode == 'task':
            subdir = 'tasks'
            client_conf = self._get_task_configuration()
            self._manager.check_resources(configuration, job=False)
        else:
            subdir = 'jobs'
            client_conf = self._job_conf_prototype.copy()
            self._manager.check_resources(configuration, job=True)

        args = [self._client_bin, mode]

        self._create_work_dir(subdir, identifier)
        client_conf["Klever Bridge"] = self.conf["Klever Bridge"]
        client_conf["identifier"] = identifier
        work_dir = os.path.join(self.work_dir, subdir, identifier)
        file_name = os.path.join(work_dir, 'client.json')
        args.extend(['--file', file_name])
        self._reserved[subdir][identifier] = dict()

        # Check disk space limitation
        if "keep working directory" in self.conf["scheduler"] and self.conf["scheduler"]["keep working directory"] and \
                'disk memory size' in configuration["resource limits"] and \
                configuration["resource limits"]['disk memory size']:
            current_space = int(
                utils.get_output('du -bs {} | cut -f1'.format(work_dir)))
            if current_space > configuration["resource limits"][
                    'disk memory size']:
                raise schedulers.SchedulerException(
                    "Clean manually existing working directory of {} since its size on the disk is {}B which is "
                    "greater than allowed limitation of {}B".format(
                        os.path.abspath(work_dir), current_space,
                        configuration["resource limits"]['disk memory size']))

        if configuration["resource limits"].get("CPU time"):
            # This is an emergency timer in case something hangs
            timeout = int(
                (configuration["resource limits"]["CPU time"] * 1.5) / 100)
        else:
            timeout = None
        process = multiprocessing.Process(None, self._process_starter,
                                          identifier, [timeout, args])

        if mode == 'task':
            client_conf["Klever Bridge"] = self.conf["Klever Bridge"]
            client_conf["identifier"] = identifier
            client_conf["common"]["working directory"] = work_dir
            for name in ("verifier", "upload verifier input files"):
                client_conf[name] = configuration[name]

            # Speculative flag
            if configuration.get('speculative'):
                client_conf["speculative"] = True

            # Do verification versions check
            if client_conf['verifier']['name'] not in client_conf['client'][
                    'verification tools']:
                raise schedulers.SchedulerException(
                    'Use another verification tool or install and then specify verifier {!r} with its versions in {!r}'
                    .format(
                        client_conf['verifier']['name'],
                        self.conf["scheduler"]["task client configuration"]))
            if 'version' not in client_conf['verifier']:
                raise schedulers.SchedulerException(
                    'Cannot find any given {!r} version in the task description'
                    .format(client_conf['verifier']['name']))
            if client_conf['verifier']['version'] not in \
                    client_conf['client']['verification tools'][client_conf['verifier']['name']]:
                raise schedulers.SchedulerException(
                    'Use another version of {!r} or install the given version {!r} and specify it in the scheduler client '
                    'configuration {!r}'.format(
                        client_conf['verifier']['name'],
                        client_conf['verifier']['version'],
                        self.conf["scheduler"]["task client configuration"]))

            self._task_processes[identifier] = process
        else:
            klever_core_conf = configuration.copy()
            del klever_core_conf["resource limits"]
            klever_core_conf["Klever Bridge"] = self.conf["Klever Bridge"]
            klever_core_conf["working directory"] = "klever-core-work-dir"
            self._reserved["jobs"][identifier][
                "configuration"] = klever_core_conf
            client_conf["common"]["working directory"] = work_dir
            client_conf["Klever Core conf"] = self._reserved["jobs"][
                identifier]["configuration"]

            self._job_processes[identifier] = process

        client_conf["resource limits"] = configuration["resource limits"]
        # Add particular cores
        client_conf["resource limits"]["CPU cores"] = \
            self._get_virtual_cores(int(node_status["available CPU number"]),
                                    int(node_status["reserved CPU number"]),
                                    int(configuration["resource limits"]["number of CPU cores"]))
        if mode != "task":
            if len(client_conf["resource limits"]["CPU cores"]) == 0:
                data = utils.extract_cpu_cores_info()
                client_conf["Klever Core conf"]["task resource limits"]["CPU Virtual cores"] = \
                    sum((len(data[a]) for a in data))
            else:
                client_conf["Klever Core conf"]["task resource limits"]["CPU Virtual cores"] = \
                    len(client_conf["resource limits"]["CPU cores"])

            # Save Klever Core configuration to default configuration file
            with open(os.path.join(work_dir, "core.json"),
                      "w",
                      encoding="utf8") as fh:
                json.dump(client_conf["Klever Core conf"],
                          fh,
                          ensure_ascii=False,
                          sort_keys=True,
                          indent=4)

        with open(file_name, 'w', encoding="utf8") as fp:
            json.dump(client_conf,
                      fp,
                      ensure_ascii=False,
                      sort_keys=True,
                      indent=4)
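
Two details worth noting at the end: the emergency timeout is derived from the CPU time limit (1.5x, converted with the snippet's /100 factor) purely as a watchdog against hangs, and the positional multiprocessing.Process arguments are (group, target, name, args). A self-contained sketch of that setup, with an illustrative stand-in for _process_starter:

    import multiprocessing
    import time

    def process_starter(timeout, args):
        # Stand-in for the real starter, which would launch the client with args
        time.sleep(0.1)

    if __name__ == '__main__':
        limits = {"CPU time": 100000}  # same units as in the snippet
        timeout = int((limits["CPU time"] * 1.5) / 100) if limits.get("CPU time") else None
        # Positional form used in the snippet: group, target, name, args
        process = multiprocessing.Process(None, process_starter, 'job-x',
                                          [timeout, ['client', 'job']])
        process.start()
        process.join()
        print(process.exitcode)  # -> 0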