예제 #1
0
    def make_job_script(cfg, job_name):
        """Creates the job script.

        Once all jobs in the array are launched, the job script can be created.
        It is a bash script that takes the job index as a single argument.
        This index is set in the bsub command as '$LSB_JOBINDEX', which bsub
        sets as the actual index when launching that job in the array. This
        script is super simple - it is just a giant case statement that
        switches on the job index to run that specific job. This preferred over
        creating individual scripts for each job which incurs additional file
        I/O overhead when the scratch area is on NFS, causing a slowdown.

        Returns the path to the job script.
        """

        lines = ["#!/usr/bin/env bash\nset -e\n"]

        # Activate the python virtualenv if it exists.
        if Launcher.pyvenv:
            lines += ["source {}/bin/activate\n".format(Launcher.pyvenv)]

        lines += ["case $1 in\n"]
        for job in LsfLauncher.jobs[cfg][job_name]:
            # Redirect the job's stdout and stderr to its log file.
            cmd = "{} > {} 2>&1".format(job.deploy.cmd,
                                        job.deploy.get_log_path())
            lines += ["  {})\n".format(job.index), "    {};;\n".format(cmd)]

        # Throw error as a sanity check if the job index is invalid.
        lines += [
            "  *)\n",
            "    echo \"ERROR: Illegal job index: $1\" 1>&2; exit 1;;\n",
            "esac\n"
        ]
        if Launcher.pyvenv:
            lines += ["deactivate\n"]

        job_script = Path(LsfLauncher.jobs_dir[cfg], job_name)
        try:
            with open(job_script, "w", encoding='utf-8') as f:
                f.writelines(lines)
        except IOError as e:
            err_msg = "ERROR: Failed to write {}:\n{}".format(job_script, e)
            LsfLauncher._post_finish_job_array(cfg, job_name, err_msg)
            raise LauncherError(err_msg)

        log.log(VERBOSE, "[job_script]: %s", job_script)
        return job_script
예제 #2
0
    def prepare_workspace(project, repo_top, args):
        '''Overrides Launcher.prepare_workspace.'''

        # Since we dispatch to remote machines, a project specific python
        # virtualenv is exists, needs to be activated when launching the job.
        Launcher.set_python_venv(project)
        if Launcher.python_venv is None:
            return

        # Python_venv needs to be a valid tarfile. Extract it in the scratch
        # area if it does not exist. It is upto the user to delete it if it is
        # stale.
        if tarfile.is_tarfile(Launcher.python_venv):
            path = Path(args.scratch_root, Path(Launcher.python_venv).stem)
            if not path.is_dir():
                with tarfile.open(Launcher.python_venv, mode='r') as tar:
                    tar.extractall(path=args.scratch_root)
            Launcher.python_venv = path

        else:
            raise LauncherError("{} is not a valid tar file".format(
                Launcher.python_venv))
예제 #3
0
    def _do_launch(self):
        # Update the shell's env vars with self.exports. Values in exports must
        # replace the values in the shell's env vars if the keys match.
        exports = os.environ.copy()
        if self.deploy.exports:
            exports.update(self.deploy.exports)

        # Clear the magic MAKEFLAGS variable from exports if necessary. This
        # variable is used by recursive Make calls to pass variables from one
        # level to the next. Here, self.cmd is a call to Make but it's
        # logically a top-level invocation: we don't want to pollute the flow's
        # Makefile with Make variables from any wrapper that called dvsim.
        if 'MAKEFLAGS' in exports:
            del exports['MAKEFLAGS']

        self._dump_env_vars(exports)

        args = shlex.split(self.deploy.cmd)
        try:
            f = open(self.deploy.get_log_path(),
                     "w",
                     encoding="UTF-8",
                     errors="surrogateescape")
            f.write("[Executing]:\n{}\n\n".format(self.deploy.cmd))
            f.flush()
            self.process = subprocess.Popen(args,
                                            bufsize=4096,
                                            universal_newlines=True,
                                            stdout=f,
                                            stderr=f,
                                            env=exports)
        except subprocess.SubprocessError as e:
            raise LauncherError('IO Error: {}\nSee {}'.format(
                e, self.deploy.get_log_path()))
        finally:
            self._close_process()

        self._link_odir("D")
예제 #4
0
    def _do_launch(self):
        # Add self to the list of jobs.
        job_name = self.deploy.job_name
        cfg = self.deploy.sim_cfg
        job_total = len(LsfLauncher.jobs[cfg][job_name])

        # The actual launching of the bsub command cannot happen until the
        # Scheduler has dispatched ALL jobs in the array.
        if self.index < job_total:
            return

        job_script = self.make_job_script(cfg, job_name)

        # Update the shell's env vars with self.exports. Values in exports must
        # replace the values in the shell's env vars if the keys match.
        exports = os.environ.copy()
        if self.deploy.exports:
            exports.update(self.deploy.exports)

        # Clear the magic MAKEFLAGS variable from exports if necessary. This
        # variable is used by recursive Make calls to pass variables from one
        # level to the next. Here, self.cmd is a call to Make but it's
        # logically a top-level invocation: we don't want to pollute the flow's
        # Makefile with Make variables from any wrapper that called dvsim.
        if 'MAKEFLAGS' in exports:
            del exports['MAKEFLAGS']

        self._dump_env_vars(exports)

        # TODO: Arbitrarily set the max slot-limit to 100.
        job_array = "{}[1-{}]".format(job_name, job_total)
        if job_total > 100:
            job_array += "%100"

        # TODO: This needs to be moved to a HJson.
        if self.deploy.sim_cfg.tool == "vcs":
            job_rusage = "\'rusage[vcssim=1,vcssim_dynamic=1:duration=1]\'"

        elif self.deploy.sim_cfg.tool == "xcelium":
            job_rusage = "\'rusage[xcelium=1,xcelium_dynamic=1:duration=1]\'"

        else:
            job_rusage = None

        # Launch the job array.
        cmd = [
            "bsub",
            # TODO: LSF project name could be site specific!
            "-P",
            cfg.project,
            "-J",
            job_array,
            "-oo",
            "{}.%I.out".format(job_script),
            "-eo",
            "{}.%I.out".format(job_script)
        ]

        if job_rusage:
            cmd += ["-R", job_rusage]

        cmd += ["/usr/bin/bash {} $LSB_JOBINDEX".format(job_script)]

        try:
            p = subprocess.run(cmd,
                               check=True,
                               timeout=60,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               env=exports)
        except subprocess.CalledProcessError as e:
            # Need to mark all jobs in this range with this fail pattern.
            err_msg = e.stderr.decode("utf-8").strip()
            self._post_finish_job_array(cfg, job_name, err_msg)
            raise LauncherError(err_msg)

        # Extract the job ID.
        result = p.stdout.decode("utf-8").strip()
        job_id = result.split('Job <')[1].split('>')[0]
        if not job_id:
            self._post_finish_job_array(cfg, job_name, "Job ID not found!")
            raise LauncherError(err_msg)

        for job in LsfLauncher.jobs[cfg][job_name]:
            job.bsub_out = Path("{}.{}.out".format(job_script, job.index))
            job.job_id = "{}[{}]".format(job_id, job.index)
            job._link_odir("D")