Ejemplo n.º 1
0
    def test_exec_cmd(self):
        """Exercise ``HostRunner._exec_cmd`` in its main calling modes.

        Checks, in order: output is returned when ``logging=False``; with
        the default logging the returned output is empty and the text shows
        up as a ``STEP_INFO`` record on the ``popper`` logger; ``env`` and
        ``cwd`` are accepted and the variable name appears in the output of
        ``env``; a set passed as ``pids`` ends up holding exactly one entry.
        """
        cmd = ["echo", "hello-world"]
        pid, ecode, output = HostRunner._exec_cmd(cmd, logging=False)
        self.assertGreater(pid, 0)
        self.assertEqual(ecode, 0)
        self.assertEqual(output, "hello-world")

        # Default logging: nothing is returned, the text is logged instead.
        with LogCapture("popper") as logc:
            pid, ecode, output = HostRunner._exec_cmd(cmd)
            self.assertGreater(pid, 0)
            self.assertEqual(ecode, 0)
            self.assertEqual(output, "")
            logc.check_present(("popper", "STEP_INFO", "hello-world"))

        cmd = ["env"]
        pid, ecode, output = HostRunner._exec_cmd(
            cmd,
            env={"TESTACION": "test"},
            cwd="/tmp",
            logging=False,
        )
        self.assertGreater(pid, 0)
        self.assertEqual(ecode, 0)
        # assertIn reports both operands on failure, unlike assertTrue(a in b).
        self.assertIn("TESTACION", output)

        # The return value is irrelevant here; only the pid bookkeeping is.
        _pids = set()
        HostRunner._exec_cmd(["sleep", "2"], pids=_pids)
        self.assertEqual(len(_pids), 1)
Ejemplo n.º 2
0
    def test_exec_cmd(self):
        """Exercise ``HostRunner._exec_cmd`` in its main calling modes.

        Checks, in order: output (including the trailing newline) is
        returned when ``logging=False``; with the default logging the
        returned output is empty and the text is emitted as a ``STEP_INFO``
        record on the ``popper`` logger; ``env`` and ``cwd`` are accepted
        and the variable name appears in the output of ``env``; a set
        passed as ``pids`` ends up holding exactly one entry.
        """
        cmd = ["echo", "hello-world"]
        pid, ecode, output = HostRunner._exec_cmd(cmd, logging=False)
        self.assertGreater(pid, 0)
        self.assertEqual(ecode, 0)
        self.assertEqual(output, "hello-world\n")

        # Default logging: nothing is returned, the text is logged instead.
        with LogCapture('popper') as log:
            pid, ecode, output = HostRunner._exec_cmd(cmd)
            self.assertGreater(pid, 0)
            self.assertEqual(ecode, 0)
            self.assertEqual(output, "")
            log.check_present(('popper', 'STEP_INFO', 'hello-world\n'))

        cmd = ["env"]
        pid, ecode, output = HostRunner._exec_cmd(cmd,
                                                  env={'TESTACION': 'test'},
                                                  cwd="/tmp",
                                                  logging=False)
        self.assertGreater(pid, 0)
        self.assertEqual(ecode, 0)
        # assertIn shows the searched text and the haystack when it fails.
        self.assertIn('TESTACION', output)

        # Only the side effect on the pid set matters for this call.
        _pids = set()
        HostRunner._exec_cmd(["sleep", "2"], pids=_pids)
        self.assertEqual(len(_pids), 1)
Ejemplo n.º 3
0
 def test_stop_running_tasks(self):
     """Check that ``stop_running_tasks`` leaves tracked containers exited.

     Two detached containers are launched through the ``podman`` CLI; the
     stripped output of each launch is registered by hand in
     ``_spawned_containers``. After ``stop_running_tasks()`` each one is
     inspected and its reported state must be ``exited``.
     """
     launch_prefix = ["podman", "run", "-d", "-q"]
     inspect_prefix = [
         "podman", "container", "inspect", "-f", "{{.State.Status}}",
     ]
     workloads = (
         ["debian:buster-slim", "sleep", "20000"],
         ["alpine:3.9", "sleep", "10000"],
     )

     with PodmanRunner() as runner:
         # Start both containers first, keeping the ids in launch order.
         container_ids = []
         for workload in workloads:
             _, _, launch_output = HostRunner._exec_cmd(
                 launch_prefix + workload, logging=False
             )
             container_ids.append(launch_output.strip())

         for container_id in container_ids:
             runner._spawned_containers.add(container_id)

         runner.stop_running_tasks()

         # Collect every status before asserting on any of them.
         observed = []
         for container_id in container_ids:
             _, _, state = HostRunner._exec_cmd(
                 inspect_prefix + [container_id], logging=False
             )
             observed.append(state)

         for state in observed:
             self.assertEqual(state, "exited")
Ejemplo n.º 4
0
    def _exec_srun(self, cmd, step, **kwargs):
        """Run ``cmd`` through ``srun`` and return the resulting exit code.

        ``_set_config_vars(step)`` is called first; the ``srun`` command
        line is then assembled from ``self._nodes``, ``self._ntasks``,
        ``self._ntasks_per_node``, the optional ``self._nodelist`` and the
        flags returned by ``_get_resman_kwargs(step)``, followed by ``cmd``.

        Args:
            cmd: Sequence of command tokens appended after the srun flags.
            step: Step passed to the configuration helpers.
            **kwargs: Forwarded unchanged to ``HostRunner._exec_cmd``.

        Returns:
            ``0`` in dry-run mode (nothing is executed), otherwise the exit
            code reported by ``HostRunner._exec_cmd``.
        """
        self._set_config_vars(step)

        srun_args = ["srun"]
        srun_args += ["--nodes", f"{self._nodes}"]
        srun_args += ["--ntasks", f"{self._ntasks}"]
        srun_args += ["--ntasks-per-node", f"{self._ntasks_per_node}"]
        if self._nodelist:
            srun_args += ["--nodelist", self._nodelist]
        srun_args += self._get_resman_kwargs(step)
        srun_args += cmd

        # Logged even in dry-run mode so the would-be command is visible.
        log.debug(f"Command: {srun_args}")

        if not self._config.dry_run:
            _, exit_code, _ = HostRunner._exec_cmd(srun_args, **kwargs)
            return exit_code
        return 0
Ejemplo n.º 5
0
 def test_stop_running_tasks(self):
     """A pid registered in ``_spawned_pids`` must be gone after stopping."""
     with HostRunner() as runner, Popen(["sleep", "2000"]) as sleeper:
         pid = sleeper.pid
         runner._spawned_pids.add(pid)
         runner.stop_running_tasks()

     # Short grace period before probing the pid.
     time.sleep(2)

     # Signal 0 delivers nothing; it only checks that the pid still exists.
     with self.assertRaises(ProcessLookupError):
         os.kill(pid, 0)
Ejemplo n.º 6
0
 def test_find_container(self):
     """Check ``_find_container`` before and after the container exists.

     Before creation the lookup must yield ``None``; after
     ``_create_container`` it must return the same object that call
     produced. The container is force-removed afterwards.
     """
     config = ConfigLoader.load()
     step = Box(
         {
             "uses": "docker://alpine:3.9",
             "runs": ["echo hello"],
             "id": "kontainer_one",
         },
         default_box=True,
     )
     cid = pu.sanitized_name(step.id, config.wid)
     with PodmanRunner(init_podman_client=True, config=config) as pr:
         c = pr._find_container(cid)
         self.assertIsNone(c)
     with PodmanRunner(init_podman_client=True, config=config) as pr:
         container = pr._create_container(cid, step)
         try:
             c = pr._find_container(cid)
             self.assertEqual(c, container)
         finally:
             # Remove the container even when the assertion fails, so a
             # failing run does not leave a container named ``cid`` behind.
             cmd = ["podman", "container", "rm", "-f", cid]
             HostRunner._exec_cmd(cmd, logging=False)
Ejemplo n.º 7
0
    def run(self, step):
        """Prepare the step's Singularity image and submit it as a batch job.

        The image file is named after the sanitized ``step.id`` plus
        ``.sif`` and lives in ``self._singularity_cache``. Any existing file
        at that path is removed, then (unless in dry-run mode) the image is
        either built from a recipe or pulled, depending on what
        ``_get_build_info(step)`` reports.

        Args:
            step: Step to execute; ``step.id`` determines the image name.

        Returns:
            The value returned by ``_submit_batch_job``.
        """
        self._setup_singularity_cache()
        cid = pu.sanitized_name(step.id, self._config.wid) + ".sif"
        self._container = os.path.join(self._singularity_cache, cid)

        build, img, build_ctx_path = self._get_build_info(step)

        # Remove whatever is currently stored at the image path.
        HostRunner._exec_cmd(["rm", "-rf", self._container])

        if not self._config.dry_run:
            if build:
                recipefile = self._get_recipe_file(build_ctx_path, cid)
                HostRunner._exec_cmd(
                    [
                        "singularity", "build", "--fakeroot", self._container,
                        recipefile
                    ],
                    cwd=build_ctx_path,
                )
            else:
                HostRunner._exec_cmd(
                    ["singularity", "pull", self._container, img])

        cmd = [self._create_cmd(step, cid)]

        self._spawned_containers.add(cid)
        try:
            return self._submit_batch_job(cmd, step)
        finally:
            # Untrack the container even if submission raises, so the
            # bookkeeping never keeps an entry for a job that is not running.
            self._spawned_containers.remove(cid)
Ejemplo n.º 8
0
    def run(self, step):
        """Prepare the step's Singularity image and submit it as a batch job.

        The image file is named after the sanitized ``step['name']`` plus
        ``.sif`` and lives in ``self._singularity_cache``. Any existing file
        at that path is removed, then (unless in dry-run mode) the image is
        either built from a recipe or pulled, depending on what
        ``_get_build_info(step)`` reports.

        Args:
            step: Mapping describing the step; ``step['name']`` determines
                the image name.

        Returns:
            The value returned by ``_submit_batch_job``.
        """
        self._setup_singularity_cache()
        cid = pu.sanitized_name(step['name'], self._config.wid) + '.sif'
        self._container = os.path.join(self._singularity_cache, cid)

        build, img, build_ctx_path = self._get_build_info(step)

        # Remove whatever is currently stored at the image path.
        HostRunner._exec_cmd(['rm', '-rf', self._container])

        if not self._config.dry_run:
            if build:
                recipefile = self._get_recipe_file(build_ctx_path, cid)
                HostRunner._exec_cmd(
                    [
                        'singularity', 'build', '--fakeroot',
                        self._container, recipefile
                    ],
                    cwd=build_ctx_path)
            else:
                HostRunner._exec_cmd(
                    ['singularity', 'pull', self._container, img])

        cmd = [self._create_cmd(step, cid)]

        self._spawned_containers.add(cid)
        try:
            return self._submit_batch_job(cmd, step)
        finally:
            # Untrack the container even if submission raises, so the
            # bookkeeping never keeps an entry for a job that is not running.
            self._spawned_containers.remove(cid)
Ejemplo n.º 9
0
    def _exec_mpi(self, cmd, step, **kwargs):
        """Run ``cmd`` under ``mpirun`` inside a batch job and wait for it.

        A job script ``<job_name>.sh`` and an output file ``<job_name>.out``
        are written using relative paths. The script carries ``#SBATCH``
        directives built from ``self._nodes``, ``self._ntasks``,
        ``self._ntasks_per_node`` and the optional ``self._nodelist``, and
        is submitted with ``sbatch --wait`` plus the flags returned by
        ``_get_resman_kwargs(step)``.

        Args:
            cmd: Sequence of command tokens to run under ``mpirun``.
            step: Step being executed; ``step.id`` names the job.
            **kwargs: Forwarded unchanged to ``HostRunner._exec_cmd``.

        Returns:
            ``0`` in dry-run mode (the files are still written, nothing is
            submitted), otherwise the exit code of the ``sbatch`` call.
        """
        self._set_config_vars(step)
        job_name = pu.sanitized_name(step.id, self._config.wid)

        job_script = f"{job_name}.sh"
        out_file = f"{job_name}.out"

        # Create/truncate the output file before anything starts reading it.
        with open(out_file, "w"):
            pass

        with open(job_script, "w") as f:
            f.write("#!/bin/bash\n")
            f.write(f"#SBATCH --job-name={job_name}\n")
            f.write(f"#SBATCH --output={out_file}\n")
            f.write(f"#SBATCH --nodes={self._nodes}\n")
            f.write(f"#SBATCH --ntasks={self._ntasks}\n")
            f.write(f"#SBATCH --ntasks-per-node={self._ntasks_per_node}\n")
            if self._nodelist:
                f.write(f"#SBATCH --nodelist={self._nodelist}\n")
            f.write(f"mpirun {' '.join(cmd)}")

        sbatch_cmd = ["sbatch", "--wait"]
        sbatch_cmd.extend(self._get_resman_kwargs(step))
        sbatch_cmd.append(job_script)

        log.debug(f"Command: {sbatch_cmd}")

        if self._config.dry_run:
            return 0

        self._spawned_jobs.add(job_name)
        try:
            self._start_out_stream(out_file)
            try:
                _, ecode, _ = HostRunner._exec_cmd(sbatch_cmd, **kwargs)
            finally:
                # Stop the output stream even if the submission raised.
                self._stop_out_stream()
        finally:
            # Never leave a job tracked once this call is over.
            self._spawned_jobs.remove(job_name)

        return ecode
Ejemplo n.º 10
0
    def _submit_batch_job(self, cmd, step):
        """Write ``cmd`` to a job script, submit it with ``sbatch`` and wait.

        The script and its output file are placed in ``/tmp/popper/slurm/``
        and named after the sanitized ``step.id``. Options found in
        ``self._config.resman_opts`` for this step are turned into flags
        with ``pu.key_value_to_flag`` and added to the ``sbatch`` call.

        Args:
            cmd: Iterable of script lines; joined with newlines after the
                shebang line.
            step: Step being executed; ``step.id`` names the job.

        Returns:
            ``0`` in dry-run mode (files are written, nothing is
            submitted), otherwise the exit code of the ``sbatch`` call.
        """
        job_name = pu.sanitized_name(step.id, self._config.wid)
        temp_dir = "/tmp/popper/slurm/"
        os.makedirs(temp_dir, exist_ok=True)

        job_script = os.path.join(temp_dir, f"{job_name}.sh")
        out_file = os.path.join(temp_dir, f"{job_name}.out")

        # create/truncate log
        with open(out_file, "w"):
            pass

        with open(job_script, "w") as f:
            f.write("#!/bin/bash\n")
            f.write("\n".join(cmd))

        # Build the argument vector directly instead of splitting a
        # formatted string, so a value containing whitespace stays a single
        # argument.
        sbatch_cmd = [
            "sbatch",
            "--wait",
            "--job-name",
            job_name,
            "--output",
            out_file,
        ]
        step_opts = self._config.resman_opts.get(step.id, {})
        sbatch_cmd.extend(
            pu.key_value_to_flag(k, v) for k, v in step_opts.items())
        sbatch_cmd.append(job_script)

        log.info(f'[{step.id}] {" ".join(sbatch_cmd)}')

        if self._config.dry_run:
            return 0

        self._spawned_jobs.add(job_name)
        try:
            # start a tail (background) process on the output file
            self._start_out_stream(out_file)
            try:
                # submit the job and wait
                _, ecode, _ = HostRunner._exec_cmd(sbatch_cmd, logging=False)
            finally:
                # kill the tail process, even if the submission raised
                self._stop_out_stream()
        finally:
            # Never leave a job tracked once this call is over.
            self._spawned_jobs.remove(job_name)

        return ecode
Ejemplo n.º 11
0
 def test_create_container(self):
     """Check that freshly created containers report state ``configured``.

     The same scenario runs for two step definitions (a plain id and one
     with mixed case and a space): create the container, inspect its state
     through the ``podman`` CLI, assert on it, then remove the container.
     """
     config = ConfigLoader.load()
     step_specs = (
         {
             "uses": "docker://alpine:3.9",
             "runs": ["echo hello"],
             "id": "kontainer_one",
         },
         {
             "uses": "docker://alpine:3.9",
             "runs": ["echo", "hello_world"],
             "id": "KoNtAiNeR tWo",
         },
     )

     for spec in step_specs:
         step = Box(spec, default_box=True)
         cid = pu.sanitized_name(step.id, config.wid)
         with PodmanRunner(init_podman_client=True, config=config) as runner:
             created = runner._create_container(cid, step)
             inspect_cmd = [
                 "podman", "container", "inspect",
                 "-f", "{{.State.Status}}",
                 created,
             ]
             _, _, state = HostRunner._exec_cmd(inspect_cmd, logging=False)
             self.assertEqual(state, "configured")
             remove_cmd = ["podman", "container", "rm", created]
             HostRunner._exec_cmd(remove_cmd, logging=False)
Ejemplo n.º 12
0
 def _tail_output(self, out_file):
     """Follow ``out_file`` with ``tail -f`` and return the exit code.

     ``self._out_stream_pid`` is reset to a fresh empty set, which is then
     handed to ``HostRunner._exec_cmd`` through its ``pids`` argument.
     """
     self._out_stream_pid = set()
     tail_cmd = ["tail", "-f", out_file]
     _, exit_code, _ = HostRunner._exec_cmd(
         tail_cmd, pids=self._out_stream_pid
     )
     return exit_code
Ejemplo n.º 13
0
 def stop_running_tasks(self):
     """Cancel every job listed in ``self._spawned_jobs`` via ``scancel``.

     Each cancellation is announced at info level; a warning is logged for
     every job whose ``scancel`` invocation exits with a non-zero code.
     """
     for job_name in self._spawned_jobs:
         log.info(f"Cancelling job {job_name}")
         scancel_cmd = ["scancel", "--name", job_name]
         _, exit_code, _ = HostRunner._exec_cmd(scancel_cmd)
         if exit_code == 0:
             continue
         log.warning(f"Failed to cancel the job {job_name}.")
Ejemplo n.º 14
0
 def stop_running_tasks(self):
     """Ask ``scancel`` to cancel each job tracked in ``self._spawned_jobs``.

     An info message is logged before every attempt, and a warning
     afterwards when the ``scancel`` exit code is not zero.
     """
     for job_name in self._spawned_jobs:
         log.info(f'Cancelling job {job_name}')
         _, status, _ = HostRunner._exec_cmd(
             ['scancel', '--name', job_name]
         )
         cancelled = status == 0
         if not cancelled:
             log.warning(f'Failed to cancel the job {job_name}.')