import time

# SubprocessManager is assumed to be importable from the project under test;
# adjust the import path to match your package layout.


def test_subprocess_manager__run(command, timeout):
    """Should run one command at a time and report its return code."""
    mgr = SubprocessManager()
    mgr.run(command, timeout=timeout)
    ret = mgr.wait_for_completion()
    if command == "echo 'Hello'":
        assert ret == 0
        assert mgr.return_code == 0
    if command == "ls --invalidoption":
        assert ret != 0
        assert mgr.return_code != 0
    if command == "sleep 2":
        assert ret == 0
        assert mgr.return_code == 0
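# ``command`` and ``timeout`` above are test parameters, which implies a
# pytest parametrization that is not shown in this section. A hypothetical
# decorator consistent with the assertions (the project's actual cases may
# differ):
#
#     @pytest.mark.parametrize(
#         "command, timeout",
#         [
#             ("echo 'Hello'", None),
#             ("ls --invalidoption", None),
#             ("sleep 2", None),
#         ],
#     )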
def test_subprocess_manager__terminate():
    """Should terminate the subprocess when terminate() is called."""
    mgr = SubprocessManager()
    command = "sleep 10"
    mgr.run(command)
    assert mgr.in_progress()
    start = time.time()
    mgr.terminate()
    assert not mgr.in_progress()
    duration = time.time() - start
    assert duration < 5
    assert mgr.return_code is None
def test_subprocess_manager__run__timeout():
    """Should terminate the run when the timeout expires."""
    mgr = SubprocessManager()
    start = time.time()
    mgr.run("sleep 10", timeout=1)
    mgr.wait_for_completion()
    duration = time.time() - start
    assert duration < 5
    assert mgr.return_code is None
def test_subprocess_manager__in_progress():
    """Should return True while a command is still running."""
    mgr = SubprocessManager()
    command = "sleep 1"
    mgr.run(command)
    assert mgr.in_progress() is True
def test_subprocess_manager__run__no_wait():
    """Should run a command without blocking."""
    mgr = SubprocessManager()
    command = "sleep 2"
    mgr.run(command)
    assert mgr.in_progress() is True
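# The tests above imply this interface for SubprocessManager. A minimal,
# hypothetical sketch built on subprocess.Popen, consistent with those
# assertions (the project's real implementation may differ):
import shlex
import subprocess
import time


class SubprocessManager:
    """Run one non-blocking shell command at a time."""

    def __init__(self):
        self._pipe = None
        self._timeout = None
        self._start = None
        self.return_code = None

    def run(self, command, timeout=None):
        """Start the command without waiting for it to finish."""
        self._timeout = timeout
        self._start = time.time()
        self._pipe = subprocess.Popen(shlex.split(command))

    def in_progress(self):
        """Return True while the command is still running."""
        return self._pipe is not None and self._pipe.poll() is None

    def terminate(self):
        """Kill the running command; return_code stays None."""
        if self.in_progress():
            self._pipe.terminate()
            self._pipe.wait()

    def wait_for_completion(self):
        """Block until the command exits or the timeout expires."""
        if self._pipe is None:
            return None
        while self.in_progress():
            if self._timeout is not None and time.time() - self._start > self._timeout:
                # Timed out: kill the process and leave return_code as None.
                self.terminate()
                return None
            time.sleep(0.1)
        self.return_code = self._pipe.returncode
        return self.return_code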
import logging
import multiprocessing
import tempfile
import time
from pathlib import Path

from filelock import SoftFileLock

# Project-specific names (HpcManagerInterface, HpcJobInfo, HpcJobStatus,
# SubprocessManager, Status, create_script) are assumed to come from the
# surrounding package; adjust the import paths to match your layout.

logger = logging.getLogger(__name__)


class FakeManager(HpcManagerInterface):
    """Simulates management of HPC jobs."""

    JOB_ID_FILE = "fake_manager_job_id.txt"
    LOCK_FILE = "fake_manager.lock"

    def __init__(self, config):
        self._subprocess_mgr = None
        self._job_id = None
        self._config = config

    def am_i_manager(self):
        # Not supported in fake mode.
        assert False

    def cancel_job(self, job_id):
        return 0

    def check_status(self, name=None, job_id=None):
        if self._subprocess_mgr is None:
            job_info = HpcJobInfo(job_id, "", HpcJobStatus.NONE)
        elif self._subprocess_mgr.in_progress():
            job_info = HpcJobInfo(job_id, "", HpcJobStatus.RUNNING)
        else:
            job_info = HpcJobInfo(job_id, "", HpcJobStatus.COMPLETE)
        logger.debug("status=%s", job_info)
        return job_info

    def check_statuses(self):
        return {self._job_id: self.check_status(job_id=self._job_id).status}

    def check_storage_configuration(self):
        pass

    def create_cluster(self):
        pass

    def create_local_cluster(self):
        pass

    def create_submission_script(self, name, script, filename, path):
        lines = [
            "#!/bin/bash",
            str(script),
        ]
        create_script(filename, "\n".join(lines))

    def get_config(self):
        return {"hpc": {}}

    def get_current_job_id(self):
        return None

    def get_local_scratch(self):
        return tempfile.gettempdir()

    def get_node_id(self):
        # If we try to use multi-node jobs in fake mode, this will cause a problem.
        return "0"

    @staticmethod
    def get_num_cpus():
        return multiprocessing.cpu_count()

    def list_active_nodes(self, job_id):
        # Not supported in fake mode.
        assert False

    def log_environment_variables(self):
        pass

    @staticmethod
    def _get_next_job_id(output_path):
        """Return the next job ID and increment the index.

        A lock must be held while calling this method.
        """
        # TODO: This could be enhanced to record completions.
        path = output_path / FakeManager.JOB_ID_FILE
        if path.exists():
            job_id = int(path.read_text().strip())
        else:
            job_id = 1
        next_job_id = job_id + 1
        path.write_text(str(next_job_id) + "\n")
        return job_id

    def submit(self, filename):
        # This method has a workaround for problems seen on some Linux systems
        # (never on Mac). When multiple processes call this method at about
        # the same time, one or more of the subprocesses do not get started.
        # It seems like something within Python gets locked up.
        # This workaround staggers starting of the subprocesses and prevents
        # the issue from occurring.
        output_path = Path(filename).parent
        with SoftFileLock(output_path / FakeManager.LOCK_FILE, timeout=30):
            self._job_id = self._get_next_job_id(output_path)
            self._subprocess_mgr = SubprocessManager()
            self._subprocess_mgr.run(filename)
            logger.info("Submit job with %s", self._job_id)
            time.sleep(1)
        return Status.GOOD, self._job_id, None
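# A hypothetical usage sketch for the class above. ``demo`` is illustrative
# only; FakeManager, Status, and HpcJobStatus are assumed to be importable
# from the surrounding project.
import tempfile
import time
from pathlib import Path


def demo():
    with tempfile.TemporaryDirectory() as tmpdir:
        script = Path(tmpdir) / "job.sh"
        script.write_text("#!/bin/bash\necho hello\n")
        script.chmod(0o755)
        mgr = FakeManager(config={})
        status, job_id, _ = mgr.submit(str(script))
        assert status == Status.GOOD
        # Poll the fake job until the wrapped subprocess finishes.
        while mgr.check_status(job_id=job_id).status == HpcJobStatus.RUNNING:
            time.sleep(0.1)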
import os

# An alternate FakeManager variant without the job-ID tracking or the
# submit-time file lock shown above; other imports are the same.


class FakeManager(HpcManagerInterface):
    """Simulates management of HPC jobs."""

    _OPTIONAL_CONFIG_PARAMS = {}
    _REQUIRED_CONFIG_PARAMS = ()

    def __init__(self, _):
        self._subprocess_mgr = None

    def cancel_job(self, job_id):
        return 0

    def check_status(self, name=None, job_id=None):
        if self._subprocess_mgr is None:
            status = HpcJobInfo("", "", HpcJobStatus.NONE)
        elif self._subprocess_mgr.in_progress():
            status = HpcJobInfo("", "", HpcJobStatus.RUNNING)
        else:
            status = HpcJobInfo("", "", HpcJobStatus.COMPLETE)
        logger.debug("status=%s", status)
        return status

    def check_storage_configuration(self):
        pass

    def create_cluster(self):
        pass

    def create_local_cluster(self):
        pass

    def create_submission_script(self, name, script, filename, path):
        lines = [
            "#!/bin/bash",
            script,
        ]
        create_script(filename, "\n".join(lines))

    def get_config(self):
        return {"hpc": {}}

    def get_local_scratch(self):
        for envvar in ("TMP", "TEMP"):
            tmpdir = os.environ.get(envvar)
            if tmpdir:
                return tmpdir
        return "."

    @staticmethod
    def get_num_cpus():
        return multiprocessing.cpu_count()

    def get_optional_config_params(self):
        return self._OPTIONAL_CONFIG_PARAMS

    def get_required_config_params(self):
        return self._REQUIRED_CONFIG_PARAMS

    def log_environment_variables(self):
        pass

    def submit(self, filename):
        self._subprocess_mgr = SubprocessManager()
        self._subprocess_mgr.run(filename)
        job_id = "1234"
        return Status.GOOD, job_id, None
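# ``create_script`` (used by create_submission_script in both classes) is not
# defined in this section. A minimal sketch, assuming it only needs to write
# the text and mark the file executable:
import os
import stat


def create_script(filename, text):
    """Write ``text`` to ``filename`` and make the file executable."""
    with open(filename, "w") as f:
        f.write(text)
    mode = os.stat(filename).st_mode
    os.chmod(filename, mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)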