Exemple #1
0
 def submit(self, filename):
     """Start the script *filename* in a subprocess and return its job ID.

     Submissions are serialized through a lock file in the script's
     directory, and each one keeps the lock for one extra second. This is a
     workaround for problems seen on some Linux systems (never on Mac): when
     multiple processes call this method at about the same time, one or more
     of the subprocesses do not get started. Staggering the starts prevents
     the issue from occurring.

     Returns a tuple of (Status.GOOD, job ID, None).
     """
     script_dir = Path(filename).parent
     lock_path = script_dir / FakeManager.LOCK_FILE
     with SoftFileLock(lock_path, timeout=30):
         # The job ID file is read and rewritten while the lock is held.
         self._job_id = self._get_next_job_id(script_dir)
         manager = SubprocessManager()
         self._subprocess_mgr = manager
         manager.run(filename)
         logger.info("Submit job with %s", self._job_id)
         # Hold the lock a little longer so the next start is staggered.
         time.sleep(1)
         return Status.GOOD, self._job_id, None
Exemple #2
0
def test_subprocess_manager__run(command, timeout):
    """Should run the command to completion and report its exit status.

    NOTE(review): ``timeout`` is accepted but is not forwarded to ``run()``.
    """
    manager = SubprocessManager()
    manager.run(command)
    exit_code = manager.wait_for_completion()

    if command in ("echo 'Hello'", "sleep 2"):
        # Commands that are expected to succeed.
        assert exit_code == 0
        assert manager.return_code == 0
    elif command == "ls --invalidoption":
        # An invalid option must produce a non-zero exit status.
        assert exit_code != 0
        assert manager.return_code != 0
Exemple #3
0
def test_subprocess_manager__terminate():
    """Should stop a running subprocess when terminate() is called."""
    manager = SubprocessManager()
    manager.run("sleep 10")
    assert manager.in_progress()

    began = time.time()
    manager.terminate()
    assert not manager.in_progress()
    elapsed = time.time() - began
    # Must finish well before the 10-second sleep would end on its own.
    assert elapsed < 5
    assert manager.return_code is None
Exemple #4
0
def test_subprocess_manager__run__timeout():
    """Should end the run early when the timeout expires."""
    manager = SubprocessManager()
    began = time.time()
    manager.run("sleep 10", timeout=1)
    manager.wait_for_completion()
    elapsed = time.time() - began
    # The 1-second timeout must cut the 10-second sleep short.
    assert elapsed < 5
    assert manager.return_code is None
Exemple #5
0
def test_subprocess_manager__in_progress():
    """Should report True from in_progress() while the command is running."""
    manager = SubprocessManager()
    manager.run("sleep 1")
    still_running = manager.in_progress()
    assert still_running is True
Exemple #6
0
def test_subprocess_manager__run__no_wait():
    """Should return from run() while the command is still in progress."""
    manager = SubprocessManager()
    manager.run("sleep 2")
    still_running = manager.in_progress()
    assert still_running is True
Exemple #7
0
class FakeManager(HpcManagerInterface):
    """Simulates management of HPC jobs.

    A submitted script is run through a SubprocessManager. Job IDs are read
    from and written to JOB_ID_FILE in the directory of the submitted script,
    and access to that file is guarded by LOCK_FILE in the same directory.
    """

    JOB_ID_FILE = "fake_manager_job_id.txt"
    LOCK_FILE = "fake_manager.lock"

    def __init__(self, config):
        # No subprocess or job ID exists until submit() is called.
        self._subprocess_mgr = None
        self._job_id = None
        self._config = config

    def am_i_manager(self):
        """Not supported by the fake manager; always raises AssertionError."""
        # Raise explicitly: a bare `assert False` is removed under `python -O`,
        # which would make this method silently return None.
        raise AssertionError("am_i_manager is not supported by FakeManager")

    def cancel_job(self, job_id):
        """Return 0 without taking any action."""
        return 0

    def check_status(self, name=None, job_id=None):
        """Return an HpcJobInfo describing the state of the submitted subprocess.

        The status is NONE when nothing has been submitted, RUNNING while the
        subprocess is in progress, and COMPLETE otherwise. ``name`` is unused.
        """
        if self._subprocess_mgr is None:
            job_info = HpcJobInfo(job_id, "", HpcJobStatus.NONE)
        elif self._subprocess_mgr.in_progress():
            job_info = HpcJobInfo(job_id, "", HpcJobStatus.RUNNING)
        else:
            job_info = HpcJobInfo(job_id, "", HpcJobStatus.COMPLETE)

        logger.debug("status=%s", job_info)
        return job_info

    def check_statuses(self):
        """Return a dict mapping this manager's job ID to its current status."""
        return {self._job_id: self.check_status(job_id=self._job_id).status}

    def check_storage_configuration(self):
        """Do nothing."""

    def create_cluster(self):
        """Do nothing."""

    def create_local_cluster(self):
        """Do nothing."""

    def create_submission_script(self, name, script, filename, path):
        """Write a bash script to *filename* whose body is ``str(script)``.

        ``name`` and ``path`` are unused.
        """
        lines = [
            "#!/bin/bash",
            str(script),
        ]
        create_script(filename, "\n".join(lines))

    def get_config(self):
        """Return a config dict with an empty "hpc" section."""
        return {"hpc": {}}

    def get_current_job_id(self):
        """Return None."""
        return None

    def get_local_scratch(self):
        """Return the system temporary directory."""
        return tempfile.gettempdir()

    def get_node_id(self):
        """Return the fixed node ID "0"."""
        # If we try to use multi-node jobs in fake mode, this will cause a problem.
        return "0"

    @staticmethod
    def get_num_cpus():
        """Return the CPU count reported by multiprocessing."""
        return multiprocessing.cpu_count()

    def list_active_nodes(self, job_id):
        """Not supported by the fake manager; always raises AssertionError."""
        # Raise explicitly so the failure is not stripped under `python -O`.
        raise AssertionError("list_active_nodes is not supported by FakeManager")

    def log_environment_variables(self):
        """Do nothing."""

    @staticmethod
    def _get_next_job_id(output_path):
        """Returns the next job ID and increments the index.
        A lock must be held while calling this method.

        The index is stored as text in JOB_ID_FILE under *output_path*; the
        first ID is 1 when the file does not exist yet.

        """
        # TODO: This could be enhanced to record completions.
        path = output_path / FakeManager.JOB_ID_FILE
        if path.exists():
            job_id = int(path.read_text().strip())
        else:
            job_id = 1
        next_job_id = job_id + 1
        path.write_text(str(next_job_id) + "\n")
        return job_id

    def submit(self, filename):
        """Start the script *filename* in a subprocess and return its job ID.

        Returns a tuple of (Status.GOOD, job ID, None).
        """
        # This method has a workaround for problems seen on some Linux systems
        # (never on Mac).
        # When multiple processes call this method at about the same time,
        # one or more of the subprocesses do not get started. It seems like
        # something within Python gets locked up.
        # This workaround staggers starting of the subprocesses and prevents
        # the issue from occurring.
        output_path = Path(filename).parent
        with SoftFileLock(output_path / FakeManager.LOCK_FILE, timeout=30):
            self._job_id = self._get_next_job_id(output_path)
            self._subprocess_mgr = SubprocessManager()
            self._subprocess_mgr.run(filename)
            logger.info("Submit job with %s", self._job_id)
            # Keep the lock for an extra second to stagger the next start.
            time.sleep(1)
            return Status.GOOD, self._job_id, None
Exemple #8
0
 def submit(self, filename):
     """Start the script *filename* in a subprocess.

     Returns a tuple of (Status.GOOD, "1234", None); the job ID is fixed.
     """
     manager = SubprocessManager()
     self._subprocess_mgr = manager
     manager.run(filename)
     return Status.GOOD, "1234", None
Exemple #9
0
class FakeManager(HpcManagerInterface):
    """Simulates management of HPC jobs by running scripts with SubprocessManager."""

    _OPTIONAL_CONFIG_PARAMS = {}
    _REQUIRED_CONFIG_PARAMS = ()

    def __init__(self, _):
        # The constructor argument is ignored; nothing is running yet.
        self._subprocess_mgr = None

    def cancel_job(self, job_id):
        """Return 0 without taking any action."""
        return 0

    def check_status(self, name=None, job_id=None):
        """Return an HpcJobInfo describing the state of the submitted subprocess.

        The status is NONE when nothing has been submitted, RUNNING while the
        subprocess is in progress, and COMPLETE otherwise. Both arguments are
        unused.
        """
        manager = self._subprocess_mgr
        if manager is None:
            state = HpcJobStatus.NONE
        elif manager.in_progress():
            state = HpcJobStatus.RUNNING
        else:
            state = HpcJobStatus.COMPLETE

        job_info = HpcJobInfo("", "", state)
        logger.debug("status=%s", job_info)
        return job_info

    def check_storage_configuration(self):
        """Do nothing."""

    def create_cluster(self):
        """Do nothing."""

    def create_local_cluster(self):
        """Do nothing."""

    def create_submission_script(self, name, script, filename, path):
        """Write a bash script to *filename* whose body is *script*.

        ``name`` and ``path`` are unused.
        """
        contents = "\n".join(("#!/bin/bash", script))
        create_script(filename, contents)

    def get_config(self):
        """Return a config dict with an empty "hpc" section."""
        return {"hpc": {}}

    def get_local_scratch(self):
        """Return the TMP or TEMP environment value, or "." if neither is set."""
        return os.environ.get("TMP") or os.environ.get("TEMP") or "."

    @staticmethod
    def get_num_cpus():
        """Return the CPU count reported by multiprocessing."""
        cpu_total = multiprocessing.cpu_count()
        return cpu_total

    def get_optional_config_params(self):
        """Return the optional config parameters (an empty dict)."""
        return self._OPTIONAL_CONFIG_PARAMS

    def get_required_config_params(self):
        """Return the required config parameters (an empty tuple)."""
        return self._REQUIRED_CONFIG_PARAMS

    def log_environment_variables(self):
        """Do nothing."""

    def submit(self, filename):
        """Start the script *filename* in a subprocess.

        Returns a tuple of (Status.GOOD, "1234", None); the job ID is fixed.
        """
        manager = SubprocessManager()
        self._subprocess_mgr = manager
        manager.run(filename)
        return Status.GOOD, "1234", None