def test_run_different_timing(self):
    """Expect ``wait`` to raise ``ProcessException`` when one worker fails.

    Three ``bin/test_script.py`` workers are started with different
    ``--wait`` values; only the first one also receives ``--fail``. A fresh
    temporary directory is passed as both ``stdout`` and ``stderr`` for each
    worker. ``wait(timeout=3)`` must raise, and the ``stderr.log`` found under
    the failed worker's rank directory must contain the failure message.
    """
    test_dir = tempfile.mkdtemp()
    try:
        params_list = [
            SubprocessParameters(
                args=[path("bin/test_script.py"), "--wait", "2", "--fail"],
                stdout=test_dir,
                stderr=test_dir,
            ),
            SubprocessParameters(
                args=[path("bin/test_script.py"), "--wait", "1"],
                stdout=test_dir,
                stderr=test_dir,
            ),
            SubprocessParameters(
                args=[path("bin/test_script.py"), "--wait", "5"],
                stdout=test_dir,
                stderr=test_dir,
            ),
        ]
        proc_context = start_processes(params_list)
        with self.assertRaises(ProcessException) as context:
            proc_context.wait(timeout=3)

        # Map the pid carried by the exception back to a worker rank; the
        # rank stays 0 when no process matches.
        failed_rank = 0
        for rank, proc in enumerate(proc_context.processes):
            if context.exception.pid == proc.pid:
                failed_rank = rank

        # Assert on the returned value: a bare call would discard it and the
        # check could never fail. Assumes _check_file returns a truthy value
        # when the text is found.
        self.assertTrue(
            self._check_file(
                f"{test_dir}/{failed_rank}/stderr.log",
                "raising exception since --fail flag was set",
            )
        )
    finally:
        # Remove the temporary directory even when an assertion above fails.
        shutil.rmtree(test_dir)
def _get_params(self, args, stdout=None, stderr=None) -> SubprocessParameters:
    """Wrap ``args`` and the optional redirects in ``SubprocessParameters``.

    Args:
        args: forwarded unchanged as the ``args`` keyword.
        stdout: forwarded unchanged as the ``stdout`` keyword (default ``None``).
        stderr: forwarded unchanged as the ``stderr`` keyword (default ``None``).

    Returns:
        The newly constructed ``SubprocessParameters`` instance.
    """
    redirects = {"stdout": stdout, "stderr": stderr}
    return SubprocessParameters(args=args, **redirects)
def test_run_async_success(self):
    """Check that a short ``wait`` returns ``None`` and a later one finishes.

    Four workers are started with ``--wait 5``. The first ``wait(1)`` must
    return ``None``; after a further ``wait(5)`` no worker may be alive.
    """
    worker_count = 4
    shared_params = SubprocessParameters(
        args=[path("bin/test_script.py"), "--wait", "5"]
    )
    # The same parameters object is reused for every worker.
    ctx = start_processes([shared_params] * worker_count)

    # Presumably the workers are still sleeping here, so nothing is returned.
    early_result = ctx.wait(1)
    self.assertIsNone(early_result)

    ctx.wait(5)
    still_alive = ctx._any_alive()
    self.assertFalse(still_alive)
def test_wait_no_timeout(self):
    """Check two timed waits return ``None`` before an untimed ``wait()``.

    Two workers are started with ``--wait 5``. ``wait(1)`` is called twice and
    must return ``None`` each time; the final ``wait()`` is called without a
    timeout.
    """
    worker_count = 2
    shared_params = SubprocessParameters(
        args=[path("bin/test_script.py"), "--wait", "5"],
    )
    ctx = start_processes([shared_params] * worker_count)

    # Two consecutive one-second waits, each expected to yield None.
    for _ in range(2):
        self.assertEqual(None, ctx.wait(1))

    ctx.wait()
def test_run_different_timing(self):
    """Check that the group result reports a failure for mixed-timing workers.

    Three ``bin/test_script.py`` workers are started with different ``--wait``
    values; only the first also receives ``--fail``. ``self.test_dir`` is
    passed as both ``stdout`` and ``stderr``. The object returned by
    ``wait(timeout=3)`` must have a non-``None`` ``failure`` attribute.
    """
    # Extra command-line arguments per worker, in launch order.
    extra_args_per_worker = (
        ["--wait", "2", "--fail"],
        ["--wait", "1"],
        ["--wait", "5"],
    )
    worker_params = [
        SubprocessParameters(
            args=[path("bin/test_script.py"), *extra_args],
            stdout=self.test_dir,
            stderr=self.test_dir,
        )
        for extra_args in extra_args_per_worker
    ]

    ctx = start_processes(worker_params)
    outcome = ctx.wait(timeout=3)
    self.assertIsNotNone(outcome.failure)
def test_terminate_proc(self):
    """Check that no worker is alive after ``terminate()``.

    Two workers are started with ``--run 10`` and with ``subprocess.PIPE`` as
    both ``stdout`` and ``stderr``; they are terminated immediately.
    """
    worker_count = 2
    piped_params = SubprocessParameters(
        args=[path("bin/test_script.py"), "--run", "10"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # The same parameters object is reused for every worker.
    ctx = start_processes([piped_params] * worker_count)

    ctx.terminate()
    still_alive = ctx._any_alive()
    self.assertFalse(still_alive)
def test_run_fail_group(self):
    """Check that a group of failing workers yields a result with a failure.

    Four ``bin/test_script.py --fail`` workers are started with
    ``self.test_dir`` as both ``stdout`` and ``stderr``. The object returned
    by ``wait()`` must have a non-``None`` ``failure`` attribute.

    ``TORCHELASTIC_ERROR_FILE`` is set only for the duration of the test so
    the value does not leak into tests that run afterwards in this process.
    """
    from unittest import mock

    nprocs = 4
    error_file_env = {
        "TORCHELASTIC_ERROR_FILE": f"{self.test_dir}/error.log",
    }
    # patch.dict restores os.environ on exit, including when the body raises.
    with mock.patch.dict(os.environ, error_file_env):
        params_list = [
            SubprocessParameters(
                args=[path("bin/test_script.py"), "--fail"],
                stdout=self.test_dir,
                stderr=self.test_dir,
            )
        ] * nprocs
        proc_context = start_processes(params_list)
        proc_group_result = proc_context.wait()
        self.assertIsNotNone(proc_group_result.failure)
def test_run_success(self):
    """Check that every completed worker wrote ``Success`` to its stdout log.

    Four ``bin/test_script.py`` workers are started with ``self.test_dir`` as
    both ``stdout`` and ``stderr``. For each entry in the group result's
    ``return_values``, ``<test_dir>/<rank>/stdout.log`` is checked for the
    text ``Success``.
    """
    worker_count = 4
    shared_params = SubprocessParameters(
        args=[path("bin/test_script.py")],
        stdout=self.test_dir,
        stderr=self.test_dir,
    )
    # The same parameters object is reused for every worker.
    ctx = start_processes([shared_params] * worker_count)

    group_result = ctx.wait()
    return_values = group_result.return_values
    # Ranks are taken to be 0..N-1, where N is the number of returned values.
    completed_count = len(return_values.values())
    for rank in range(completed_count):
        log_has_success = self._check_file(
            f"{self.test_dir}/{rank}/stdout.log", "Success"
        )
        self.assertTrue(log_has_success)
def test_run_success(self):
    """Check that every completed worker wrote ``Success`` to its stdout log.

    Four ``bin/test_script.py`` workers are started with a fresh temporary
    directory as both ``stdout`` and ``stderr``. For each entry in the mapping
    returned by ``wait()``, ``<test_dir>/<rank>/stdout.log`` is checked for
    the text ``Success``.
    """
    test_dir = tempfile.mkdtemp()
    try:
        nprocs = 4
        params_list = [
            SubprocessParameters(
                args=[path("bin/test_script.py")],
                stdout=test_dir,
                stderr=test_dir,
            )
        ] * nprocs
        proc_context = start_processes(params_list)
        completed_processes = proc_context.wait()
        # Ranks are taken to be 0..N-1, where N is the number of returned values.
        for rank in range(len(completed_processes.values())):
            self.assertTrue(
                self._check_file(f"{test_dir}/{rank}/stdout.log", "Success")
            )
    finally:
        # Remove the temporary directory even when an assertion above fails.
        shutil.rmtree(test_dir)
def test_run_fail_group(self):
    """Expect ``wait`` to raise ``ProcessException`` when all workers fail.

    Four ``bin/test_script.py --fail`` workers are started with a fresh
    temporary directory as both ``stdout`` and ``stderr``. ``wait()`` must
    raise, and the ``stderr.log`` found under the failed worker's rank
    directory must contain the failure message.
    """
    test_dir = tempfile.mkdtemp()
    try:
        nprocs = 4
        params_list = [
            SubprocessParameters(
                args=[path("bin/test_script.py"), "--fail"],
                stdout=test_dir,
                stderr=test_dir,
            )
        ] * nprocs
        proc_context = start_processes(params_list)
        with self.assertRaises(ProcessException) as context:
            proc_context.wait()

        # Map the pid carried by the exception back to a worker rank; the
        # rank stays 0 when no process matches.
        failed_proc_rank = 0
        for idx, proc in enumerate(proc_context.processes):
            if context.exception.pid == proc.pid:
                failed_proc_rank = idx

        # Assert on the returned value: a bare call would discard it and the
        # check could never fail. Assumes _check_file returns a truthy value
        # when the text is found.
        self.assertTrue(
            self._check_file(
                f"{test_dir}/{failed_proc_rank}/stderr.log",
                "raising exception since --fail flag was set",
            )
        )
    finally:
        # Remove the temporary directory even when an assertion above fails.
        shutil.rmtree(test_dir)