def test_run_different_timing(self):
    """
    Start three scripts with different ``--wait`` values, one of which is
    also given ``--fail``, and check that waiting raises
    ``ProcessException`` and that the failing rank's stderr log contains
    the failure message.
    """
    test_dir = tempfile.mkdtemp()
    # Registered up front so the directory is removed even when an assertion
    # below fails; a trailing rmtree would be skipped in that case.
    self.addCleanup(shutil.rmtree, test_dir)

    params_list = [
        SubprocessParameters(
            args=[path("bin/test_script.py"), "--wait", "2", "--fail"],
            stdout=test_dir,
            stderr=test_dir,
        ),
        SubprocessParameters(
            args=[path("bin/test_script.py"), "--wait", "1"],
            stdout=test_dir,
            stderr=test_dir,
        ),
        SubprocessParameters(
            args=[path("bin/test_script.py"), "--wait", "5"],
            stdout=test_dir,
            stderr=test_dir,
        ),
    ]
    proc_context = start_processes(params_list)

    with self.assertRaises(ProcessException) as context:
        proc_context.wait(timeout=3)

    # Map the pid carried by the exception back to its position in the
    # process list; falls back to rank 0 when no pid matches.
    failed_rank = 0
    for rank, proc in enumerate(proc_context.processes):
        if context.exception.pid == proc.pid:
            failed_rank = rank

    # The helper's result is asserted here, as the success tests do;
    # NOTE(review): assumes _check_file returns a truthy value on match.
    self.assertTrue(
        self._check_file(
            f"{test_dir}/{failed_rank}/stderr.log",
            "raising exception since --fail flag was set",
        )
    )
def test_run_stream_redirect_to_file(self):
    """
    Run one succeeding and one failing script with stdout/stderr pointed at
    ``self.test_dir`` and check that per-rank log files exist and hold the
    expected content.
    """
    params_list = [
        self._get_params(
            [path("bin/test_script.py")],
            stdout=self.test_dir,
            stderr=self.test_dir,
        ),
        self._get_params(
            [path("bin/test_script.py"), "--fail"],
            stdout=self.test_dir,
            stderr=self.test_dir,
        ),
    ]
    proc_context = start_processes(params_list)
    proc_group_result = proc_context.wait(timeout=3)
    self.assertIsNotNone(proc_group_result.failure)

    # Both ranks must have produced both stream logs.
    for rank in (0, 1):
        for stream in ("stdout", "stderr"):
            self.assertTrue(
                os.path.exists(f"{self.test_dir}/{rank}/{stream}.log")
            )

    self.assertEqual("Success", self._read_file(f"{self.test_dir}/0/stdout.log"))
    # assertIn reports the actual log content on failure, unlike
    # assertTrue(x in y) which only reports "False is not true".
    self.assertIn(
        "raising exception since --fail flag was set",
        self._read_file(f"{self.test_dir}/1/stderr.log"),
    )
def test_run_stream_redirect_to_file(self):
    """
    Run one succeeding and one failing script with stdout/stderr pointed at
    a fresh temporary directory, expect ``ProcessException`` from ``wait``,
    and check that per-rank log files exist and hold the expected content.
    """
    test_dir = tempfile.mkdtemp()
    # Registered up front so the directory is removed even when an assertion
    # below fails; a trailing rmtree would be skipped in that case.
    self.addCleanup(shutil.rmtree, test_dir)

    params_list = [
        self._get_params(
            [path("bin/test_script.py")],
            stdout=test_dir,
            stderr=test_dir,
        ),
        self._get_params(
            [path("bin/test_script.py"), "--fail"],
            stdout=test_dir,
            stderr=test_dir,
        ),
    ]
    proc_context = start_processes(params_list)
    with self.assertRaises(ProcessException):
        proc_context.wait()

    # Both ranks must have produced both stream logs.
    for rank in (0, 1):
        for stream in ("stdout", "stderr"):
            self.assertTrue(os.path.exists(f"{test_dir}/{rank}/{stream}.log"))

    self.assertEqual("Success", self._read_file(f"{test_dir}/0/stdout.log"))
    # assertIn reports the actual log content on failure, unlike
    # assertTrue(x in y) which only reports "False is not true".
    self.assertIn(
        "raising exception since --fail flag was set",
        self._read_file(f"{test_dir}/1/stderr.log"),
    )
def test_run_async_success(self):
    """
    Start four copies of the test script with ``--wait 5``; a one-second
    wait is expected to return ``None``, and after a further five-second
    wait no process may be reported alive.
    """
    nprocs = 4
    # A single parameters object repeated nprocs times (the list entries
    # all reference the same instance).
    shared_params = SubprocessParameters(
        args=[path("bin/test_script.py"), "--wait", "5"]
    )
    params_list = [shared_params] * nprocs

    proc_context = start_processes(params_list)

    first_wait = proc_context.wait(1)
    self.assertIsNone(first_wait)

    proc_context.wait(5)
    still_running = proc_context._any_alive()
    self.assertFalse(still_running)
def test_wait_no_timeout(self):
    """
    Start two copies of the test script with ``--wait 5``; two consecutive
    one-second waits are each expected to return ``None``, after which
    ``wait()`` is called without a timeout.
    """
    nprocs = 2
    params_list = [
        SubprocessParameters(
            args=[path("bin/test_script.py"), "--wait", "5"],
        )
    ] * nprocs
    proc_context = start_processes(params_list)

    # assertIsNone is an identity check; assertEqual(None, ...) would also
    # pass for any object that merely compares equal to None.
    self.assertIsNone(proc_context.wait(1))
    self.assertIsNone(proc_context.wait(1))

    proc_context.wait()
def test_terminate_proc(self):
    """
    Start two copies of the test script with ``--run 10`` and piped
    stdout/stderr, terminate the group, and check that no process is
    reported alive afterwards.
    """
    nprocs = 2
    # A single parameters object repeated nprocs times (the list entries
    # all reference the same instance).
    piped_params = SubprocessParameters(
        args=[path("bin/test_script.py"), "--run", "10"],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    params_list = [piped_params] * nprocs

    proc_context = start_processes(params_list)
    proc_context.terminate()

    still_running = proc_context._any_alive()
    self.assertFalse(still_running)
def test_run_fail_group(self):
    """
    Start four copies of the test script with ``--fail`` and check that the
    group result returned by ``wait`` reports a failure.
    """
    from unittest import mock

    nprocs = 4
    # patch.dict restores os.environ on exit, so TORCHELASTIC_ERROR_FILE no
    # longer leaks into tests that run after this one.
    error_file_env = {"TORCHELASTIC_ERROR_FILE": f"{self.test_dir}/error.log"}
    with mock.patch.dict(os.environ, error_file_env):
        params_list = [
            SubprocessParameters(
                args=[path("bin/test_script.py"), "--fail"],
                stdout=self.test_dir,
                stderr=self.test_dir,
            )
        ] * nprocs
        proc_context = start_processes(params_list)
        # wait() stays inside the patched environment in case the parent
        # side consults the variable while collecting results.
        proc_group_result = proc_context.wait()
        self.assertIsNotNone(proc_group_result.failure)
def start_subprocesses(
    params: List[SubprocessParameters],
):
    """
    Launch the processes described by ``params`` and return the process
    context produced by ``sp_context.start_processes``.

    Raises:
        ValueError: if ``params`` contains no entries.
    """
    # Materialize first so any iterable can be checked for emptiness.
    materialized = list(params)
    if not materialized:
        raise ValueError(
            "Params cannot be empty. Provide at least single SubprocessParameters object"
        )
    return sp_context.start_processes(materialized)
def test_run_success(self):
    """
    Start four copies of the test script with output directed at
    ``self.test_dir`` and check that each rank's stdout log passes the
    ``"Success"`` content check.
    """
    nprocs = 4
    # A single parameters object repeated nprocs times (the list entries
    # all reference the same instance).
    shared_params = SubprocessParameters(
        args=[path("bin/test_script.py")],
        stdout=self.test_dir,
        stderr=self.test_dir,
    )
    params_list = [shared_params] * nprocs

    proc_context = start_processes(params_list)
    proc_group_result = proc_context.wait()

    # The number of logs checked is driven by how many return values the
    # group result reports.
    rank_count = len(proc_group_result.return_values.values())
    for rank in range(rank_count):
        stdout_log = f"{self.test_dir}/{rank}/stdout.log"
        self.assertTrue(self._check_file(stdout_log, "Success"))
def test_run_success(self):
    """
    Start four copies of the test script with output directed at a fresh
    temporary directory and check that each rank's stdout log passes the
    ``"Success"`` content check.
    """
    test_dir = tempfile.mkdtemp()
    # Registered up front so the directory is removed even when an assertion
    # below fails; a trailing rmtree would be skipped in that case.
    self.addCleanup(shutil.rmtree, test_dir)

    nprocs = 4
    params_list = [
        SubprocessParameters(
            args=[path("bin/test_script.py")],
            stdout=test_dir,
            stderr=test_dir,
        )
    ] * nprocs
    proc_context = start_processes(params_list)
    completed_processes = proc_context.wait()

    # The number of logs checked is driven by how many entries wait()
    # returned.
    for rank in range(len(completed_processes.values())):
        self.assertTrue(
            self._check_file(f"{test_dir}/{rank}/stdout.log", "Success")
        )
def test_std_different_dest(self):
    """
    Run one succeeding and one failing script with only stdout pointed at
    ``self.test_dir`` and check that the group reports a failure, both
    stdout logs exist, and rank 0's log reads ``"Success"``.
    """
    script = "bin/test_script.py"
    succeeding = self._get_params(
        [path(script)],
        stdout=self.test_dir,
    )
    failing = self._get_params(
        [path(script), "--fail"],
        stdout=self.test_dir,
    )
    params_list = [succeeding, failing]
    # NOTE(review): "--fail" is already in the argument list above; this
    # append looks redundant but is kept to preserve behavior. Confirm.
    params_list[1].args.append("--fail")

    proc_context = start_processes(params_list)
    group_result = proc_context.wait(timeout=3)
    self.assertIsNotNone(group_result.failure)

    for rank in (0, 1):
        self.assertTrue(os.path.exists(f"{self.test_dir}/{rank}/stdout.log"))

    rank0_output = self._read_file(f"{self.test_dir}/0/stdout.log")
    self.assertEqual("Success", rank0_output)
def test_run_different_timing(self):
    """
    Start three scripts with different ``--wait`` values, one of which is
    also given ``--fail``, wait up to three seconds, and check that the
    group result reports a failure.
    """
    # Extra command-line arguments for each rank, in launch order.
    per_rank_args = (
        ["--wait", "2", "--fail"],
        ["--wait", "1"],
        ["--wait", "5"],
    )
    params_list = [
        SubprocessParameters(
            args=[path("bin/test_script.py"), *extra_args],
            stdout=self.test_dir,
            stderr=self.test_dir,
        )
        for extra_args in per_rank_args
    ]

    proc_context = start_processes(params_list)
    group_result = proc_context.wait(timeout=3)
    self.assertIsNotNone(group_result.failure)
def test_std_different_dest(self):
    """
    Run one succeeding and one failing script with only stdout pointed at a
    fresh temporary directory, expect ``ProcessException`` from ``wait``,
    and check that both stdout logs exist and rank 0's log reads
    ``"Success"``.
    """
    test_dir = tempfile.mkdtemp()
    # Registered up front so the directory is removed even when an assertion
    # below fails; a trailing rmtree would be skipped in that case.
    self.addCleanup(shutil.rmtree, test_dir)

    params_list = [
        self._get_params(
            [path("bin/test_script.py")],
            stdout=test_dir,
        ),
        self._get_params(
            [path("bin/test_script.py"), "--fail"],
            stdout=test_dir,
        ),
    ]
    # NOTE(review): "--fail" is already in the argument list above; this
    # append looks redundant but is kept to avoid changing the command line.
    params_list[1].args.append("--fail")

    proc_context = start_processes(params_list)
    # The exception object itself is not inspected, so it is not bound.
    with self.assertRaises(ProcessException):
        proc_context.wait()

    for rank in (0, 1):
        self.assertTrue(os.path.exists(f"{test_dir}/{rank}/stdout.log"))
    self.assertEqual("Success", self._read_file(f"{test_dir}/0/stdout.log"))
def test_run_fail_group(self):
    """
    Start four copies of the test script with ``--fail``, expect
    ``ProcessException`` from ``wait``, and check that the failing rank's
    stderr log contains the failure message.
    """
    test_dir = tempfile.mkdtemp()
    # Registered up front so the directory is removed even when an assertion
    # below fails; a trailing rmtree would be skipped in that case.
    self.addCleanup(shutil.rmtree, test_dir)

    nprocs = 4
    params_list = [
        SubprocessParameters(
            args=[path("bin/test_script.py"), "--fail"],
            stdout=test_dir,
            stderr=test_dir,
        )
    ] * nprocs
    proc_context = start_processes(params_list)

    with self.assertRaises(ProcessException) as context:
        proc_context.wait()

    # Map the pid carried by the exception back to its position in the
    # process list; falls back to rank 0 when no pid matches.
    failed_proc_rank = 0
    for idx, proc in enumerate(proc_context.processes):
        if context.exception.pid == proc.pid:
            failed_proc_rank = idx

    # The helper's result is asserted here, as the success tests do;
    # NOTE(review): assumes _check_file returns a truthy value on match.
    self.assertTrue(
        self._check_file(
            f"{test_dir}/{failed_proc_rank}/stderr.log",
            "raising exception since --fail flag was set",
        )
    )