def test_pipe_clone(self) -> None:
    """A Pipe built from another Pipe's handle shares only that end."""
    payload = b"msg"
    base_pipe = subprocess_rpc.Pipe()

    # Clone sharing only the write end: its writes surface on `base_pipe`,
    # while reading through the clone is rejected.
    write_only = subprocess_rpc.Pipe(write_handle=base_pipe.write_handle)
    write_only.write(payload)
    self.assertEqual(payload, base_pipe.read())
    with self.assertRaises(IOError):
        write_only.read()

    # Clone sharing only the read end: it observes `base_pipe`'s writes,
    # while writing through the clone is rejected.
    read_only = subprocess_rpc.Pipe(read_handle=base_pipe.read_handle)
    base_pipe.write(payload)
    self.assertEqual(payload, read_only.read())
    with self.assertRaises(IOError):
        read_only.write(payload)
def test_pipe_concurrent_timeout(self) -> None:
    """Several pipes timing out concurrently each fire their timeout callback.

    Three pipes with staggered timeouts are read (with no writer), so every
    read must fail via the timeout path; each failure bumps the shared
    callback counter and records the raised exception.
    """
    result = {"callback_count": 0, "exceptions": []}

    def callback():
        result["callback_count"] += 1

    timeouts = [0.5, 1.0, 1.5]
    pipes = [
        subprocess_rpc.Pipe(timeout=timeout, timeout_callback=callback)
        for timeout in timeouts
    ]

    def target(pipe):
        # Nothing ever writes, so `read` can only end via the timeout path.
        try:
            pipe.read()
        except Exception as e:
            result["exceptions"].append(e)

    threads = [threading.Thread(target=target, args=(pipe,)) for pipe in pipes]
    # Use plain loops for the side effects; the original built throwaway
    # lists with `[t.start() for t in threads]`-style comprehensions.
    for t in threads:
        t.start()
    # Bound the join so a broken timeout mechanism cannot hang the suite.
    for t in threads:
        t.join(timeout=5)

    self.assertEqual(result["callback_count"], 3)
    self.assertEqual(len(result["exceptions"]), 3)
    for e in result["exceptions"]:
        with self.assertRaisesRegex(OSError, "Exceeded timeout:"):
            raise e
def test_pipe_stacked_read_write(self) -> None:
    """Messages queued back-to-back come out in FIFO order."""
    pipe = subprocess_rpc.Pipe()
    messages = (b"abc", b"def", b"ghi")
    for msg in messages:
        pipe.write(msg)
    for expected in messages:
        self.assertEqual(expected, pipe.read())
def test_pipe_basic_read_write(self) -> None:
    """Round-trip a small and a large payload through a single Pipe."""
    pipe = subprocess_rpc.Pipe()
    # Small payload first, then one large enough to exercise bulk transfer.
    for msg in (b"abc", b"asdjkf" * 1024):
        pipe.write(msg)
        self.assertEqual(msg, pipe.read())
def test_pipe_timeout(self) -> None:
    """A read with no writer raises OSError after `timeout` and runs the callback."""
    result = {}

    def callback():
        result["callback_run"] = True

    # We have to run this in a thread, because if the timeout mechanism
    # fails we don't want the entire unit test suite to hang.
    pipe = subprocess_rpc.Pipe(timeout=0.5, timeout_callback=callback)

    def target():
        try:
            pipe.read()
        except Exception as e:
            result["e"] = e

    thread = threading.Thread(target=target)
    thread.daemon = True
    thread.start()
    thread.join(timeout=10)

    e: typing.Optional[Exception] = result.get("e", None)
    self.assertIsNotNone(e)
    with self.assertRaisesRegex(OSError, "Exceeded timeout: 0.5"):
        raise e

    # FIX: the original wrote `assertTrue(result.get("callback_run", None), True)`,
    # which passes `True` as the failure *message* (assertTrue's second
    # parameter), not as an expected value. Use a clean truthiness check.
    self.assertTrue(result.get("callback_run", False))
def __init__(self, timeout: typing.Optional[float] = None) -> None: super().__init__() # Log inputs and outputs for debugging. self._command_log = os.path.join(self.working_dir, "commands.log") pathlib.Path(self._command_log).touch() self._stdout_f: io.FileIO = io.FileIO( os.path.join(self.working_dir, "stdout.txt"), mode="w", ) self._stderr_f: io.FileIO = io.FileIO( os.path.join(self.working_dir, "stderr.txt"), mode="w", ) # `self._run` has strong assumptions about how `_input_pipe` and # `_output_pipe` are used. They should not be accessed in any other # context. (The same is true for `self.load` and `_load_pipe`.) self._input_pipe = subprocess_rpc.Pipe() self._output_pipe = subprocess_rpc.Pipe( timeout=timeout, timeout_callback=self._kill_proc, ) self._load_pipe = subprocess_rpc.Pipe( timeout=timeout, timeout_callback=self._kill_proc, ) # Windows and Unix differ in how pipes are shared with children. # In Unix they are inherited, while in Windows the child consults the # OS to get access. Most of this complexity is handled by # `subprocess_rpc.Pipe`, however we also have to make sure Popen # exposes the pipes in a platform appropriate way. child_fds = [ self._input_pipe.read_fd, self._output_pipe.write_fd, self._load_pipe.write_fd, ] if subprocess_rpc.IS_WINDOWS: for fd in child_fds: os.set_inheritable(fd, True) startupinfo = subprocess.STARTUPINFO() startupinfo.lpAttributeList["handle_list"].extend( [subprocess_rpc.to_handle(fd) for fd in child_fds]) popen_kwargs = { "startupinfo": startupinfo, } else: popen_kwargs = { "close_fds": True, "pass_fds": child_fds, } self._proc = subprocess.Popen( args=self.args, stdin=subprocess.PIPE, stdout=self._stdout_f, stderr=self._stderr_f, encoding=subprocess_rpc.ENCODING, bufsize=1, cwd=os.getcwd(), **popen_kwargs, ) self._worker_bootstrap_finished: bool = False self._bootstrap_worker() self._alive = True
def _bootstrap_worker(self) -> None:
    """Import subprocess_rpc in the worker, and start the work loop.

    Commands are executed by writing to `self._input_pipe`, and
    waiting for a response on `self._output_pipe`. This presumes,
    however, that there is a worker doing the opposite: listening to
    the input pipe and writing to the output pipe. At startup
    `self._proc` is a simple interactive Python process, so we have to
    bootstrap it to start the work loop or else `self._run` will hang
    waiting for jobs to be processed.

    Raises:
        ValueError: If the child process has already exited.
        RuntimeError (as __cause__): If bootstrap times out or fails,
            with the child's stdout/stderr attached for debugging.
    """
    # NB: This gets sent directly to `self._proc`'s stdin, so it MUST be
    # a single expression and may NOT contain any empty lines. (Due to
    # how Python processes commands.)
    bootstrap_command = textwrap.dedent(f"""
        try:
            import marshal
            import sys
            sys_path_old = list(sys.path)
            sys.path = marshal.loads(
                bytes.fromhex({repr(marshal.dumps(sys.path).hex())})
            )
            # The parent gets priority, but a subclass could set PYTHONPATH
            # so we have to respect extra paths.
            sys.path.extend([i for i in sys_path_old if i and i not in sys.path])
            from components._impl.workers import subprocess_rpc
            output_pipe = subprocess_rpc.Pipe(
                write_handle={self._output_pipe.write_handle})
            output_pipe.write(subprocess_rpc.BOOTSTRAP_IMPORT_SUCCESS)
            subprocess_rpc.run_loop(
                input_handle={self._input_pipe.read_handle},
                output_pipe=output_pipe,
                load_handle={self._load_pipe.write_handle},
            )
        except:
            sys.exit(1)
    """).strip()

    if self._proc.poll() is not None:
        raise ValueError("Process has already exited.")

    proc_stdin = self._proc.stdin
    assert proc_stdin is not None

    self._log_cmd(bootstrap_command)

    # We need two newlines for Python to stop waiting for more input.
    proc_stdin.write(f"{bootstrap_command}\n\n")
    proc_stdin.flush()

    with self.watch_stdout_stderr() as get_output:
        try:
            # Bootstrapping is very fast. (Unlike user code where we have
            # no a priori expected upper bound.) If we don't get a response
            # prior to the timeout, it is overwhelmingly likely that the
            # worker died or the bootstrap failed. (E.g. failed to resolve
            # import path.) This simply allows us to raise a good error.
            bootstrap_pipe = subprocess_rpc.Pipe(
                read_handle=self._output_pipe.read_handle,
                write_handle=self._output_pipe.write_handle,
                timeout=self._bootstrap_timeout,
            )
            result = bootstrap_pipe.read()
            assert result == subprocess_rpc.BOOTSTRAP_IMPORT_SUCCESS, result
            result = bootstrap_pipe.read()
            assert result == subprocess_rpc.BOOTSTRAP_INPUT_LOOP_SUCCESS, result
            self._worker_bootstrap_finished = True
            assert self._proc.poll() is None
        except (Exception, KeyboardInterrupt) as e:
            stdout, stderr = get_output()
            # FIX: `poll()` returns the child's exit code, so a clean exit
            # (code 0) is falsy and was previously misreported as a
            # "timeout". Compare against None (still running) instead.
            cause = "timeout" if self._proc.poll() is None else "import failed"
            raise e from RuntimeError(
                f"Failed to bootstrap worker ({cause}):\n"
                f" working_dir: {self.working_dir}\n"
                f" stdout:\n{textwrap.indent(stdout, ' ' * 8)}\n\n"
                f" stderr:\n{textwrap.indent(stderr, ' ' * 8)}")