def ensure_running(self):
    '''Make sure that a fork server is running.

    This can be called from any process.  Note that usually a
    child process will just reuse the forkserver started by its
    parent, so ensure_running() will do nothing.
    '''
    with self._lock:
        resource_tracker.ensure_running()
        if self._forkserver_pid is not None:
            # forkserver was launched before, is it still running?
            # WNOHANG makes this a non-blocking poll: pid == 0 means the
            # child has not exited.
            pid, status = os.waitpid(self._forkserver_pid, os.WNOHANG)
            if not pid:
                # still alive
                return
            # dead, launch it again — drop all state tied to the old
            # server before creating a new one.
            os.close(self._forkserver_alive_fd)
            self._forkserver_address = None
            self._forkserver_alive_fd = None
            self._forkserver_pid = None
        # XXX only thing that changed! (relative to the stdlib
        # multiprocessing.forkserver implementation this is based on:
        # the child runs tractor's override of the forkserver main loop)
        cmd = ('from tractor._forkserver_override import main; ' +
               'main(%d, %d, %r, **%r)')
        if self._preload_modules:
            # Only forward the keys the forkserver child actually needs
            # to reproduce the parent's import environment.
            desired_keys = {'main_path', 'sys_path'}
            data = spawn.get_preparation_data('ignore')
            data = {x: y for x, y in data.items() if x in desired_keys}
        else:
            data = {}

        with socket.socket(socket.AF_UNIX) as listener:
            address = connection.arbitrary_address('AF_UNIX')
            listener.bind(address)
            if not util.is_abstract_socket_namespace(address):
                # Filesystem socket: restrict to the owning user.
                os.chmod(address, 0o600)
            listener.listen()

            # all client processes own the write end of the "alive" pipe;
            # when they all terminate the read end becomes ready.
            alive_r, alive_w = os.pipe()
            try:
                # The child inherits the listener socket and the read end
                # of the alive pipe via fd passing.
                fds_to_pass = [listener.fileno(), alive_r]
                cmd %= (listener.fileno(), alive_r, self._preload_modules,
                        data)
                exe = spawn.get_executable()
                args = [exe] + util._args_from_interpreter_flags()
                args += ['-c', cmd]
                pid = util.spawnv_passfds(exe, args, fds_to_pass)
            except:
                # Spawn failed: nobody will ever hold the write end, so
                # close it before re-raising.
                os.close(alive_w)
                raise
            finally:
                # Our copy of the read end is no longer needed; the child
                # (if spawned) holds its own inherited copy.
                os.close(alive_r)
            # Record the new server; alive_w stays open so the forkserver
            # can detect when this client exits.
            self._forkserver_address = address
            self._forkserver_alive_fd = alive_w
            self._forkserver_pid = pid
def _launch(self, process_obj):
    """Launch *process_obj* as a Fiber-backed job.

    Serializes the process preparation data and the process object,
    starts the job through the Fiber backend, waits until the worker is
    reachable (via the background thread in active IPC mode, or by
    polling/connecting directly in passive mode), then ships the pickled
    payload to the worker over the resulting socket connection.

    Side effects: sets ``self.ident``, ``self.pid``, ``self.sentinel``,
    ``process_obj.ident`` and ``process_obj._popen``.  Returns early
    (without a connection) if ``self._exiting`` is set or the job stops.
    """
    logger.debug("%s %s _launch called", process_obj, self)
    if config.ipc_active:
        logger.debug("%s ipc_active is set, launch background thread", self)
        self.launch_fiber_background_thread_if_needed()
    else:
        logger.debug("%s ipc_active is not set", self)

    # Setup networking
    ident = next(_event_counter)
    self.ident = ident

    # this needs to happen after self._setup_listen where port is decided
    global admin_host, admin_port
    if config.ipc_active:
        # in active mode, port is admin_port which could be 0 at first and
        # set to the actual port by the backend thread
        port = admin_port
    else:
        # in passive mode, port can't be 0 because this need to be
        # pre-determined information between master and workers.
        assert self.worker_port != 0, (
            "port can't be 0 because this "
            "need to be pre-determined information between master "
            "and workers.")
        port = self.worker_port

    if config.ipc_active:
        assert admin_host is not None
        assert admin_port is not None
    cmd = self.get_command_line(cwd=os.getcwd(), host=admin_host,
                                port=port, id=ident)
    job = self._get_job(cmd)

    # A freshly constructed Event is already unset; the background thread
    # sets it once the worker connects back (active mode only).
    event = threading.Event()
    _event_dict[ident] = event
    logger.debug(
        "%s popen_fiber_spawn created event %s and set _event_dict[%s]",
        self, event, ident,
    )

    # prepare data and serialize
    prep_data = spawn.get_preparation_data(process_obj.name)
    prep_data["fiber_config"] = config.get_object()
    # TODO(jiale) what is a better way to setup sys_path inside containers?
    prep_data.pop("sys_path", None)
    logger.debug("%s prep_data: %s", self, str(prep_data))
    fp = io.BytesIO()
    set_spawning_popen(self)
    try:
        self._pickle_data(prep_data, fp)
        self._pickle_data(process_obj, fp)
    finally:
        set_spawning_popen(None)

    # Set process_obj._popen = self here so that we have the
    # Process <-> Popen binding before _run_job() is called.  After
    # _run_job() is called, we started a job with the Fiber backend, so
    # the Process <-> Popen binding has to exist so that
    # Process.terminate() can find the Popen object and terminate the
    # underlying job.  Also, when we serialize process_obj above, we have
    # to make sure that the Process <-> Popen binding doesn't exist yet so
    # that Popen doesn't get serialized, because 1) it's not necessary and
    # 2) some parts of Popen cannot be pickled.
    process_obj._popen = self

    # launch job
    job = self._run_job(job)
    self.pid = get_pid_from_jid(job.jid)

    # Fix process obj's pid
    process_obj.ident = self.pid
    post_data = {"pid": self.pid}
    self._pickle_data(post_data, fp)

    send_buffer = fp.getbuffer()

    if config.ipc_active:
        # (worker) active mode, wait for the event to be set by the
        # background thread once the worker connects back.
        done = False
        while not done:
            if self._exiting:
                logger.debug("process is exiting, don't wait for job to "
                             "connect back")
                return
            done = event.wait(0.5)
            status = self.check_status()
            if status == ProcessStatus.STOPPED:
                return
            logger.debug(
                "popen_fiber_spawn is waiting for accept event %s to finish",
                event,
            )
        # The background thread replaced the Event with the accepted
        # connection object under the same key.
        conn = _event_dict[ident]
        logger.debug("got conn from _event_dict[%s]", ident)
        del _event_dict[ident]
        logger.debug("remove entry _event_dict[%s]", ident)
    else:
        # (worker) passive mode, check job status until it's ready
        while True:
            if self._exiting:
                logger.debug(
                    "process is exiting, don't wait for job to start")
                return
            status = self.check_status()
            if status == ProcessStatus.STARTED:
                break
            logger.debug("waiting 1s for job to start, current status: %s",
                         status)
            time.sleep(1)

        # connect to worker job
        if job.host is None:
            job.update()
        ip = job.host
        port = self.worker_port
        addr = (ip, port)
        conn = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        while True:
            if self._exiting:
                logger.debug(
                    "process is exiting, don't try to connect to the job")
                return
            try:
                logger.debug("connecting to %s", addr)
                conn.connect(addr)
            except ConnectionRefusedError:
                # not ready yet, sleep
                logger.debug(
                    "Fiber worker is not up yet, waiting 1s for the "
                    "worker to listen on admin address: %s",
                    addr,
                )
                time.sleep(1)
                continue
            break

    logger.debug("send buffer")
    # sendall() (not send()) guarantees the whole pickled payload is
    # written even if the kernel accepts it in multiple chunks.
    conn.sendall(send_buffer)

    self.sentinel = conn
    logger.debug("_launch finished")