class RunPool:
    """Registry of ``Run`` objects keyed by run ID (``rid``).

    ``ridc`` hands out run IDs through ``get()``; ``repo_backend`` resolves
    and reference-counts repository revisions through ``get_head_rev()``,
    ``request_rev()`` and ``release_rev()``.  ``worker_handlers`` and
    ``notifier`` are only stored here.
    """

    def __init__(self, ridc, worker_handlers, notifier, repo_backend):
        # Collaborators supplied by the owner of the pool.
        self.ridc = ridc
        self.worker_handlers = worker_handlers
        self.notifier = notifier
        self.repo_backend = repo_backend
        # Pool state: rid -> Run, plus the condition notified on submission.
        self.runs = {}
        self.state_changed = Condition()

    def submit(self, expid, priority, due_date, flush, pipeline_name):
        """Create a run for ``expid``, register it and return its run ID.

        ``expid`` is modified in place: a ``"repo_rev"`` entry that is
        ``None`` is replaced by the head revision of the repository.
        Invoked via the scheduler.
        """
        new_rid = self.ridc.get()

        working_dir = None
        message = None
        if "repo_rev" in expid:
            revision = expid["repo_rev"]
            if revision is None:
                # Pin the experiment to whatever the head currently is.
                revision = self.repo_backend.get_head_rev()
                expid["repo_rev"] = revision
            working_dir, message = self.repo_backend.request_rev(revision)

        self.runs[new_rid] = Run(new_rid, pipeline_name, working_dir, expid,
                                 priority, due_date, flush, self,
                                 repo_msg=message)
        self.state_changed.notify()
        return new_rid

    async def delete(self, rid):
        """Close and forget run ``rid``; unknown IDs are ignored.

        Invoked via the deleter.  The revision requested at submission time
        (if any) is released once the run has been closed.
        """
        try:
            doomed = self.runs[rid]
        except KeyError:
            return
        await doomed.close()
        run_expid = doomed.expid
        if "repo_rev" in run_expid:
            self.repo_backend.release_rev(run_expid["repo_rev"])
        del self.runs[rid]
def __init__(self, ridc, worker_handlers, notifier, repo_backend):
    """Store the collaborators and start with an empty run table."""
    # Collaborators handed in by the caller; kept as-is.
    self.ridc = ridc
    self.worker_handlers = worker_handlers
    self.notifier = notifier
    self.repo_backend = repo_backend
    # Own state: the run table and the condition signalling changes to it.
    self.runs = {}
    self.state_changed = Condition()
def __init__(self, ridc, worker_handlers, notifier, experiment_db):
    """Store the collaborators and start with an empty run table."""
    # Collaborators handed in by the caller; kept as-is.
    self.ridc = ridc
    self.worker_handlers = worker_handlers
    self.notifier = notifier
    # NOTE(review): how experiment_db is used is not visible from here.
    self.experiment_db = experiment_db
    # Own state: the run table and the condition signalling changes to it.
    self.runs = {}
    self.state_changed = Condition()
def __init__(self, name, ddb_entry):
    """Read the controller settings from ``ddb_entry`` and start launching.

    ``ddb_entry`` must provide ``"command"``, ``"host"`` and ``"port"``.
    Optional keys and their defaults: ``"retry_timer"`` (5),
    ``"retry_timer_backoff"`` (1.1), ``"ping_timer"`` (30),
    ``"ping_timeout"`` (30) and ``"term_timeout"`` (30).

    Raises ``KeyError`` if a mandatory key is missing.
    """
    self.name = name
    self.command = ddb_entry["command"]
    self.retry_timer = ddb_entry.get("retry_timer", 5)
    self.retry_timer_backoff = ddb_entry.get("retry_timer_backoff", 1.1)

    self.host = ddb_entry["host"]
    self.port = ddb_entry["port"]
    self.ping_timer = ddb_entry.get("ping_timer", 30)
    self.ping_timeout = ddb_entry.get("ping_timeout", 30)
    self.term_timeout = ddb_entry.get("term_timeout", 30)

    # The current retry delay starts at the configured base value.
    self.retry_timer_cur = self.retry_timer
    self.retry_now = Condition()
    self.process = None
    # Schedule the launcher through ensure_future() rather than by
    # instantiating asyncio.Task directly, which asyncio discourages.
    self.launch_task = asyncio.ensure_future(self.launcher())
class RunPool:
    """Registry of ``Run`` objects keyed by run ID (``rid``).

    ``ridc`` hands out run IDs through ``get()``; ``repo_backend`` resolves
    and reference-counts repository revisions through ``get_head_rev()``,
    ``request_rev()`` and ``release_rev()``.  ``worker_handlers`` and
    ``notifier`` are only stored here.
    """

    def __init__(self, ridc, worker_handlers, notifier, repo_backend):
        self.runs = dict()
        self.state_changed = Condition()
        self.ridc = ridc
        self.worker_handlers = worker_handlers
        self.notifier = notifier
        self.repo_backend = repo_backend

    def submit(self, expid, priority, due_date, flush, pipeline_name):
        """Create a run for ``expid``, register it and return its run ID.

        Mutates ``expid``: a ``"repo_rev"`` entry that is ``None`` is
        replaced by the head repository revision.  Called through the
        scheduler.
        """
        rid = self.ridc.get()
        if "repo_rev" in expid:
            if expid["repo_rev"] is None:
                expid["repo_rev"] = self.repo_backend.get_head_rev()
            wd, repo_msg = self.repo_backend.request_rev(expid["repo_rev"])
        else:
            wd, repo_msg = None, None
        run = Run(rid, pipeline_name, wd, expid, priority, due_date, flush,
                  self, repo_msg=repo_msg)
        self.runs[rid] = run
        self.state_changed.notify()
        return rid

    async def delete(self, rid):
        """Close and forget run ``rid``; unknown IDs are ignored.

        Called through the deleter.  Written as a native coroutine because
        the generator-based ``@asyncio.coroutine`` form no longer exists in
        current Python versions.
        """
        if rid not in self.runs:
            return
        run = self.runs[rid]
        # NOTE(review): assumes Run.close() returns an awaitable - confirm.
        await run.close()
        if "repo_rev" in run.expid:
            # Give back the revision requested in submit().
            self.repo_backend.release_rev(run.expid["repo_rev"])
        del self.runs[rid]
def __init__(self, name, ddb_entry):
    """Read the controller settings from ``ddb_entry`` and start launching.

    ``"command"``, ``"host"`` and ``"port"`` are mandatory; the timing
    keys fall back to the defaults written below.
    """
    lookup = ddb_entry.get

    self.name = name

    # Mandatory settings: a missing key raises KeyError.
    self.command = ddb_entry["command"]
    self.host = ddb_entry["host"]
    self.port = ddb_entry["port"]

    # Optional timing settings.
    self.retry_timer = lookup("retry_timer", 5)
    self.retry_timer_backoff = lookup("retry_timer_backoff", 1.1)
    self.ping_timer = lookup("ping_timer", 30)
    self.ping_timeout = lookup("ping_timeout", 30)
    self.term_timeout = lookup("term_timeout", 30)

    # Runtime state; the current retry delay starts at the base value.
    self.retry_timer_cur = self.retry_timer
    self.retry_now = Condition()
    self.process = None
    self.launch_task = asyncio.ensure_future(self.launcher())
class Controller:
    """Keep one controller subprocess running.

    The launcher task starts ``command``, forwards the process output to the
    logging system, pings the controller over RPC at ``host``/``port`` every
    ``ping_timer`` seconds, and restarts it after an exit or a failed ping
    with a delay that grows by ``retry_timer_backoff`` on every attempt.
    """

    def __init__(self, name, ddb_entry):
        """Read the settings from ``ddb_entry`` and schedule the launcher.

        ``"command"``, ``"host"`` and ``"port"`` are mandatory (``KeyError``
        otherwise); the timing keys are optional.
        """
        self.name = name
        self.command = ddb_entry["command"]
        self.retry_timer = ddb_entry.get("retry_timer", 5)
        self.retry_timer_backoff = ddb_entry.get("retry_timer_backoff", 1.1)

        self.host = ddb_entry["host"]
        self.port = ddb_entry["port"]
        self.ping_timer = ddb_entry.get("ping_timer", 30)
        self.ping_timeout = ddb_entry.get("ping_timeout", 30)
        self.term_timeout = ddb_entry.get("term_timeout", 30)

        self.retry_timer_cur = self.retry_timer
        self.retry_now = Condition()
        self.process = None
        # ensure_future() instead of instantiating asyncio.Task directly,
        # matching how the log forwarders are scheduled below.
        self.launch_task = asyncio.ensure_future(self.launcher())

    async def end(self):
        """Cancel the launcher and wait until it has shut the process down."""
        self.launch_task.cancel()
        await asyncio.wait_for(self.launch_task, None)

    async def _call_controller(self, method):
        """Call the argument-less RPC ``method`` on the controller.

        A fresh connection is opened for every call and always closed again.
        """
        remote = AsyncioClient()
        await remote.connect_rpc(self.host, self.port, None)
        try:
            targets, _ = remote.get_rpc_id()
            remote.select_rpc_target(targets[0])
            r = await getattr(remote, method)()
        finally:
            remote.close_rpc()
        return r

    async def _ping(self):
        """Ping the controller; return its answer, or ``False`` on failure.

        A truthy answer resets the restart delay to its base value.
        Cancellation is propagated instead of being reported as a failed
        ping, so that ``end()`` arriving during a ping really stops the
        launcher rather than triggering a restart.
        """
        try:
            ok = await asyncio.wait_for(self._call_controller("ping"),
                                        self.ping_timeout)
        except asyncio.CancelledError:
            raise
        except Exception:
            return False
        if ok:
            self.retry_timer_cur = self.retry_timer
        return ok

    async def _wait_and_ping(self):
        """Return once the process has exited or was terminated.

        Every ``ping_timer`` seconds without an exit the controller is
        pinged; a failed ping terminates the process.
        """
        while True:
            try:
                await asyncio.wait_for(self.process.wait(), self.ping_timer)
            except asyncio.TimeoutError:
                logger.debug("pinging controller %s", self.name)
                ok = await self._ping()
                if not ok:
                    logger.warning("Controller %s ping failed", self.name)
                    await self._terminate()
                    return
            else:
                break

    async def forward_logs(self, stream):
        """Re-emit every line read from ``stream`` as a log record."""
        source = "controller({})".format(self.name)
        while True:
            try:
                entry = await stream.readline()
                if not entry:
                    break
                # Strip the line terminator only if there is one: the last
                # line before EOF may lack it and must not lose a character.
                if entry.endswith(b"\n"):
                    entry = entry[:-1]
                level, name, message = parse_log_message(entry.decode())
                log_with_name(name, level, message, extra={"source": source})
            except asyncio.CancelledError:
                raise
            except Exception:
                logger.debug("exception in log forwarding", exc_info=True)
                break
        logger.debug("stopped log forwarding of stream %s of %s",
                     stream, self.name)

    async def launcher(self):
        """Start, supervise and restart the process until cancelled."""
        try:
            while True:
                logger.info("Starting controller %s with command: %s",
                            self.name, self.command)
                try:
                    self.process = await asyncio.create_subprocess_exec(
                        *shlex.split(self.command),
                        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                    asyncio.ensure_future(
                        self.forward_logs(self.process.stdout))
                    asyncio.ensure_future(
                        self.forward_logs(self.process.stderr))
                    await self._wait_and_ping()
                except FileNotFoundError:
                    logger.warning("Controller %s failed to start", self.name)
                else:
                    logger.warning("Controller %s exited", self.name)
                logger.warning("Restarting in %.1f seconds",
                               self.retry_timer_cur)
                try:
                    # Sleep for the retry delay unless retry_now fires first.
                    await asyncio.wait_for(self.retry_now.wait(),
                                           self.retry_timer_cur)
                except asyncio.TimeoutError:
                    pass
                self.retry_timer_cur *= self.retry_timer_backoff
        except asyncio.CancelledError:
            await self._terminate()

    def _kill_process(self):
        """Kill the process, tolerating that it may already be gone."""
        try:
            self.process.kill()
        except ProcessLookupError:
            pass

    async def _terminate(self):
        """Ask the controller to terminate and kill it if that fails."""
        logger.info("Terminating controller %s", self.name)
        if self.process is not None and self.process.returncode is None:
            # Both handlers are deliberately catch-all: whatever goes wrong,
            # including cancellation, the process must not be left running.
            try:
                await asyncio.wait_for(self._call_controller("terminate"),
                                       self.term_timeout)
            except BaseException:
                logger.warning(
                    "Controller %s did not respond to terminate "
                    "command, killing", self.name)
                self._kill_process()
            try:
                await asyncio.wait_for(self.process.wait(),
                                       self.term_timeout)
            except BaseException:
                logger.warning("Controller %s failed to exit, killing",
                               self.name)
                self._kill_process()
                await self.process.wait()
        logger.debug("Controller %s terminated", self.name)
class Controller:
    """Keep one controller subprocess running.

    The launcher task starts ``command`` in its own session, feeds the
    process output to ``LogParser`` tasks, pings the controller over RPC at
    ``host``/``port`` every ``ping_timer`` seconds, and restarts it after an
    exit or a failed ping with a delay that grows by ``retry_timer_backoff``
    on every attempt.
    """

    def __init__(self, name, ddb_entry):
        """Read the settings from ``ddb_entry`` and schedule the launcher.

        ``"command"``, ``"host"`` and ``"port"`` are mandatory (``KeyError``
        otherwise); the timing keys are optional.
        """
        self.name = name
        self.command = ddb_entry["command"]
        self.retry_timer = ddb_entry.get("retry_timer", 5)
        self.retry_timer_backoff = ddb_entry.get("retry_timer_backoff", 1.1)

        self.host = ddb_entry["host"]
        self.port = ddb_entry["port"]
        self.ping_timer = ddb_entry.get("ping_timer", 30)
        self.ping_timeout = ddb_entry.get("ping_timeout", 30)
        self.term_timeout = ddb_entry.get("term_timeout", 30)

        self.retry_timer_cur = self.retry_timer
        self.retry_now = Condition()
        self.process = None
        self.launch_task = asyncio.ensure_future(self.launcher())

    async def end(self):
        """Cancel the launcher and wait until it has shut the process down."""
        self.launch_task.cancel()
        await asyncio.wait_for(self.launch_task, None)

    async def call(self, method, *args, **kwargs):
        """Call RPC ``method`` on the controller and return its result.

        A fresh connection is opened for every call and always closed again.
        """
        remote = AsyncioClient()
        await remote.connect_rpc(self.host, self.port, None)
        try:
            targets, _ = remote.get_rpc_id()
            await remote.select_rpc_target(targets[0])
            r = await getattr(remote, method)(*args, **kwargs)
        finally:
            remote.close_rpc()
        return r

    async def _ping(self):
        """Ping the controller; return its answer, or ``False`` on failure.

        A truthy answer resets the restart delay to its base value.
        Cancellation is propagated instead of being reported as a failed
        ping, so that ``end()`` arriving during a ping really stops the
        launcher rather than triggering a restart.
        """
        try:
            ok = await asyncio.wait_for(self.call("ping"),
                                        self.ping_timeout)
        except asyncio.CancelledError:
            raise
        except Exception:
            return False
        if ok:
            self.retry_timer_cur = self.retry_timer
        return ok

    async def _wait_and_ping(self):
        """Return once the process has exited or was terminated.

        Every ``ping_timer`` seconds without an exit the controller is
        pinged; a failed ping terminates the process.
        """
        while True:
            try:
                await asyncio.wait_for(self.process.wait(), self.ping_timer)
            except asyncio.TimeoutError:
                logger.debug("pinging controller %s", self.name)
                ok = await self._ping()
                if not ok:
                    logger.warning("Controller %s ping failed", self.name)
                    await self._terminate()
                    return
            else:
                break

    def _get_log_source(self):
        """Return the source label handed to the log parsers."""
        return "controller({})".format(self.name)

    async def launcher(self):
        """Start, supervise and restart the process until cancelled."""
        try:
            while True:
                logger.info("Starting controller %s with command: %s",
                            self.name, self.command)
                try:
                    # Unbuffered output so that log lines arrive promptly.
                    env = os.environ.copy()
                    env["PYTHONUNBUFFERED"] = "1"
                    self.process = await asyncio.create_subprocess_exec(
                        *shlex.split(self.command),
                        stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                        env=env, start_new_session=True)
                    asyncio.ensure_future(
                        LogParser(self._get_log_source).stream_task(
                            self.process.stdout))
                    asyncio.ensure_future(
                        LogParser(self._get_log_source).stream_task(
                            self.process.stderr))
                    await self._wait_and_ping()
                except FileNotFoundError:
                    logger.warning("Controller %s failed to start", self.name)
                else:
                    logger.warning("Controller %s exited", self.name)
                logger.warning("Restarting in %.1f seconds",
                               self.retry_timer_cur)
                try:
                    # Sleep for the retry delay unless retry_now fires first.
                    await asyncio.wait_for(self.retry_now.wait(),
                                           self.retry_timer_cur)
                except asyncio.TimeoutError:
                    pass
                self.retry_timer_cur *= self.retry_timer_backoff
        except asyncio.CancelledError:
            await self._terminate()

    async def _terminate(self):
        """Stop the process, escalating from RPC request to kill.

        Order of attempts: the ``terminate`` RPC, then ``Process.terminate()``
        (skipped when ``os.name`` is ``"nt"``), then ``Process.kill()``.
        Each step waits up to ``term_timeout`` seconds for the process to
        exit.
        """
        if self.process is None or self.process.returncode is not None:
            logger.info("Controller %s already terminated", self.name)
            return
        logger.debug("Terminating controller %s", self.name)
        try:
            await asyncio.wait_for(self.call("terminate"), self.term_timeout)
            await asyncio.wait_for(self.process.wait(), self.term_timeout)
            logger.info("Controller %s terminated", self.name)
            return
        except BaseException:
            # Deliberately catch-all: whatever goes wrong, including
            # cancellation, the process still has to be ended below.
            logger.warning("Controller %s did not exit on request, "
                           "ending the process", self.name)
        if os.name != "nt":
            try:
                self.process.terminate()
            except ProcessLookupError:
                pass
            try:
                await asyncio.wait_for(self.process.wait(), self.term_timeout)
                logger.info("Controller process %s terminated", self.name)
                return
            except asyncio.TimeoutError:
                logger.warning("Controller process %s did not terminate, "
                               "killing", self.name)
        try:
            self.process.kill()
        except ProcessLookupError:
            pass
        try:
            await asyncio.wait_for(self.process.wait(), self.term_timeout)
            logger.info("Controller process %s killed", self.name)
            return
        except asyncio.TimeoutError:
            logger.warning("Controller process %s failed to die", self.name)
class Controller:
    """Keep one controller subprocess running.

    The launcher task starts ``command`` in its own session, feeds the
    process output to ``LogParser`` tasks, pings the controller over RPC at
    ``host``/``port`` every ``ping_timer`` seconds, and restarts it after an
    exit or a failed ping with a delay that grows by ``retry_timer_backoff``
    on every attempt.
    """

    def __init__(self, name, ddb_entry):
        """Read the settings from ``ddb_entry`` and schedule the launcher.

        ``"command"``, ``"host"`` and ``"port"`` are mandatory (``KeyError``
        otherwise); the timing keys are optional.
        """
        self.name = name
        self.command = ddb_entry["command"]
        self.retry_timer = ddb_entry.get("retry_timer", 5)
        self.retry_timer_backoff = ddb_entry.get("retry_timer_backoff", 1.1)

        self.host = ddb_entry["host"]
        self.port = ddb_entry["port"]
        self.ping_timer = ddb_entry.get("ping_timer", 30)
        self.ping_timeout = ddb_entry.get("ping_timeout", 30)
        self.term_timeout = ddb_entry.get("term_timeout", 30)

        self.retry_timer_cur = self.retry_timer
        self.retry_now = Condition()
        self.process = None
        self.launch_task = asyncio.ensure_future(self.launcher())

    async def end(self):
        """Cancel the launcher and wait until it has shut the process down."""
        self.launch_task.cancel()
        await asyncio.wait_for(self.launch_task, None)

    async def call(self, method, *args, **kwargs):
        """Call RPC ``method`` on the controller and return its result.

        A fresh connection is opened for every call and always closed again.
        """
        remote = AsyncioClient()
        await remote.connect_rpc(self.host, self.port, None)
        try:
            targets, _ = remote.get_rpc_id()
            # NOTE(review): whether select_rpc_target() must be awaited
            # depends on the AsyncioClient in use - confirm.
            remote.select_rpc_target(targets[0])
            r = await getattr(remote, method)(*args, **kwargs)
        finally:
            remote.close_rpc()
        return r

    async def _ping(self):
        """Ping the controller; return its answer, or ``False`` on failure.

        A truthy answer resets the restart delay to its base value.
        Cancellation is propagated instead of being reported as a failed
        ping, so that ``end()`` arriving during a ping really stops the
        launcher rather than triggering a restart.
        """
        try:
            ok = await asyncio.wait_for(self.call("ping"),
                                        self.ping_timeout)
        except asyncio.CancelledError:
            raise
        except Exception:
            return False
        if ok:
            self.retry_timer_cur = self.retry_timer
        return ok

    async def _wait_and_ping(self):
        """Return once the process has exited or was terminated.

        Every ``ping_timer`` seconds without an exit the controller is
        pinged; a failed ping terminates the process.
        """
        while True:
            try:
                await asyncio.wait_for(self.process.wait(), self.ping_timer)
            except asyncio.TimeoutError:
                logger.debug("pinging controller %s", self.name)
                ok = await self._ping()
                if not ok:
                    logger.warning("Controller %s ping failed", self.name)
                    await self._terminate()
                    return
            else:
                break

    def _get_log_source(self):
        """Return the source label handed to the log parsers."""
        return "controller({})".format(self.name)

    async def launcher(self):
        """Start, supervise and restart the process until cancelled."""
        try:
            while True:
                logger.info("Starting controller %s with command: %s",
                            self.name, self.command)
                try:
                    # Unbuffered output so that log lines arrive promptly.
                    env = os.environ.copy()
                    env["PYTHONUNBUFFERED"] = "1"
                    self.process = await asyncio.create_subprocess_exec(
                        *shlex.split(self.command),
                        stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                        env=env, start_new_session=True)
                    asyncio.ensure_future(
                        LogParser(self._get_log_source).stream_task(
                            self.process.stdout))
                    asyncio.ensure_future(
                        LogParser(self._get_log_source).stream_task(
                            self.process.stderr))
                    await self._wait_and_ping()
                except FileNotFoundError:
                    logger.warning("Controller %s failed to start", self.name)
                else:
                    logger.warning("Controller %s exited", self.name)
                logger.warning("Restarting in %.1f seconds",
                               self.retry_timer_cur)
                try:
                    # Sleep for the retry delay unless retry_now fires first.
                    await asyncio.wait_for(self.retry_now.wait(),
                                           self.retry_timer_cur)
                except asyncio.TimeoutError:
                    pass
                self.retry_timer_cur *= self.retry_timer_backoff
        except asyncio.CancelledError:
            await self._terminate()

    async def _terminate(self):
        """Stop the process, escalating from RPC request to kill.

        Order of attempts: the ``terminate`` RPC, then ``Process.terminate()``
        (skipped when ``os.name`` is ``"nt"``), then ``Process.kill()``.
        Each step waits up to ``term_timeout`` seconds for the process to
        exit.
        """
        if self.process is None or self.process.returncode is not None:
            logger.info("Controller %s already terminated", self.name)
            return
        logger.debug("Terminating controller %s", self.name)
        try:
            await asyncio.wait_for(self.call("terminate"), self.term_timeout)
            await asyncio.wait_for(self.process.wait(), self.term_timeout)
            logger.info("Controller %s terminated", self.name)
            return
        except BaseException:
            # Deliberately catch-all: whatever goes wrong, including
            # cancellation, the process still has to be ended below.
            logger.warning("Controller %s did not exit on request, "
                           "ending the process", self.name)
        if os.name != "nt":
            try:
                self.process.terminate()
            except ProcessLookupError:
                pass
            try:
                await asyncio.wait_for(self.process.wait(), self.term_timeout)
                logger.info("Controller process %s terminated", self.name)
                return
            except asyncio.TimeoutError:
                logger.warning("Controller process %s did not terminate, "
                               "killing", self.name)
        try:
            self.process.kill()
        except ProcessLookupError:
            pass
        try:
            await asyncio.wait_for(self.process.wait(), self.term_timeout)
            logger.info("Controller process %s killed", self.name)
            return
        except asyncio.TimeoutError:
            logger.warning("Controller process %s failed to die", self.name)
class Controller:
    """Keep one controller subprocess running.

    The launcher task starts ``command``, pings the controller over RPC at
    ``host``/``port`` every ``ping_timer`` seconds, and restarts it after an
    exit or a failed ping with a delay that grows by ``retry_timer_backoff``
    on every attempt.

    All coroutines are native ``async def`` coroutines; the generator-based
    ``@asyncio.coroutine`` form no longer exists in current Python versions.
    """

    def __init__(self, name, ddb_entry):
        """Read the settings from ``ddb_entry`` and schedule the launcher.

        ``"command"``, ``"host"`` and ``"port"`` are mandatory (``KeyError``
        otherwise); the timing keys are optional.
        """
        self.name = name
        self.command = ddb_entry["command"]
        self.retry_timer = ddb_entry.get("retry_timer", 5)
        self.retry_timer_backoff = ddb_entry.get("retry_timer_backoff", 1.1)

        self.host = ddb_entry["host"]
        self.port = ddb_entry["port"]
        self.ping_timer = ddb_entry.get("ping_timer", 30)
        self.ping_timeout = ddb_entry.get("ping_timeout", 30)
        self.term_timeout = ddb_entry.get("term_timeout", 30)

        self.retry_timer_cur = self.retry_timer
        self.retry_now = Condition()
        self.process = None
        # ensure_future() instead of instantiating asyncio.Task directly,
        # which asyncio discourages.
        self.launch_task = asyncio.ensure_future(self.launcher())

    async def end(self):
        """Cancel the launcher and wait until it has shut the process down."""
        self.launch_task.cancel()
        await asyncio.wait_for(self.launch_task, None)

    async def _call_controller(self, method):
        """Call the argument-less RPC ``method`` on the controller.

        A fresh connection is opened for every call and always closed again.
        """
        remote = AsyncioClient()
        await remote.connect_rpc(self.host, self.port, None)
        try:
            targets, _ = remote.get_rpc_id()
            remote.select_rpc_target(targets[0])
            r = await getattr(remote, method)()
        finally:
            remote.close_rpc()
        return r

    async def _ping(self):
        """Ping the controller; return its answer, or ``False`` on failure.

        A truthy answer resets the restart delay to its base value.
        Cancellation is propagated instead of being reported as a failed
        ping, so that ``end()`` arriving during a ping really stops the
        launcher rather than triggering a restart.
        """
        try:
            ok = await asyncio.wait_for(self._call_controller("ping"),
                                        self.ping_timeout)
        except asyncio.CancelledError:
            raise
        except Exception:
            return False
        if ok:
            self.retry_timer_cur = self.retry_timer
        return ok

    async def _wait_and_ping(self):
        """Return once the process has exited or was terminated.

        Every ``ping_timer`` seconds without an exit the controller is
        pinged; a failed ping terminates the process.
        """
        while True:
            try:
                # NOTE(review): assumes asyncio_process_wait_timeout()
                # returns an awaitable - confirm against its definition.
                await asyncio_process_wait_timeout(
                    self.process, self.ping_timer)
            except asyncio.TimeoutError:
                logger.debug("pinging controller %s", self.name)
                ok = await self._ping()
                if not ok:
                    logger.warning("Controller %s ping failed", self.name)
                    await self._terminate()
                    return
            else:
                break

    async def launcher(self):
        """Start, supervise and restart the process until cancelled."""
        try:
            while True:
                logger.info("Starting controller %s with command: %s",
                            self.name, self.command)
                try:
                    self.process = await asyncio.create_subprocess_exec(
                        *shlex.split(self.command))
                    await self._wait_and_ping()
                except FileNotFoundError:
                    logger.warning("Controller %s failed to start", self.name)
                else:
                    logger.warning("Controller %s exited", self.name)
                logger.warning("Restarting in %.1f seconds",
                               self.retry_timer_cur)
                try:
                    # Sleep for the retry delay unless retry_now fires first.
                    # NOTE(review): assumes Condition.wait() is awaitable.
                    await asyncio.wait_for(self.retry_now.wait(),
                                           self.retry_timer_cur)
                except asyncio.TimeoutError:
                    pass
                self.retry_timer_cur *= self.retry_timer_backoff
        except asyncio.CancelledError:
            await self._terminate()

    def _kill_process(self):
        """Kill the process, tolerating that it may already be gone."""
        try:
            self.process.kill()
        except ProcessLookupError:
            pass

    async def _terminate(self):
        """Ask the controller to terminate and kill it if that fails."""
        logger.info("Terminating controller %s", self.name)
        if self.process is not None and self.process.returncode is None:
            # Both handlers are deliberately catch-all: whatever goes wrong,
            # including cancellation, the process must not be left running.
            try:
                await asyncio.wait_for(self._call_controller("terminate"),
                                       self.term_timeout)
            except BaseException:
                logger.warning(
                    "Controller %s did not respond to terminate "
                    "command, killing", self.name)
                self._kill_process()
            try:
                await asyncio_process_wait_timeout(
                    self.process, self.term_timeout)
            except BaseException:
                logger.warning("Controller %s failed to exit, killing",
                               self.name)
                self._kill_process()
                await self.process.wait()
        logger.debug("Controller %s terminated", self.name)
class Controller:
    """Keep one controller subprocess running.

    The launcher task starts ``command``, forwards the process output to the
    logging system, pings the controller over RPC at ``host``/``port`` every
    ``ping_timer`` seconds, and restarts it after an exit or a failed ping
    with a delay that grows by ``retry_timer_backoff`` on every attempt.
    """

    def __init__(self, name, ddb_entry):
        """Read the settings from ``ddb_entry`` and schedule the launcher.

        ``"command"``, ``"host"`` and ``"port"`` are mandatory (``KeyError``
        otherwise); the timing keys are optional.
        """
        self.name = name
        self.command = ddb_entry["command"]
        self.retry_timer = ddb_entry.get("retry_timer", 5)
        self.retry_timer_backoff = ddb_entry.get("retry_timer_backoff", 1.1)

        self.host = ddb_entry["host"]
        self.port = ddb_entry["port"]
        self.ping_timer = ddb_entry.get("ping_timer", 30)
        self.ping_timeout = ddb_entry.get("ping_timeout", 30)
        self.term_timeout = ddb_entry.get("term_timeout", 30)

        self.retry_timer_cur = self.retry_timer
        self.retry_now = Condition()
        self.process = None
        # ensure_future() instead of instantiating asyncio.Task directly,
        # matching how the log forwarders are scheduled below.
        self.launch_task = asyncio.ensure_future(self.launcher())

    async def end(self):
        """Cancel the launcher and wait until it has shut the process down."""
        self.launch_task.cancel()
        await asyncio.wait_for(self.launch_task, None)

    async def _call_controller(self, method):
        """Call the argument-less RPC ``method`` on the controller.

        A fresh connection is opened for every call and always closed again.
        """
        remote = AsyncioClient()
        await remote.connect_rpc(self.host, self.port, None)
        try:
            targets, _ = remote.get_rpc_id()
            remote.select_rpc_target(targets[0])
            r = await getattr(remote, method)()
        finally:
            remote.close_rpc()
        return r

    async def _ping(self):
        """Ping the controller; return its answer, or ``False`` on failure.

        A truthy answer resets the restart delay to its base value.
        Cancellation is propagated instead of being reported as a failed
        ping, so that ``end()`` arriving during a ping really stops the
        launcher rather than triggering a restart.
        """
        try:
            ok = await asyncio.wait_for(self._call_controller("ping"),
                                        self.ping_timeout)
        except asyncio.CancelledError:
            raise
        except Exception:
            return False
        if ok:
            self.retry_timer_cur = self.retry_timer
        return ok

    async def _wait_and_ping(self):
        """Return once the process has exited or was terminated.

        Every ``ping_timer`` seconds without an exit the controller is
        pinged; a failed ping terminates the process.
        """
        while True:
            try:
                await asyncio.wait_for(self.process.wait(), self.ping_timer)
            except asyncio.TimeoutError:
                logger.debug("pinging controller %s", self.name)
                ok = await self._ping()
                if not ok:
                    logger.warning("Controller %s ping failed", self.name)
                    await self._terminate()
                    return
            else:
                break

    async def forward_logs(self, stream):
        """Re-emit every line read from ``stream`` as a log record."""
        source = "controller({})".format(self.name)
        while True:
            try:
                entry = await stream.readline()
                if not entry:
                    break
                # Strip the line terminator only if there is one: the last
                # line before EOF may lack it and must not lose a character.
                if entry.endswith(b"\n"):
                    entry = entry[:-1]
                level, name, message = parse_log_message(entry.decode())
                log_with_name(name, level, message, extra={"source": source})
            except asyncio.CancelledError:
                raise
            except Exception:
                logger.debug("exception in log forwarding", exc_info=True)
                break
        logger.debug("stopped log forwarding of stream %s of %s",
                     stream, self.name)

    async def launcher(self):
        """Start, supervise and restart the process until cancelled."""
        try:
            while True:
                logger.info("Starting controller %s with command: %s",
                            self.name, self.command)
                try:
                    self.process = await asyncio.create_subprocess_exec(
                        *shlex.split(self.command),
                        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                    asyncio.ensure_future(self.forward_logs(
                        self.process.stdout))
                    asyncio.ensure_future(self.forward_logs(
                        self.process.stderr))
                    await self._wait_and_ping()
                except FileNotFoundError:
                    logger.warning("Controller %s failed to start", self.name)
                else:
                    logger.warning("Controller %s exited", self.name)
                logger.warning("Restarting in %.1f seconds",
                               self.retry_timer_cur)
                try:
                    # Sleep for the retry delay unless retry_now fires first.
                    await asyncio.wait_for(self.retry_now.wait(),
                                           self.retry_timer_cur)
                except asyncio.TimeoutError:
                    pass
                self.retry_timer_cur *= self.retry_timer_backoff
        except asyncio.CancelledError:
            await self._terminate()

    def _kill_process(self):
        """Kill the process, tolerating that it may already be gone."""
        try:
            self.process.kill()
        except ProcessLookupError:
            pass

    async def _terminate(self):
        """Ask the controller to terminate and kill it if that fails."""
        logger.info("Terminating controller %s", self.name)
        if self.process is not None and self.process.returncode is None:
            # Both handlers are deliberately catch-all: whatever goes wrong,
            # including cancellation, the process must not be left running.
            try:
                await asyncio.wait_for(self._call_controller("terminate"),
                                       self.term_timeout)
            except BaseException:
                logger.warning("Controller %s did not respond to terminate "
                               "command, killing", self.name)
                self._kill_process()
            try:
                await asyncio.wait_for(self.process.wait(),
                                       self.term_timeout)
            except BaseException:
                logger.warning("Controller %s failed to exit, killing",
                               self.name)
                self._kill_process()
                await self.process.wait()
        logger.debug("Controller %s terminated", self.name)