Example #1
0
class RunPool:
    """Table of runs indexed by run identifier (RID).

    The collaborators passed to the constructor are stored unchanged.
    ``state_changed`` is notified every time a run is submitted.
    """

    def __init__(self, ridc, worker_handlers, notifier, repo_backend):
        """Start with an empty run table and remember the collaborators."""
        # Collaborators supplied by the caller.
        self.ridc = ridc
        self.worker_handlers = worker_handlers
        self.notifier = notifier
        self.repo_backend = repo_backend

        # State owned by the pool itself.
        self.runs = {}
        self.state_changed = Condition()

    def submit(self, expid, priority, due_date, flush, pipeline_name):
        """Create a run for ``expid``, register it and return its RID.

        Called through the scheduler.  ``expid`` is mutated: when it has a
        ``"repo_rev"`` entry set to ``None``, that entry is replaced by the
        head revision reported by the repository backend.
        """
        rid = self.ridc.get()

        if "repo_rev" not in expid:
            # No repository involved: no working directory, no message.
            wd, repo_msg = None, None
        else:
            if expid["repo_rev"] is None:
                expid["repo_rev"] = self.repo_backend.get_head_rev()
            wd, repo_msg = self.repo_backend.request_rev(expid["repo_rev"])

        self.runs[rid] = Run(rid, pipeline_name, wd, expid, priority,
                             due_date, flush, self, repo_msg=repo_msg)
        self.state_changed.notify()
        return rid

    async def delete(self, rid):
        """Close the run ``rid`` and drop it from the pool.

        Called through the deleter.  Unknown RIDs are ignored.  The
        repository revision requested at submission is released once the
        run has been closed.
        """
        try:
            run = self.runs[rid]
        except KeyError:
            return

        await run.close()
        if "repo_rev" in run.expid:
            self.repo_backend.release_rev(run.expid["repo_rev"])
        del self.runs[rid]
Example #2
0
    def __init__(self, ridc, worker_handlers, notifier, repo_backend):
        """Start with an empty ``runs`` table and store the collaborators."""
        # Collaborators supplied by the caller; kept as-is.
        self.ridc = ridc
        self.worker_handlers = worker_handlers
        self.notifier = notifier
        self.repo_backend = repo_backend

        # State owned by this object.
        self.runs = {}
        self.state_changed = Condition()
Example #3
0
    def __init__(self, ridc, worker_handlers, notifier, experiment_db):
        """Start with an empty ``runs`` table and store the collaborators."""
        # Collaborators supplied by the caller; kept as-is.
        self.ridc = ridc
        self.worker_handlers = worker_handlers
        self.notifier = notifier
        self.experiment_db = experiment_db

        # State owned by this object.
        self.runs = {}
        self.state_changed = Condition()
Example #4
0
    def __init__(self, name, ddb_entry):
        """Read the controller settings from ``ddb_entry`` and start the launcher.

        ``ddb_entry`` must contain ``"command"``, ``"host"`` and ``"port"``
        (a missing one raises ``KeyError``); the timer and timeout entries
        are optional and fall back to the defaults given below.
        """
        self.name = name
        self.command = ddb_entry["command"]
        self.retry_timer = ddb_entry.get("retry_timer", 5)
        self.retry_timer_backoff = ddb_entry.get("retry_timer_backoff", 1.1)

        self.host = ddb_entry["host"]
        self.port = ddb_entry["port"]
        self.ping_timer = ddb_entry.get("ping_timer", 30)
        self.ping_timeout = ddb_entry.get("ping_timeout", 30)
        self.term_timeout = ddb_entry.get("term_timeout", 30)

        # Current retry delay; starts at the configured base value.
        self.retry_timer_cur = self.retry_timer
        self.retry_now = Condition()
        self.process = None
        # Tasks are created through ensure_future() rather than by calling
        # the asyncio.Task constructor, which the asyncio documentation
        # discourages.
        self.launch_task = asyncio.ensure_future(self.launcher())
Example #5
0
    def __init__(self, ridc, worker_handlers, notifier, experiment_db):
        """Keep the injected collaborators and begin with no runs."""
        # Mutable state created here.
        self.runs = {}
        self.state_changed = Condition()

        # References handed in by the caller, stored unchanged.
        self.experiment_db = experiment_db
        self.notifier = notifier
        self.worker_handlers = worker_handlers
        self.ridc = ridc
Example #6
0
    def __init__(self, ridc, worker_handlers, notifier, repo_backend):
        """Keep the injected collaborators and begin with no runs."""
        # Mutable state created here.
        self.runs = {}
        self.state_changed = Condition()

        # References handed in by the caller, stored unchanged.
        self.repo_backend = repo_backend
        self.notifier = notifier
        self.worker_handlers = worker_handlers
        self.ridc = ridc
Example #7
0
class RunPool:
    """Table of runs indexed by run identifier (RID).

    The collaborators passed to the constructor are stored unchanged.
    ``state_changed`` is notified every time a run is submitted.
    """

    def __init__(self, ridc, worker_handlers, notifier, repo_backend):
        """Start with an empty run table and remember the collaborators."""
        self.runs = dict()
        self.state_changed = Condition()

        self.ridc = ridc
        self.worker_handlers = worker_handlers
        self.notifier = notifier
        self.repo_backend = repo_backend

    def submit(self, expid, priority, due_date, flush, pipeline_name):
        """Create a run for ``expid``, register it and return its RID.

        Called through the scheduler.  ``expid`` is mutated: when it has a
        ``"repo_rev"`` entry set to ``None``, that entry is replaced by the
        head revision reported by the repository backend.
        """
        rid = self.ridc.get()
        if "repo_rev" in expid:
            if expid["repo_rev"] is None:
                expid["repo_rev"] = self.repo_backend.get_head_rev()
            wd, repo_msg = self.repo_backend.request_rev(expid["repo_rev"])
        else:
            # No repository involved: no working directory, no message.
            wd, repo_msg = None, None
        run = Run(rid,
                  pipeline_name,
                  wd,
                  expid,
                  priority,
                  due_date,
                  flush,
                  self,
                  repo_msg=repo_msg)
        self.runs[rid] = run
        self.state_changed.notify()
        return rid

    # Native coroutine: the @asyncio.coroutine decorator used previously was
    # removed in Python 3.11.
    async def delete(self, rid):
        """Close the run ``rid`` and drop it from the pool.

        Called through the deleter.  Unknown RIDs are ignored.  The
        repository revision requested at submission is released once the
        run has been closed.
        """
        if rid not in self.runs:
            return
        run = self.runs[rid]
        await run.close()
        if "repo_rev" in run.expid:
            self.repo_backend.release_rev(run.expid["repo_rev"])
        del self.runs[rid]
Example #8
0
    def __init__(self, name, ddb_entry):
        """Read the controller settings from ``ddb_entry`` and start the launcher.

        ``"command"``, ``"host"`` and ``"port"`` are mandatory entries; the
        timers and timeouts listed below are optional.
        """
        self.name = name

        # Mandatory entries: a missing key raises KeyError.
        self.command = ddb_entry["command"]
        self.host = ddb_entry["host"]
        self.port = ddb_entry["port"]

        # Optional entries, each stored under its own name with a default.
        optional_settings = (
            ("retry_timer", 5),
            ("retry_timer_backoff", 1.1),
            ("ping_timer", 30),
            ("ping_timeout", 30),
            ("term_timeout", 30),
        )
        for key, default in optional_settings:
            setattr(self, key, ddb_entry.get(key, default))

        # Runtime state; the current retry delay starts at the base value.
        self.retry_timer_cur = self.retry_timer
        self.retry_now = Condition()
        self.process = None
        self.launch_task = asyncio.ensure_future(self.launcher())
Example #9
0
class Controller:
    """Supervise one controller subprocess.

    The subprocess is started from ``command``, pinged over RPC at regular
    intervals, restarted with a growing delay whenever it exits or stops
    answering, and terminated when :meth:`end` is awaited.  Its stdout and
    stderr are forwarded to the local logging system.
    """

    def __init__(self, name, ddb_entry):
        """Read the settings from ``ddb_entry`` and schedule the launcher.

        ``"command"``, ``"host"`` and ``"port"`` are mandatory entries; the
        timers and timeouts are optional and default to the values below.
        """
        self.name = name
        self.command = ddb_entry["command"]
        self.retry_timer = ddb_entry.get("retry_timer", 5)
        self.retry_timer_backoff = ddb_entry.get("retry_timer_backoff", 1.1)

        self.host = ddb_entry["host"]
        self.port = ddb_entry["port"]
        self.ping_timer = ddb_entry.get("ping_timer", 30)
        self.ping_timeout = ddb_entry.get("ping_timeout", 30)
        self.term_timeout = ddb_entry.get("term_timeout", 30)

        # Current restart delay; grows after each restart, reset by a
        # successful ping.
        self.retry_timer_cur = self.retry_timer
        self.retry_now = Condition()
        self.process = None
        # ensure_future() instead of the asyncio.Task constructor, whose
        # direct use the asyncio documentation discourages.
        self.launch_task = asyncio.ensure_future(self.launcher())

    async def end(self):
        """Cancel the launcher task and wait for it to finish."""
        self.launch_task.cancel()
        await asyncio.wait_for(self.launch_task, None)

    async def _call_controller(self, method):
        """Call ``method`` (no arguments) on the controller over a fresh RPC
        connection and return its result.

        The first target returned by ``get_rpc_id()`` is selected.  The
        connection is closed whether or not the call succeeds.
        """
        remote = AsyncioClient()
        await remote.connect_rpc(self.host, self.port, None)
        try:
            targets, _ = remote.get_rpc_id()
            remote.select_rpc_target(targets[0])
            r = await getattr(remote, method)()
        finally:
            remote.close_rpc()
        return r

    async def _ping(self):
        """Ping the controller; return the ping result, or ``False`` on error.

        A truthy answer resets the restart delay to its base value.
        Cancellation is propagated instead of being reported as a failed
        ping, so that cancelling the launcher during a ping really stops it.
        """
        try:
            ok = await asyncio.wait_for(self._call_controller("ping"),
                                        self.ping_timeout)
        except asyncio.CancelledError:
            raise
        except Exception:
            return False
        if ok:
            self.retry_timer_cur = self.retry_timer
        return ok

    async def _wait_and_ping(self):
        """Wait for the process to exit, pinging it every ``ping_timer``.

        Returns when the process exits by itself, or after terminating it
        because a ping failed.
        """
        while True:
            try:
                await asyncio.wait_for(self.process.wait(), self.ping_timer)
            except asyncio.TimeoutError:
                # Still running after ping_timer: check that it responds.
                logger.debug("pinging controller %s", self.name)
                ok = await self._ping()
                if not ok:
                    logger.warning("Controller %s ping failed", self.name)
                    await self._terminate()
                    return
            else:
                break

    async def forward_logs(self, stream):
        """Forward each line read from ``stream`` to the logging system.

        Stops at end of stream or on the first error.  Every record is
        tagged with ``source="controller(<name>)"``.
        """
        source = "controller({})".format(self.name)
        while True:
            try:
                entry = (await stream.readline())
                if not entry:
                    break
                # Strip the line terminator only if present: the last line
                # before EOF may come without one.
                if entry.endswith(b"\n"):
                    entry = entry[:-1]
                level, name, message = parse_log_message(entry.decode())
                log_with_name(name, level, message, extra={"source": source})
            except asyncio.CancelledError:
                raise
            except Exception:
                logger.debug("exception in log forwarding", exc_info=True)
                break
        logger.debug("stopped log forwarding of stream %s of %s", stream,
                     self.name)

    async def launcher(self):
        """Start the controller and keep restarting it until cancelled.

        After each exit (or failed start) the launcher waits for
        ``retry_timer_cur`` -- or until ``retry_now.wait()`` completes --
        and multiplies the delay by ``retry_timer_backoff``.  On
        cancellation the running process is terminated.
        """
        try:
            while True:
                logger.info("Starting controller %s with command: %s",
                            self.name, self.command)
                try:
                    self.process = await asyncio.create_subprocess_exec(
                        *shlex.split(self.command),
                        stdout=subprocess.PIPE,
                        stderr=subprocess.PIPE)
                    asyncio.ensure_future(
                        self.forward_logs(self.process.stdout))
                    asyncio.ensure_future(
                        self.forward_logs(self.process.stderr))
                    await self._wait_and_ping()
                except FileNotFoundError:
                    logger.warning("Controller %s failed to start", self.name)
                else:
                    logger.warning("Controller %s exited", self.name)
                logger.warning("Restarting in %.1f seconds",
                               self.retry_timer_cur)
                try:
                    await asyncio.wait_for(self.retry_now.wait(),
                                           self.retry_timer_cur)
                except asyncio.TimeoutError:
                    pass
                self.retry_timer_cur *= self.retry_timer_backoff
        except asyncio.CancelledError:
            await self._terminate()

    def _kill_process(self):
        """Kill the subprocess, tolerating one that has already exited."""
        try:
            self.process.kill()
        except ProcessLookupError:
            pass

    async def _terminate(self):
        """Stop the subprocess: ask over RPC first, kill as a fallback."""
        logger.info("Terminating controller %s", self.name)
        if self.process is not None and self.process.returncode is None:
            try:
                await asyncio.wait_for(self._call_controller("terminate"),
                                       self.term_timeout)
            except:
                # Deliberately broad: whatever interrupted the graceful
                # request, the process must not be left running.
                logger.warning(
                    "Controller %s did not respond to terminate "
                    "command, killing", self.name)
                self._kill_process()
            try:
                await asyncio.wait_for(self.process.wait(), self.term_timeout)
            except:
                # Deliberately broad, same reason as above.
                logger.warning("Controller %s failed to exit, killing",
                               self.name)
                self._kill_process()
                await self.process.wait()
        logger.debug("Controller %s terminated", self.name)
Example #10
0
class Controller:
    """Supervise one controller subprocess.

    The subprocess is started from ``command``, pinged over RPC at regular
    intervals, restarted with a growing delay whenever it exits or stops
    answering, and terminated when :meth:`end` is awaited.  Its stdout and
    stderr are handed to ``LogParser`` stream tasks.
    """

    def __init__(self, name, ddb_entry):
        """Read the settings from ``ddb_entry`` and schedule the launcher.

        ``"command"``, ``"host"`` and ``"port"`` are mandatory entries; the
        timers and timeouts are optional and default to the values below.
        """
        self.name = name
        self.command = ddb_entry["command"]
        self.retry_timer = ddb_entry.get("retry_timer", 5)
        self.retry_timer_backoff = ddb_entry.get("retry_timer_backoff", 1.1)

        self.host = ddb_entry["host"]
        self.port = ddb_entry["port"]
        self.ping_timer = ddb_entry.get("ping_timer", 30)
        self.ping_timeout = ddb_entry.get("ping_timeout", 30)
        self.term_timeout = ddb_entry.get("term_timeout", 30)

        # Current restart delay; grows after each restart, reset by a
        # successful ping.
        self.retry_timer_cur = self.retry_timer
        self.retry_now = Condition()
        self.process = None
        self.launch_task = asyncio.ensure_future(self.launcher())

    async def end(self):
        """Cancel the launcher task and wait for it to finish."""
        self.launch_task.cancel()
        await asyncio.wait_for(self.launch_task, None)

    async def call(self, method, *args, **kwargs):
        """Call ``method`` on the controller over a fresh RPC connection.

        Positional and keyword arguments are passed through and the result
        of the remote call is returned.  The first target returned by
        ``get_rpc_id()`` is selected.  The connection is closed whether or
        not the call succeeds.
        """
        remote = AsyncioClient()
        await remote.connect_rpc(self.host, self.port, None)
        try:
            targets, _ = remote.get_rpc_id()
            await remote.select_rpc_target(targets[0])
            r = await getattr(remote, method)(*args, **kwargs)
        finally:
            remote.close_rpc()
        return r

    async def _ping(self):
        """Ping the controller; return the ping result, or ``False`` on error.

        A truthy answer resets the restart delay to its base value.
        Cancellation is propagated instead of being reported as a failed
        ping, so that cancelling the launcher during a ping really stops it.
        """
        try:
            ok = await asyncio.wait_for(self.call("ping"),
                                        self.ping_timeout)
        except asyncio.CancelledError:
            raise
        except Exception:
            return False
        if ok:
            self.retry_timer_cur = self.retry_timer
        return ok

    async def _wait_and_ping(self):
        """Wait for the process to exit, pinging it every ``ping_timer``.

        Returns when the process exits by itself, or after terminating it
        because a ping failed.
        """
        while True:
            try:
                await asyncio.wait_for(self.process.wait(),
                                       self.ping_timer)
            except asyncio.TimeoutError:
                # Still running after ping_timer: check that it responds.
                logger.debug("pinging controller %s", self.name)
                ok = await self._ping()
                if not ok:
                    logger.warning("Controller %s ping failed", self.name)
                    await self._terminate()
                    return
            else:
                break

    def _get_log_source(self):
        """Return the source label passed to ``LogParser``."""
        return "controller({})".format(self.name)

    async def launcher(self):
        """Start the controller and keep restarting it until cancelled.

        After each exit (or failed start) the launcher waits for
        ``retry_timer_cur`` -- or until ``retry_now.wait()`` completes --
        and multiplies the delay by ``retry_timer_backoff``.  On
        cancellation the running process is terminated.
        """
        try:
            while True:
                logger.info("Starting controller %s with command: %s",
                            self.name, self.command)
                try:
                    # Copy of the current environment with Python output
                    # buffering disabled for the child.
                    env = os.environ.copy()
                    env["PYTHONUNBUFFERED"] = "1"
                    self.process = await asyncio.create_subprocess_exec(
                        *shlex.split(self.command),
                        stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                        env=env, start_new_session=True)
                    asyncio.ensure_future(
                        LogParser(self._get_log_source).stream_task(
                            self.process.stdout))
                    asyncio.ensure_future(
                        LogParser(self._get_log_source).stream_task(
                            self.process.stderr))
                    await self._wait_and_ping()
                except FileNotFoundError:
                    logger.warning("Controller %s failed to start", self.name)
                else:
                    logger.warning("Controller %s exited", self.name)
                logger.warning("Restarting in %.1f seconds",
                               self.retry_timer_cur)
                try:
                    await asyncio.wait_for(self.retry_now.wait(),
                                           self.retry_timer_cur)
                except asyncio.TimeoutError:
                    pass
                self.retry_timer_cur *= self.retry_timer_backoff
        except asyncio.CancelledError:
            await self._terminate()

    async def _terminate(self):
        """Stop the subprocess, escalating until it is gone.

        Steps, each bounded by ``term_timeout``: RPC ``terminate`` request,
        then ``Process.terminate()`` (skipped on Windows), then
        ``Process.kill()``.  Returns as soon as one step succeeds.
        """
        if self.process is None or self.process.returncode is not None:
            logger.info("Controller %s already terminated", self.name)
            return
        logger.debug("Terminating controller %s", self.name)
        try:
            await asyncio.wait_for(self.call("terminate"), self.term_timeout)
            await asyncio.wait_for(self.process.wait(), self.term_timeout)
            logger.info("Controller %s terminated", self.name)
            return
        except:
            # Deliberately broad: whatever interrupted the graceful path,
            # fall through to the forceful steps below.
            logger.warning("Controller %s did not exit on request, "
                           "ending the process", self.name)
        if os.name != "nt":
            try:
                self.process.terminate()
            except ProcessLookupError:
                # The process exited in the meantime.
                pass
            try:
                await asyncio.wait_for(self.process.wait(), self.term_timeout)
                logger.info("Controller process %s terminated", self.name)
                return
            except asyncio.TimeoutError:
                logger.warning("Controller process %s did not terminate, "
                               "killing", self.name)
        try:
            self.process.kill()
        except ProcessLookupError:
            # The process exited in the meantime.
            pass
        try:
            await asyncio.wait_for(self.process.wait(), self.term_timeout)
            logger.info("Controller process %s killed", self.name)
            return
        except asyncio.TimeoutError:
            logger.warning("Controller process %s failed to die", self.name)
Example #11
0
class Controller:
    """Supervise one controller subprocess.

    The subprocess is started from ``command``, pinged over RPC at regular
    intervals, restarted with a growing delay whenever it exits or stops
    answering, and terminated when :meth:`end` is awaited.  Its stdout and
    stderr are handed to ``LogParser`` stream tasks.
    """

    def __init__(self, name, ddb_entry):
        """Read the settings from ``ddb_entry`` and schedule the launcher.

        ``"command"``, ``"host"`` and ``"port"`` are mandatory entries; the
        timers and timeouts are optional and default to the values below.
        """
        self.name = name
        self.command = ddb_entry["command"]
        self.retry_timer = ddb_entry.get("retry_timer", 5)
        self.retry_timer_backoff = ddb_entry.get("retry_timer_backoff", 1.1)

        self.host = ddb_entry["host"]
        self.port = ddb_entry["port"]
        self.ping_timer = ddb_entry.get("ping_timer", 30)
        self.ping_timeout = ddb_entry.get("ping_timeout", 30)
        self.term_timeout = ddb_entry.get("term_timeout", 30)

        # Current restart delay; grows after each restart, reset by a
        # successful ping.
        self.retry_timer_cur = self.retry_timer
        self.retry_now = Condition()
        self.process = None
        self.launch_task = asyncio.ensure_future(self.launcher())

    async def end(self):
        """Cancel the launcher task and wait for it to finish."""
        self.launch_task.cancel()
        await asyncio.wait_for(self.launch_task, None)

    async def call(self, method, *args, **kwargs):
        """Call ``method`` on the controller over a fresh RPC connection.

        Positional and keyword arguments are passed through and the result
        of the remote call is returned.  The first target returned by
        ``get_rpc_id()`` is selected.  The connection is closed whether or
        not the call succeeds.
        """
        remote = AsyncioClient()
        await remote.connect_rpc(self.host, self.port, None)
        try:
            targets, _ = remote.get_rpc_id()
            remote.select_rpc_target(targets[0])
            r = await getattr(remote, method)(*args, **kwargs)
        finally:
            remote.close_rpc()
        return r

    async def _ping(self):
        """Ping the controller; return the ping result, or ``False`` on error.

        A truthy answer resets the restart delay to its base value.
        Cancellation is propagated instead of being reported as a failed
        ping, so that cancelling the launcher during a ping really stops it.
        """
        try:
            ok = await asyncio.wait_for(self.call("ping"),
                                        self.ping_timeout)
        except asyncio.CancelledError:
            raise
        except Exception:
            return False
        if ok:
            self.retry_timer_cur = self.retry_timer
        return ok

    async def _wait_and_ping(self):
        """Wait for the process to exit, pinging it every ``ping_timer``.

        Returns when the process exits by itself, or after terminating it
        because a ping failed.
        """
        while True:
            try:
                await asyncio.wait_for(self.process.wait(),
                                       self.ping_timer)
            except asyncio.TimeoutError:
                # Still running after ping_timer: check that it responds.
                logger.debug("pinging controller %s", self.name)
                ok = await self._ping()
                if not ok:
                    logger.warning("Controller %s ping failed", self.name)
                    await self._terminate()
                    return
            else:
                break

    def _get_log_source(self):
        """Return the source label passed to ``LogParser``."""
        return "controller({})".format(self.name)

    async def launcher(self):
        """Start the controller and keep restarting it until cancelled.

        After each exit (or failed start) the launcher waits for
        ``retry_timer_cur`` -- or until ``retry_now.wait()`` completes --
        and multiplies the delay by ``retry_timer_backoff``.  On
        cancellation the running process is terminated.
        """
        try:
            while True:
                logger.info("Starting controller %s with command: %s",
                            self.name, self.command)
                try:
                    # Copy of the current environment with Python output
                    # buffering disabled for the child.
                    env = os.environ.copy()
                    env["PYTHONUNBUFFERED"] = "1"
                    self.process = await asyncio.create_subprocess_exec(
                        *shlex.split(self.command),
                        stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                        env=env, start_new_session=True)
                    asyncio.ensure_future(
                        LogParser(self._get_log_source).stream_task(
                            self.process.stdout))
                    asyncio.ensure_future(
                        LogParser(self._get_log_source).stream_task(
                            self.process.stderr))
                    await self._wait_and_ping()
                except FileNotFoundError:
                    logger.warning("Controller %s failed to start", self.name)
                else:
                    logger.warning("Controller %s exited", self.name)
                logger.warning("Restarting in %.1f seconds",
                               self.retry_timer_cur)
                try:
                    await asyncio.wait_for(self.retry_now.wait(),
                                           self.retry_timer_cur)
                except asyncio.TimeoutError:
                    pass
                self.retry_timer_cur *= self.retry_timer_backoff
        except asyncio.CancelledError:
            await self._terminate()

    async def _terminate(self):
        """Stop the subprocess, escalating until it is gone.

        Steps, each bounded by ``term_timeout``: RPC ``terminate`` request,
        then ``Process.terminate()`` (skipped on Windows), then
        ``Process.kill()``.  Returns as soon as one step succeeds.
        """
        if self.process is None or self.process.returncode is not None:
            logger.info("Controller %s already terminated", self.name)
            return
        logger.debug("Terminating controller %s", self.name)
        try:
            await asyncio.wait_for(self.call("terminate"), self.term_timeout)
            await asyncio.wait_for(self.process.wait(), self.term_timeout)
            logger.info("Controller %s terminated", self.name)
            return
        except:
            # Deliberately broad: whatever interrupted the graceful path,
            # fall through to the forceful steps below.
            logger.warning("Controller %s did not exit on request, "
                           "ending the process", self.name)
        if os.name != "nt":
            try:
                self.process.terminate()
            except ProcessLookupError:
                # The process exited in the meantime.
                pass
            try:
                await asyncio.wait_for(self.process.wait(), self.term_timeout)
                logger.info("Controller process %s terminated", self.name)
                return
            except asyncio.TimeoutError:
                logger.warning("Controller process %s did not terminate, "
                               "killing", self.name)
        try:
            self.process.kill()
        except ProcessLookupError:
            # The process exited in the meantime.
            pass
        try:
            await asyncio.wait_for(self.process.wait(), self.term_timeout)
            logger.info("Controller process %s killed", self.name)
            return
        except asyncio.TimeoutError:
            logger.warning("Controller process %s failed to die", self.name)
Example #12
0
class Controller:
    """Supervise one controller subprocess.

    The subprocess is started from ``command``, pinged over RPC at regular
    intervals, restarted with a growing delay whenever it exits or stops
    answering, and terminated when :meth:`end` is awaited.

    All coroutines are native (``async def``): the ``@asyncio.coroutine``
    decorator used previously was removed in Python 3.11.
    """

    def __init__(self, name, ddb_entry):
        """Read the settings from ``ddb_entry`` and schedule the launcher.

        ``"command"``, ``"host"`` and ``"port"`` are mandatory entries; the
        timers and timeouts are optional and default to the values below.
        """
        self.name = name
        self.command = ddb_entry["command"]
        self.retry_timer = ddb_entry.get("retry_timer", 5)
        self.retry_timer_backoff = ddb_entry.get("retry_timer_backoff", 1.1)

        self.host = ddb_entry["host"]
        self.port = ddb_entry["port"]
        self.ping_timer = ddb_entry.get("ping_timer", 30)
        self.ping_timeout = ddb_entry.get("ping_timeout", 30)
        self.term_timeout = ddb_entry.get("term_timeout", 30)

        # Current restart delay; grows after each restart, reset by a
        # successful ping.
        self.retry_timer_cur = self.retry_timer
        self.retry_now = Condition()
        self.process = None
        # ensure_future() instead of the asyncio.Task constructor, whose
        # direct use the asyncio documentation discourages.
        self.launch_task = asyncio.ensure_future(self.launcher())

    async def end(self):
        """Cancel the launcher task and wait for it to finish."""
        self.launch_task.cancel()
        await asyncio.wait_for(self.launch_task, None)

    async def _call_controller(self, method):
        """Call ``method`` (no arguments) on the controller over a fresh RPC
        connection and return its result.

        The first target returned by ``get_rpc_id()`` is selected.  The
        connection is closed whether or not the call succeeds.
        """
        remote = AsyncioClient()
        await remote.connect_rpc(self.host, self.port, None)
        try:
            targets, _ = remote.get_rpc_id()
            remote.select_rpc_target(targets[0])
            r = await getattr(remote, method)()
        finally:
            remote.close_rpc()
        return r

    async def _ping(self):
        """Ping the controller; return the ping result, or ``False`` on error.

        A truthy answer resets the restart delay to its base value.
        Cancellation is propagated instead of being reported as a failed
        ping, so that cancelling the launcher during a ping really stops it.
        """
        try:
            ok = await asyncio.wait_for(self._call_controller("ping"),
                                        self.ping_timeout)
        except asyncio.CancelledError:
            raise
        except Exception:
            return False
        if ok:
            self.retry_timer_cur = self.retry_timer
        return ok

    async def _wait_and_ping(self):
        """Wait for the process to exit, pinging it every ``ping_timer``.

        Returns when the process exits by itself, or after terminating it
        because a ping failed.
        """
        while True:
            try:
                # NOTE(review): asyncio_process_wait_timeout is defined
                # outside this class; it is expected to raise
                # asyncio.TimeoutError on timeout -- confirm.
                await asyncio_process_wait_timeout(
                    self.process, self.ping_timer)
            except asyncio.TimeoutError:
                # Still running after ping_timer: check that it responds.
                logger.debug("pinging controller %s", self.name)
                ok = await self._ping()
                if not ok:
                    logger.warning("Controller %s ping failed", self.name)
                    await self._terminate()
                    return
            else:
                break

    async def launcher(self):
        """Start the controller and keep restarting it until cancelled.

        After each exit (or failed start) the launcher waits for
        ``retry_timer_cur`` -- or until ``retry_now.wait()`` completes --
        and multiplies the delay by ``retry_timer_backoff``.  On
        cancellation the running process is terminated.
        """
        try:
            while True:
                logger.info("Starting controller %s with command: %s",
                            self.name, self.command)
                try:
                    self.process = await asyncio.create_subprocess_exec(
                        *shlex.split(self.command))
                    await self._wait_and_ping()
                except FileNotFoundError:
                    logger.warning("Controller %s failed to start", self.name)
                else:
                    logger.warning("Controller %s exited", self.name)
                logger.warning("Restarting in %.1f seconds",
                               self.retry_timer_cur)
                try:
                    await asyncio.wait_for(self.retry_now.wait(),
                                           self.retry_timer_cur)
                except asyncio.TimeoutError:
                    pass
                self.retry_timer_cur *= self.retry_timer_backoff
        except asyncio.CancelledError:
            await self._terminate()

    def _kill_process(self):
        """Kill the subprocess, tolerating one that has already exited."""
        try:
            self.process.kill()
        except ProcessLookupError:
            pass

    async def _terminate(self):
        """Stop the subprocess: ask over RPC first, kill as a fallback."""
        logger.info("Terminating controller %s", self.name)
        if self.process is not None and self.process.returncode is None:
            try:
                await asyncio.wait_for(self._call_controller("terminate"),
                                       self.term_timeout)
            except:
                # Deliberately broad: whatever interrupted the graceful
                # request, the process must not be left running.
                logger.warning(
                    "Controller %s did not respond to terminate "
                    "command, killing", self.name)
                self._kill_process()
            try:
                await asyncio_process_wait_timeout(
                    self.process, self.term_timeout)
            except:
                # Deliberately broad, same reason as above.
                logger.warning("Controller %s failed to exit, killing",
                               self.name)
                self._kill_process()
                await self.process.wait()
        logger.debug("Controller %s terminated", self.name)
Example #13
0
class Controller:
    """Supervise one controller subprocess.

    The subprocess is started from ``command``, pinged over RPC at regular
    intervals, restarted with a growing delay whenever it exits or stops
    answering, and terminated when :meth:`end` is awaited.  Its stdout and
    stderr are forwarded to the local logging system.
    """

    def __init__(self, name, ddb_entry):
        """Read the settings from ``ddb_entry`` and schedule the launcher.

        ``"command"``, ``"host"`` and ``"port"`` are mandatory entries; the
        timers and timeouts are optional and default to the values below.
        """
        self.name = name
        self.command = ddb_entry["command"]
        self.retry_timer = ddb_entry.get("retry_timer", 5)
        self.retry_timer_backoff = ddb_entry.get("retry_timer_backoff", 1.1)

        self.host = ddb_entry["host"]
        self.port = ddb_entry["port"]
        self.ping_timer = ddb_entry.get("ping_timer", 30)
        self.ping_timeout = ddb_entry.get("ping_timeout", 30)
        self.term_timeout = ddb_entry.get("term_timeout", 30)

        # Current restart delay; grows after each restart, reset by a
        # successful ping.
        self.retry_timer_cur = self.retry_timer
        self.retry_now = Condition()
        self.process = None
        # ensure_future() instead of the asyncio.Task constructor, whose
        # direct use the asyncio documentation discourages.
        self.launch_task = asyncio.ensure_future(self.launcher())

    async def end(self):
        """Cancel the launcher task and wait for it to finish."""
        self.launch_task.cancel()
        await asyncio.wait_for(self.launch_task, None)

    async def _call_controller(self, method):
        """Call ``method`` (no arguments) on the controller over a fresh RPC
        connection and return its result.

        The first target returned by ``get_rpc_id()`` is selected.  The
        connection is closed whether or not the call succeeds.
        """
        remote = AsyncioClient()
        await remote.connect_rpc(self.host, self.port, None)
        try:
            targets, _ = remote.get_rpc_id()
            remote.select_rpc_target(targets[0])
            r = await getattr(remote, method)()
        finally:
            remote.close_rpc()
        return r

    async def _ping(self):
        """Ping the controller; return the ping result, or ``False`` on error.

        A truthy answer resets the restart delay to its base value.
        Cancellation is propagated instead of being reported as a failed
        ping, so that cancelling the launcher during a ping really stops it.
        """
        try:
            ok = await asyncio.wait_for(self._call_controller("ping"),
                                        self.ping_timeout)
        except asyncio.CancelledError:
            raise
        except Exception:
            return False
        if ok:
            self.retry_timer_cur = self.retry_timer
        return ok

    async def _wait_and_ping(self):
        """Wait for the process to exit, pinging it every ``ping_timer``.

        Returns when the process exits by itself, or after terminating it
        because a ping failed.
        """
        while True:
            try:
                await asyncio.wait_for(self.process.wait(),
                                       self.ping_timer)
            except asyncio.TimeoutError:
                # Still running after ping_timer: check that it responds.
                logger.debug("pinging controller %s", self.name)
                ok = await self._ping()
                if not ok:
                    logger.warning("Controller %s ping failed", self.name)
                    await self._terminate()
                    return
            else:
                break

    async def forward_logs(self, stream):
        """Forward each line read from ``stream`` to the logging system.

        Stops at end of stream or on the first error.  Every record is
        tagged with ``source="controller(<name>)"``.
        """
        source = "controller({})".format(self.name)
        while True:
            try:
                entry = (await stream.readline())
                if not entry:
                    break
                # Strip the line terminator only if present: the last line
                # before EOF may come without one.
                if entry.endswith(b"\n"):
                    entry = entry[:-1]
                level, name, message = parse_log_message(entry.decode())
                log_with_name(name, level, message, extra={"source": source})
            except asyncio.CancelledError:
                raise
            except Exception:
                logger.debug("exception in log forwarding", exc_info=True)
                break
        logger.debug("stopped log forwarding of stream %s of %s",
                     stream, self.name)

    async def launcher(self):
        """Start the controller and keep restarting it until cancelled.

        After each exit (or failed start) the launcher waits for
        ``retry_timer_cur`` -- or until ``retry_now.wait()`` completes --
        and multiplies the delay by ``retry_timer_backoff``.  On
        cancellation the running process is terminated.
        """
        try:
            while True:
                logger.info("Starting controller %s with command: %s",
                            self.name, self.command)
                try:
                    self.process = await asyncio.create_subprocess_exec(
                        *shlex.split(self.command),
                        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                    asyncio.ensure_future(self.forward_logs(
                        self.process.stdout))
                    asyncio.ensure_future(self.forward_logs(
                        self.process.stderr))
                    await self._wait_and_ping()
                except FileNotFoundError:
                    logger.warning("Controller %s failed to start", self.name)
                else:
                    logger.warning("Controller %s exited", self.name)
                logger.warning("Restarting in %.1f seconds",
                               self.retry_timer_cur)
                try:
                    await asyncio.wait_for(self.retry_now.wait(),
                                           self.retry_timer_cur)
                except asyncio.TimeoutError:
                    pass
                self.retry_timer_cur *= self.retry_timer_backoff
        except asyncio.CancelledError:
            await self._terminate()

    def _kill_process(self):
        """Kill the subprocess, tolerating one that has already exited."""
        try:
            self.process.kill()
        except ProcessLookupError:
            pass

    async def _terminate(self):
        """Stop the subprocess: ask over RPC first, kill as a fallback."""
        logger.info("Terminating controller %s", self.name)
        if self.process is not None and self.process.returncode is None:
            try:
                await asyncio.wait_for(self._call_controller("terminate"),
                                       self.term_timeout)
            except:
                # Deliberately broad: whatever interrupted the graceful
                # request, the process must not be left running.
                logger.warning("Controller %s did not respond to terminate "
                               "command, killing", self.name)
                self._kill_process()
            try:
                await asyncio.wait_for(self.process.wait(),
                                       self.term_timeout)
            except:
                # Deliberately broad, same reason as above.
                logger.warning("Controller %s failed to exit, killing",
                               self.name)
                self._kill_process()
                await self.process.wait()
        logger.debug("Controller %s terminated", self.name)