def run(self, workflow: Workflow) -> None:
    """Confirm the post-chaos state of the service's container.

    If self._running, assert the container is still up. Otherwise, either
    wait for it to exit (self._wait) or assert it has already exited, then
    verify its exit code matches self._exit_code.

    :raises errors.Failed: if the container is in the wrong state or exited
        with an unexpected code.
    """
    container_id = workflow.composition.get_container_id(self._service)
    if self._running:
        if not workflow.composition.docker_container_is_running(
                container_id):
            raise errors.Failed(
                f"chaos-confirm: container {container_id} is not running")
    else:
        if self._wait:
            # Poll once a minute until the container exits.
            while workflow.composition.docker_container_is_running(
                    container_id):
                say(f"chaos-confirm: waiting for {self._service} to exit")
                time.sleep(60)
        else:
            if workflow.composition.docker_container_is_running(
                    container_id):
                raise errors.Failed(
                    f"chaos-confirm: expected {container_id} to have exited, is running"
                )
        # docker_inspect builds its command with str.split() and no shell, so
        # the single quotes around the format string reach docker literally
        # and the returned value is wrapped in quotes (e.g. "'0'"). The
        # expected value is therefore compared in quoted form too.
        actual_exit_code = workflow.composition.docker_inspect(
            "{{.State.ExitCode}}", container_id)
        if actual_exit_code != f"'{self._exit_code}'":
            raise errors.Failed(
                f"chaos-confirm: expected exit code '{self._exit_code}' for {container_id}, found {actual_exit_code}"
            )
def get_container_id(self, service: str, running: bool = False) -> str:
    """Given a service name, tries to find a unique matching container id

    If running is True, only return running containers.

    :param service: service name to look for in `docker ps` output
    :param running: when True, omit -a so only running containers match
    :raises errors.Failed: if `docker ps` fails, or zero / multiple
        containers match the service name.
    """
    cmd = ["docker", "ps"] if running else ["docker", "ps", "-a"]
    try:
        list_containers = spawn.capture(cmd, unicode=True)
    except subprocess.CalledProcessError as e:
        raise errors.Failed(
            f"failed to get container id for {service}: {e}")
    # The container id is the first whitespace-delimited column and the
    # service name must appear later on the same line. BUG FIX: escape the
    # service name so regex metacharacters in it cannot corrupt the pattern.
    pattern = re.compile(f"^(?P<c_id>[^ ]+).*{re.escape(service)}")
    matches = []
    for line in list_containers.splitlines():
        m = pattern.search(line)
        if m:
            matches.append(m.group("c_id"))
    if len(matches) != 1:
        raise errors.Failed(
            f"failed to get a unique container id for service {service}, found: {matches}"
        )
    return matches[0]
def run(self, workflow: Workflow) -> None:
    """Wait for the single container backing self._service to exit and check
    its exit code, optionally printing its logs afterwards.

    :raises errors.Failed: if the service maps to zero or multiple
        containers, or the exit code differs from
        self._expected_return_code.
    """
    say(f"Waiting for the service {self._service} to exit")
    ps_proc = workflow.run_compose(["ps", "-q", self._service],
                                   capture=True)
    # BUG FIX: "".split("\n") yields [""], so an empty `docker-compose ps`
    # result used to slip past the "no containers" check and hand an empty
    # container id to `docker wait`. Drop empty lines.
    container_ids = [
        c for c in ps_proc.stdout.decode("utf-8").strip().split("\n") if c
    ]
    if len(container_ids) > 1:
        raise errors.Failed(
            f"Expected to get a single container for {self._service}; got: {container_ids}"
        )
    elif not container_ids:
        raise errors.Failed(
            f"No containers returned for service {self._service}")

    container_id = container_ids[0]
    wait_cmd = ["docker", "wait", container_id]
    wait_proc = spawn.runv(wait_cmd, capture_output=True)
    return_codes = [
        int(c)
        for c in wait_proc.stdout.decode("utf-8").strip().split("\n")
        if c
    ]
    if len(return_codes) != 1:
        raise errors.Failed(
            f"Expected single exit code for {container_id}; got: {return_codes}"
        )

    return_code = return_codes[0]
    if return_code != self._expected_return_code:
        raise errors.Failed(
            f"Expected exit code {self._expected_return_code} for {container_id}; got: {return_code}"
        )

    if self._print_logs:
        spawn.runv(["docker", "logs", container_id])
def run(self, workflow: Workflow) -> None:
    """Probe self._host:self._port over TCP from inside the composition's
    network until it connects or self._timeout_secs elapses; on timeout,
    probe each declared dependency to produce a more useful error.

    :raises errors.Failed: naming a down dependency (with optional hint),
        or the target host:port itself.
    """
    ui.progress(f"waiting for {self._host}:{self._port}", "C")
    # The probe runs in a throwaway container attached to the composition's
    # network so it resolves the same service names the services do.
    # Hoisted out of the loop: it is loop-invariant and was also relied on
    # after the loop, which would NameError if the loop never ran.
    cmd = f"docker run --rm -t --network {workflow.composition.name}_default ubuntu:bionic-20200403".split()
    for remaining in ui.timeout_loop(self._timeout_secs):
        try:
            _check_tcp(cmd[:], self._host, self._port, self._timeout_secs)
        except subprocess.CalledProcessError:
            ui.progress(" {}".format(int(remaining)))
        else:
            ui.progress(" success!", finish=True)
            return
    # The target never came up; check its dependencies to explain why.
    for dep in self._dependencies:
        host, port = dep["host"], dep["port"]
        try:
            _check_tcp(cmd[:], host, port, self._timeout_secs,
                       kind="dependency ")
        except subprocess.CalledProcessError:
            message = f"Dependency is down {host}:{port}"
            if "hint" in dep:
                message += f"\n hint: {dep['hint']}"
            raise errors.Failed(message)
    raise errors.Failed(f"Unable to connect to {self._host}:{self._port}")
def wait_for_mysql(timeout_secs: int, user: str, passwd: str, host: str,
                   port: int) -> None:
    """Poll a MySQL server until `SELECT 1` succeeds or the timeout expires.

    :param timeout_secs: how long to keep retrying before giving up
    :raises errors.Failed: on timeout, carrying the last error seen.
    """
    args = f"mysql user={user} host={host} port={port}"
    ui.progress(f"waiting for {args}", "C")
    error = None
    for _ in ui.timeout_loop(timeout_secs):
        try:
            conn = pymysql.connect(user=user, passwd=passwd, host=host,
                                   port=port)
            try:
                with conn.cursor() as cur:
                    cur.execute("SELECT 1")
                    result = cur.fetchone()
                if result == (1, ):
                    print("success!")
                    return
                else:
                    print(f"weird, {args} did not return 1: {result}")
            finally:
                # BUG FIX: the connection was leaked on every iteration of
                # the retry loop; close it explicitly.
                conn.close()
        except Exception as e:
            # Broad catch is deliberate: any failure just means "not ready
            # yet"; remember the last error for the timeout message.
            ui.progress(".")
            error = e
    ui.progress(finish=True)
    raise errors.Failed(f"Never got correct result for {args}: {error}")
def run(self, workflow: Workflow) -> None:
    """Repeatedly try a TCP connection to self._host:self._port from inside
    the composition's network, raising errors.Failed on timeout."""
    ui.progress(f"waiting for {self._host}:{self._port}", "C")
    for remaining in ui.timeout_loop(self._timeout_secs):
        # Build the probe: a throwaway ubuntu container on the composition's
        # network that opens /dev/tcp/<host>/<port> via bash.
        probe = f"docker run --rm -t --network {workflow.composition.name}_default ubuntu:bionic-20200403".split(
        )
        probe += [
            "timeout",
            str(self._timeout_secs),
            "bash",
            "-c",
            f"cat < /dev/null > /dev/tcp/{self._host}/{self._port}",
        ]
        try:
            spawn.capture(probe, unicode=True, stderr_too=True)
        except subprocess.CalledProcessError as e:
            ui.log_in_automation(
                "wait-for-tcp ({}:{}): error running {}: {}, stdout:\n{}\nstderr:\n{}"
                .format(
                    self._host,
                    self._port,
                    ui.shell_quote(probe),
                    e,
                    e.stdout,
                    e.stderr,
                ))
            ui.progress(" {}".format(int(remaining)))
            continue
        ui.progress(" success!", finish=True)
        return
    raise errors.Failed(f"Unable to connect to {self._host}:{self._port}")
def run(self, workflow: Workflow) -> None:
    """Drive chaos injection: pick defaults where unset, then add chaos
    forever, or for as long as the watched 'other_service' keeps running."""
    if not self._chaos:
        self._chaos = self.default_chaos
    if not self._services:
        self._services = self.get_container_names(running=True)
    say(f"will run these chaos types: {self._chaos} on these containers: {self._services}"
        )
    if not self._other_service:
        # Nothing to watch: inject chaos until the workflow is torn down.
        say(f"no 'other_service' provided, running chaos forever")
        while True:
            self.add_chaos()
    matching = self.get_container_names(services=[self._other_service])
    if len(matching) != 1:
        raise errors.Failed(
            f"wrong number of container ids found for service {self._other_service}. expected 1, found: {len(matching)}"
        )
    watched = matching[0]
    say(f"running chaos as long as {self._other_service} (container {watched}) is running"
        )
    while workflow.composition.docker_container_is_running(watched):
        self.add_chaos()
def run(self, workflow: Workflow) -> None:
    """Restart the configured services via docker-compose, converting a
    compose failure into errors.Failed."""
    try:
        workflow.run_compose(["restart", *self._services])
    except subprocess.CalledProcessError:
        joined = ", ".join(self._services)
        raise errors.Failed(
            f"ERROR: services didn't restart cleanly: {joined}")
def run(self, workflow: Workflow) -> None:
    """Run a one-off command via `docker-compose run`, optionally mapping
    the service's ports; a compose failure becomes errors.Failed."""
    args = ["run"]
    if self._service_ports:
        args.append("--service-ports")
    args.extend(self._command)
    try:
        workflow.run_compose(args)
    except subprocess.CalledProcessError:
        raise errors.Failed("giving up: {}".format(
            ui.shell_quote(self._command)))
def run(self, workflow: Workflow) -> None:
    """Send `docker-compose kill` (with an optional signal) to the
    configured services, raising errors.Failed if compose reports an
    error."""
    args = ["kill"]
    if self._signal:
        args += ["-s", self._signal]
    args += self._services
    try:
        workflow.run_compose(args)
    except subprocess.CalledProcessError:
        joined = ", ".join(self._services)
        raise errors.Failed(f"ERROR: services didn't die cleanly: {joined}")
def add_chaos(self) -> None:
    """Apply one randomly chosen chaos action to one randomly chosen
    container.

    "pause"/"stop"/"kill" pair a docker command with its undo command; the
    netem variants (delay/rate/loss/duplicate/corrupt) shape network
    traffic inside the container via `tc qdisc`.

    :raises errors.Failed: if an unknown chaos type was configured.
    """
    random_container = random.choice(self._services)
    random_chaos = random.choice(self._chaos)
    if random_chaos == "pause":
        self.add_and_remove_chaos(
            add_cmd=f"docker pause {random_container}",
            remove_cmd=f"docker unpause {random_container}",
        )
    elif random_chaos == "stop":
        self.add_and_remove_chaos(
            add_cmd=f"docker stop {random_container}",
            remove_cmd=f"docker start {random_container}",
        )
    elif random_chaos == "kill":
        self.add_and_remove_chaos(
            add_cmd=f"docker kill {random_container}",
            remove_cmd=f"docker start {random_container}",
        )
    elif random_chaos == "delay":
        # NOTE(review): the backslash continuation embeds the next line's
        # leading whitespace in the command string; harmless if the command
        # is split on whitespace downstream — confirm.
        self.add_and_remove_netem_chaos(
            container_id=random_container,
            add_cmd=
            f"docker exec -t {random_container} tc qdisc add dev eth0 root netem \
                delay 100ms 100ms distribution normal",
        )
    elif random_chaos == "rate":
        self.add_and_remove_netem_chaos(
            container_id=random_container,
            add_cmd=
            f"docker exec -t {random_container} tc qdisc add dev eth0 root netem \
                rate 5kbit 20 100 5",
        )
    elif random_chaos == "loss":
        self.add_and_remove_netem_chaos(
            container_id=random_container,
            add_cmd=
            f"docker exec -t {random_container} tc qdisc add dev eth0 root netem loss 10",
        )
    elif random_chaos == "duplicate":
        self.add_and_remove_netem_chaos(
            container_id=random_container,
            add_cmd=
            f"docker exec -t {random_container} tc qdisc add dev eth0 root netem duplicate 10",
        )
    elif random_chaos == "corrupt":
        self.add_and_remove_netem_chaos(
            container_id=random_container,
            add_cmd=
            f"docker exec -t {random_container} tc qdisc add dev eth0 root netem corrupt 10",
        )
    else:
        raise errors.Failed(f"unexpected type of chaos: {random_chaos}")
def run(self, workflow: Workflow) -> None:
    """Verify the named container stays up for self._uptime_secs seconds,
    polling `docker ps` once per second and dumping logs on failure."""
    pattern = f"{workflow.composition.name}_{self._container}"
    ui.progress(f"Ensuring {self._container} stays up ", "C")
    for countdown in range(self._uptime_secs, 0, -1):
        time.sleep(1)
        try:
            names = spawn.capture(["docker", "ps", "--format={{.Names}}"],
                                  unicode=True)
        except subprocess.CalledProcessError as e:
            raise errors.Failed(f"{e.stdout}")
        # The container is considered alive if any listed name starts with
        # "<composition>_<container>".
        if not any(
                line.startswith(pattern) for line in names.splitlines()):
            print(f"failed! {pattern} logs follow:")
            print_docker_logs(pattern, 10)
            raise errors.Failed(
                f"container {self._container} stopped running!")
        ui.progress(f" {countdown}")
    print()
def run(self, workflow: Workflow) -> None:
    """Force-remove (stopping first) the configured services' containers,
    optionally destroying their anonymous volumes.

    :raises errors.Failed: if docker-compose reports an error.
    """
    try:
        workflow.run_compose([
            "rm",
            "-f",
            "-s",
            *(["-v"] if self._destroy_volumes else []),
            *self._services,
        ])
    except subprocess.CalledProcessError:
        services = ", ".join(self._services)
        # BUG FIX: the old message said "didn't restart cleanly" — copy-
        # pasted from the restart step; this step removes containers.
        raise errors.Failed(
            f"ERROR: services didn't remove cleanly: {services}")
def docker_inspect(self, format: str, container_id: str) -> str:
    """Return the first line of `docker inspect -f` output for a container.

    NOTE: the command is built with str.split() and run without a shell, so
    the single quotes around the format string are passed to docker
    literally and the returned value comes back wrapped in single quotes
    (e.g. "'0'"). Callers compare against quoted values — do not "fix" the
    quoting here without updating them.

    :param format: a docker Go-template, e.g. "{{.State.ExitCode}}"
    :raises errors.Failed: if `docker inspect` fails.
    """
    try:
        cmd = f"docker inspect -f '{format}' {container_id}".split()
        output = spawn.capture(cmd, unicode=True,
                               stderr_too=True).splitlines()[0]
    except subprocess.CalledProcessError as e:
        ui.log_in_automation(
            "docker inspect ({}): error running {}: {}, stdout:\n{}\nstderr:\n{}"
            .format(container_id, ui.shell_quote(cmd), e, e.stdout,
                    e.stderr))
        raise errors.Failed(f"failed to inspect Docker container: {e}")
    else:
        return output
def wait_for_pg(
    timeout_secs: int,
    query: str,
    dbname: str,
    port: int,
    host: str,
    user: str,
    password: str,
    print_result: bool,
    expected: Union[Iterable[Any], Literal["any"]],
) -> None:
    """Wait for a pg-compatible database (includes materialized)

    Repeatedly connects and runs `query` until its rows match `expected`
    (or, when `expected` is the literal string "any", until the query
    merely succeeds), or until `timeout_secs` elapses.

    :param timeout_secs: how long to keep retrying before giving up
    :param query: SQL to execute on each attempt
    :param expected: iterable of expected rows, or "any" to accept anything
    :param print_result: when True, say() the rows on success
    :raises errors.Failed: on timeout, carrying the last exception seen
    """
    args = f"dbname={dbname} host={host} port={port} user={user} password={password}"
    ui.progress(f"waiting for {args} to handle {query!r}", "C")
    error = None
    for remaining in ui.timeout_loop(timeout_secs):
        try:
            conn = pg8000.connect(
                database=dbname,
                host=host,
                port=port,
                user=user,
                password=password,
                timeout=1,
            )
            # The default (autocommit = false) wraps everything in a transaction.
            conn.autocommit = True
            cur = conn.cursor()
            cur.execute(query)
            # rowcount == -1 means the statement produced no row set (e.g.
            # DDL); with expected == "any" that still counts as success.
            if expected == "any" and cur.rowcount == -1:
                ui.progress("success!", finish=True)
                return
            result = list(cur.fetchall())
            if expected == "any" or result == expected:
                if print_result:
                    say(f"query result: {result}")
                else:
                    ui.progress("success!", finish=True)
                return
            else:
                say(
                    f"host={host} port={port} did not return rows matching {expected} got: {result}"
                )
        except Exception as e:
            # Any failure (connect, execute, fetch, mismatch path raising)
            # just means "retry"; remember the last error for the message.
            ui.progress(" " + str(int(remaining)))
            error = e
    ui.progress(finish=True)
    raise errors.Failed(f"never got correct result for {args}: {error}")
def run(self, workflow: Workflow) -> None:
    """Resolve the service's host port (unless one was configured) and
    block until the MySQL server there accepts queries."""
    port = self._port
    if port is None:
        # No explicit port: discover it, requiring exactly one mapping.
        host_ports = workflow.composition.find_host_ports(self._service)
        if len(host_ports) != 1:
            raise errors.Failed(
                f"Could not unambiguously determine port for {self._service} "
                f"found: {','.join(host_ports)}")
        port = int(host_ports[0])
    wait_for_mysql(
        user=self._user,
        passwd=self._password,
        host=self._host,
        port=port,
        timeout_secs=self._timeout_secs,
    )
def get_docker_processes(running: bool = False) -> List[Dict[str, Any]]:
    """
    Use 'docker ps' to return all Docker process information.

    :param running: If True, only return running processes.
    :return: one parsed JSON dict per container, as emitted by docker's
        "{{ json . }}" format
    """
    cmd = ["docker", "ps", "--format", "{{ json . }}"]
    if not running:
        # Include stopped containers too.
        cmd.insert(2, "-a")
    try:
        out = spawn.capture(cmd, unicode=True)
        # json.loads technically returns Any; narrow for the caller.
        return cast(List[Dict[str, Any]],
                    [json.loads(line) for line in out.splitlines()])
    except subprocess.CalledProcessError as e:
        raise errors.Failed(f"failed to get Docker container ids: {e}")
def run(self, workflow: Workflow) -> None:
    """Connect to MySQL (resolving the host port when none is configured)
    and execute self._query with autocommit and multi-statement support."""
    port = self._port
    if port is None:
        host_ports = workflow.composition.find_host_ports(self._service)
        if len(host_ports) != 1:
            raise errors.Failed(
                f"Could not unambiguously determine port for {self._service} "
                f"found: {','.join(host_ports)}")
        port = int(host_ports[0])
    conn = pymysql.connect(
        user=self._user,
        passwd=self._password,
        host=self._host,
        port=port,
        client_flag=pymysql.constants.CLIENT.MULTI_STATEMENTS,
        autocommit=True,
    )
    with conn.cursor() as cur:
        cur.execute(self._query)
def run(self, workflow: Workflow) -> None:
    """Resolve the target port (if not configured) and delegate to
    wait_for_pg with this step's query and expectations."""
    port = self._port
    if port is None:
        host_ports = workflow.composition.find_host_ports(self._service)
        if len(host_ports) != 1:
            raise errors.Failed(
                f"Unable to unambiguously determine port for {self._service}, "
                f"found ports: {','.join(host_ports)}")
        port = int(host_ports[0])
    wait_for_pg(
        dbname=self._dbname,
        host=self._host,
        port=port,
        timeout_secs=self._timeout_secs,
        query=self._query,
        user=self._user,
        password=self._password,
        expected=self._expected,
        print_result=self._print_result,
    )
def get_container_names(self,
                        services: Union[List[str], None] = None,
                        running: bool = False) -> List[str]:
    """
    Parse Docker processes for container names.

    :param services: If provided, only return container ids for these services.
    :param running: If True, only return container ids of running processes.
    :return: Docker container id strs

    BUG FIX: the default used to be a mutable `[]`, a classic shared-
    default hazard; `None` is used as the sentinel instead (backward
    compatible for all callers).
    """
    services = services or []
    matches = []
    try:
        for process in self.get_docker_processes(running=running):
            name = process["Names"]
            # With no service filter, every container matches.
            if not services or any(s in name for s in services):
                matches.append(name)
        return matches
    except subprocess.CalledProcessError as e:
        raise errors.Failed(f"failed to get Docker container ids: {e}")