예제 #1
0
    def get_container_id(self, service: str, running: bool = False) -> str:
        """Given a service name, tries to find a unique matching container id

        The output of ``docker ps`` is scanned line by line. A line matches when
        the service name appears somewhere after the leading non-space token,
        and that leading token is returned as the container id.

        If running is True, only return running containers (``docker ps``);
        otherwise all containers are considered (``docker ps -a``).

        Raises Failed if the docker command fails or if the number of matching
        lines is not exactly one.
        """
        cmd = ["docker", "ps"] if running else ["docker", "ps", "-a"]
        try:
            list_containers = spawn.capture(cmd, unicode=True)
        except subprocess.CalledProcessError as e:
            raise Failed(f"failed to get container id for {service}: {e}") from e

        # Escape the name so that characters such as "." are matched literally
        # instead of being interpreted as regular-expression syntax.
        pattern = re.compile(f"^(?P<c_id>[^ ]+).*{re.escape(service)}")
        matches = [
            m.group("c_id")
            for m in map(pattern.search, list_containers.splitlines())
            if m
        ]
        if len(matches) != 1:
            raise Failed(
                f"failed to get a unique container id for service {service}, found: {matches}"
            )

        return matches[0]
예제 #2
0
    def stop_and_start(self) -> None:
        """Stop the configured container, pause, start it again, pause.

        Runs ``docker <self._stop_cmd> <self._container>``, sleeps for
        ``self._stopped_time`` seconds, then runs
        ``docker <self._run_cmd> <self._container>`` and sleeps for
        ``self._running_time`` seconds.

        Raises Failed (chained to the underlying CalledProcessError) if either
        docker command fails.
        """
        try:
            spawn.runv(["docker", self._stop_cmd, self._container])
        except subprocess.CalledProcessError as e:
            raise Failed(
                f"Unable to {self._stop_cmd} container {self._container}: {e}"
            ) from e
        time.sleep(self._stopped_time)

        try:
            spawn.runv(["docker", self._run_cmd, self._container])
        except subprocess.CalledProcessError as e:
            raise Failed(
                f"Unable to {self._run_cmd} container {self._container}: {e}"
            ) from e
        time.sleep(self._running_time)
예제 #3
0
    def stop_and_start(self, container_id: str) -> None:
        """Stop the given container, pause, start it again, pause.

        Runs ``docker <self._stop_cmd> <container_id>``, sleeps for
        ``self._stop_time`` seconds, then runs
        ``docker <self._run_cmd> <container_id>`` and sleeps for
        ``self._run_time`` seconds.

        Raises Failed (chained to the underlying CalledProcessError) if either
        docker command fails.
        """
        try:
            spawn.runv(["docker", self._stop_cmd, container_id])
        except subprocess.CalledProcessError as e:
            raise Failed(
                f"Unable to {self._stop_cmd} container {container_id}: {e}"
            ) from e
        time.sleep(self._stop_time)

        try:
            spawn.runv(["docker", self._run_cmd, container_id])
        except subprocess.CalledProcessError as e:
            raise Failed(
                f"Unable to {self._run_cmd} container {container_id}: {e}"
            ) from e
        time.sleep(self._run_time)
예제 #4
0
 def run(self, comp: Composition, workflow: Workflow) -> None:
     """Confirm that the service's container is in the expected state.

     When ``self._running`` is truthy the container must be running.
     Otherwise it must not be running, and the exit code reported by
     ``docker inspect`` must equal ``self._exit_code`` (compared in its
     single-quoted form).

     Raises Failed when the observed state differs from the expected one.
     """
     cid = comp.get_container_id(self._service)
     expect_running = self._running
     is_up = comp.docker_container_is_running(cid)

     if expect_running:
         if not is_up:
             raise Failed(f"chaos-confirm: container {cid} is not running")
         return

     if is_up:
         raise Failed(
             f"chaos-confirm: expected {cid} to have exited, is running"
         )

     reported = comp.docker_inspect("{{.State.ExitCode}}", cid)
     wanted = f"'{self._exit_code}'"
     if reported != wanted:
         raise Failed(
             f"chaos-confirm: expected exit code '{self._exit_code}' for {cid}, found {reported}"
         )
예제 #5
0
 def run(self, services: List[str]) -> None:
     """run mzcompose run in this directory

     The call is made with ``self._path`` as the working directory (via
     ``cd``). Raises Failed, chained to the underlying CalledProcessError,
     if the command fails.
     """
     with cd(self._path):
         try:
             mzcompose_run(services)
         except subprocess.CalledProcessError as e:
             raise Failed("error when bringing up all services") from e
예제 #6
0
    def run(self, comp: Composition, workflow: Workflow) -> None:
        """Apply random chaos repeatedly.

        Falls back to ``self.default_chaos`` when no chaos types are set, and
        to the ids of all running containers when no services are set.

        If ``self._other_service`` is set, chaos is added for as long as that
        service's single container is running. Otherwise chaos is added in an
        endless loop.

        Raises Failed when ``self._other_service`` does not resolve to exactly
        one container id.
        """
        if not self._chaos:
            self._chaos = self.default_chaos
        if not self._services:
            self._services = self.get_container_ids(running=True)
        say(
            f"will run these chaos types: {self._chaos} on these containers: {self._services}"
        )

        if self._other_service:
            watched = self.get_container_ids(services=[self._other_service])
            if len(watched) != 1:
                raise Failed(
                    f"wrong number of container ids found for service {self._other_service}. expected 1, found: {len(watched)}"
                )

            watched_id = watched[0]
            say(
                f"running chaos as long as {self._other_service} (container {watched_id}) is running"
            )
            while comp.docker_container_is_running(watched_id):
                self.add_chaos()
            return

        say("no 'other_service' provided, running chaos forever")
        while True:
            self.add_chaos()
예제 #7
0
    def get_container_ids(self,
                          services: List[str] = [],
                          running: bool = False) -> List[str]:
        """
        Parse Docker processes for container ids.

        Each line of the process listing is matched against one pattern per
        requested service (or a single catch-all pattern when no services are
        given). The leading non-space token of a matching line is taken as the
        container id. Lines whose leading token is "CONTAINER" (the listing
        header) are skipped.

        :param services: If provided, only return container ids for these services.
        :param running: If True, only return container ids of running processes.
        :return: Docker container id strs
        :raises Failed: if the process listing cannot be obtained.
        """
        # NOTE: the default list is only read, never mutated, so sharing it
        # between calls is harmless here.
        try:
            docker_processes = self.get_docker_processes(running=running)
        except subprocess.CalledProcessError as e:
            raise Failed(f"failed to get Docker container ids: {e}") from e

        id_prefix = "^(?P<c_id>[^ ]+).*"
        if services:
            # Escape each name so that characters such as "." match literally.
            patterns = [
                re.compile(id_prefix + re.escape(service))
                for service in services
            ]
        else:
            patterns = [re.compile(id_prefix)]

        lines = docker_processes.splitlines()
        matches = []
        for pattern in patterns:
            for line in lines:
                m = pattern.search(line)
                if m and m.group("c_id") != "CONTAINER":
                    matches.append(m.group("c_id"))

        return matches
예제 #8
0
 def run(self, comp: Composition, workflow: Workflow) -> None:
     """Wait until a TCP connection to ``self._host:self._port`` succeeds.

     Each attempt starts a throwaway ubuntu container on the composition's
     default network and lets bash open ``/dev/tcp/<host>/<port>``. Failed
     attempts are logged through ``ui.log_in_automation`` and retried until
     ``ui.timeout_loop(self._timeout_secs)`` is exhausted.

     Raises Failed if no attempt succeeds.
     """
     ui.progress(f"waiting for {self._host}:{self._port}", "C")
     for remaining in ui.timeout_loop(self._timeout_secs):
         cmd = [
             *f"docker run --rm -t --network {comp.name}_default ubuntu:bionic-20200403".split(),
             "timeout",
             str(self._timeout_secs),
             "bash",
             "-c",
             f"cat < /dev/null > /dev/tcp/{self._host}/{self._port}",
         ]
         try:
             spawn.capture(cmd, unicode=True, stderr_too=True)
         except subprocess.CalledProcessError as e:
             ui.log_in_automation(
                 f"wait-for-tcp ({self._host}:{self._port}): error running "
                 f"{ui.shell_quote(cmd)}: {e}, stdout:\n{e.stdout}\nstderr:\n{e.stderr}"
             )
             ui.progress(f" {int(remaining)}")
             continue
         ui.progress(" success!", finish=True)
         return
     raise Failed(f"Unable to connect to {self._host}:{self._port}")
예제 #9
0
 def run(self, comp: Composition, workflow: Workflow) -> None:
     """Kill the container that belongs to ``self._service``.

     The container id is looked up through ``comp.get_container_id`` and
     passed to ``docker kill``.

     Raises Failed, chained to the underlying CalledProcessError, if the
     docker command fails.
     """
     container_id = comp.get_container_id(self._service)
     say(f"Killing container: {container_id}")
     try:
         spawn.runv(["docker", "kill", container_id])
     except subprocess.CalledProcessError as e:
         raise Failed(f"Unable to kill container {container_id}: {e}") from e
예제 #10
0
def wait_for_mysql(timeout_secs: int, user: str, passwd: str, host: str,
                   port: int) -> None:
    """Wait until a MySQL server answers ``SELECT 1`` with ``(1,)``.

    A fresh connection is opened on every iteration of
    ``ui.timeout_loop(timeout_secs)`` and closed again once the query has been
    attempted. Any exception raised by an attempt is remembered and the
    attempt is retried.

    Raises Failed, chained to the last recorded error if there was one, when
    no attempt returns the expected row.
    """
    args = f"mysql user={user} host={host} port={port}"
    ui.progress(f"waiting for {args}", "C")
    error = None
    for _ in ui.timeout_loop(timeout_secs):
        try:
            conn = pymysql.connect(user=user,
                                   passwd=passwd,
                                   host=host,
                                   port=port)
            try:
                with conn.cursor() as cur:
                    cur.execute("SELECT 1")
                    result = cur.fetchone()
            finally:
                # Every attempt opens its own connection; close it so that
                # retries do not accumulate open sockets.
                conn.close()
            if result == (1, ):
                print("success!")
                return
            else:
                print(f"weird, {args} did not return 1: {result}")
        except Exception as e:
            # Deliberately broad: any failure just means "not ready yet".
            ui.progress(".")
            error = e
    ui.progress(finish=True)

    raise Failed(f"Never got correct result for {args}: {error}") from error
예제 #11
0
 def run(self, comp: Composition, workflow: Workflow) -> None:
     """Add a fixed netem delay of ``self._delay`` ms to the service's container.

     Runs ``tc qdisc add dev eth0 root netem delay <N>ms`` inside the
     container through ``docker exec``.

     Raises Failed, chained to the underlying CalledProcessError, if the
     command fails.
     """
     # Resolve the id before entering the try block: the error handler below
     # formats it into its message, so it must always be bound by then.
     container_id = comp.get_container_id(self._service)
     cmd = f"docker exec {container_id} tc qdisc add dev eth0 root netem delay {self._delay}ms".split(
     )
     try:
         spawn.runv(cmd)
     except subprocess.CalledProcessError as e:
         raise Failed(f"Unable to delay container {container_id}: {e}") from e
예제 #12
0
 def confirm_exit_code(self, container_id: str,
                       expected_exit_code: int) -> None:
     """Check that a container exited with the expected code.

     The exit code is read through ``self.docker_inspect`` and compared with
     the expected value wrapped in single quotes.

     Raises Failed when the two differ.
     """
     reported = self.docker_inspect("{{.State.ExitCode}}", container_id)
     wanted = f"'{expected_exit_code}'"
     if reported == wanted:
         return
     raise Failed(
         f"chaos-confirm: expected exit code '{expected_exit_code}' for {container_id}, found {reported}"
     )
예제 #13
0
 def run(self, comp: Composition, workflow: Workflow) -> None:
     """Ensure a container keeps running for ``self._uptime_secs`` checks.

     Once per second the names printed by ``docker ps --format={{.Names}}``
     are searched for one starting with ``<comp.name>_<self._container>``.
     If none is found, the container's logs are printed and the step fails.

     Raises Failed if ``docker ps`` fails (chained to the underlying
     CalledProcessError) or if the container is no longer listed.
     """
     pattern = f"{comp.name}_{self._container}"
     ui.progress(f"Ensuring {self._container} stays up ", "C")
     for i in range(self._uptime_secs, 0, -1):
         time.sleep(1)
         try:
             stdout = spawn.capture(["docker", "ps", "--format={{.Names}}"],
                                    unicode=True)
         except subprocess.CalledProcessError as e:
             raise Failed(f"{e.stdout}") from e
         if not any(name.startswith(pattern) for name in stdout.splitlines()):
             print(f"failed! {pattern} logs follow:")
             print_docker_logs(pattern, 10)
             raise Failed(f"container {self._container} stopped running!")
         ui.progress(f" {i}")
     print()
예제 #14
0
 def add_chaos(self) -> None:
     """Apply one randomly chosen chaos type to one randomly chosen container.

     The container is drawn from ``self._services`` and the chaos type from
     ``self._chaos``. Container lifecycle chaos (pause/stop/kill) is handed
     to ``self.add_and_remove_chaos`` together with the command that undoes
     it. Network chaos (delay/rate/loss/duplicate/corrupt) is handed to
     ``self.add_and_remove_netem_chaos`` as a ``tc ... netem`` command run
     through ``docker exec``.

     Raises Failed for an unknown chaos type.
     """
     # Draw the container first and the chaos type second so the sequence of
     # random draws stays the same as before.
     random_container = random.choice(self._services)
     random_chaos = random.choice(self._chaos)

     # chaos type -> (docker verb that injects it, docker verb that undoes it)
     lifecycle_verbs = {
         "pause": ("pause", "unpause"),
         "stop": ("stop", "start"),
         "kill": ("kill", "start"),
     }
     # chaos type -> netem arguments. Kept as single-line strings so that no
     # source indentation leaks into the command.
     netem_args = {
         "delay": "delay 100ms 100ms distribution normal",
         "rate": "rate 5kbit 20 100 5",
         "loss": "loss 10",
         "duplicate": "duplicate 10",
         "corrupt": "corrupt 10",
     }

     if random_chaos in lifecycle_verbs:
         add_verb, remove_verb = lifecycle_verbs[random_chaos]
         self.add_and_remove_chaos(
             add_cmd=f"docker {add_verb} {random_container}",
             remove_cmd=f"docker {remove_verb} {random_container}",
         )
     elif random_chaos in netem_args:
         self.add_and_remove_netem_chaos(
             container_id=random_container,
             add_cmd=(
                 f"docker exec -t {random_container} "
                 f"tc qdisc add dev eth0 root netem {netem_args[random_chaos]}"
             ),
         )
     else:
         raise Failed(f"unexpected type of chaos: {random_chaos}")
예제 #15
0
 def docker_inspect(self, format: str, container_id: str) -> str:
     """Run ``docker inspect -f`` on a container and return the first output line.

     The format template is passed to docker wrapped in literal single
     quotes, exactly as before.
     NOTE(review): presumably docker echoes those quotes back, which would
     be why callers compare against values such as "'true'" - confirm.

     :param format: Go template handed to ``docker inspect -f``.
     :param container_id: container to inspect.
     :return: first line of the captured output.
     :raises Failed: if the docker command fails (chained to the underlying
         CalledProcessError).
     """
     # Build argv explicitly instead of splitting a formatted string, so a
     # template containing whitespace stays a single argument.
     cmd = ["docker", "inspect", "-f", f"'{format}'", container_id]
     try:
         output = spawn.capture(cmd, unicode=True, stderr_too=True).splitlines()[0]
     except subprocess.CalledProcessError as e:
         ui.log_in_automation(
             "docker inspect ({}): error running {}: {}, stdout:\n{}\nstderr:\n{}".format(
                 container_id, ui.shell_quote(cmd), e, e.stdout, e.stderr,
             )
         )
         raise Failed(f"failed to inspect Docker container: {e}") from e
     else:
         return output
예제 #16
0
    def get_docker_processes(running: bool = False) -> str:
        """
        Use 'docker ps' to return all Docker process information.

        :param running: If True, only return running processes.
        :return: str of processes
        :raises Failed: if the docker command fails (chained to the
            underlying CalledProcessError).
        """
        # NOTE(review): this is indented like a method but takes no ``self``;
        # presumably it is decorated with @staticmethod outside this view.
        # Confirm against the enclosing class.
        cmd = ["docker", "ps"] if running else ["docker", "ps", "-a"]
        try:
            return spawn.capture(cmd, unicode=True)
        except subprocess.CalledProcessError as e:
            raise Failed(f"failed to get Docker container ids: {e}") from e
예제 #17
0
def wait_for_pg(
    timeout_secs: int,
    query: str,
    dbname: str,
    port: int,
    host: str,
    user: str,
    password: str,
    print_result: bool,
    expected: Union[Iterable[Any], Literal["any"]],
) -> None:
    """Wait for a pg-compatible database (includes materialized)

    On every iteration of ``ui.timeout_loop(timeout_secs)`` a new connection
    is opened and ``query`` is executed. The wait ends when ``expected`` is
    "any" and the cursor reports a rowcount of -1, when ``expected`` is "any"
    and rows were fetched, or when the fetched rows equal ``expected``. The
    connection is closed after each attempt. Any exception raised by an
    attempt is remembered and the attempt is retried.

    Raises Failed, chained to the last recorded error if there was one, when
    no attempt produces an acceptable result.
    """
    # NOTE(review): ``args`` embeds the password and is written to the
    # progress output and to the failure message.
    args = f"dbname={dbname} host={host} port={port} user={user} password={password}"
    ui.progress(f"waiting for {args} to handle {query!r}", "C")
    error = None
    for remaining in ui.timeout_loop(timeout_secs):
        try:
            conn = pg8000.connect(
                database=dbname,
                host=host,
                port=port,
                user=user,
                password=password,
                timeout=1,
            )
            try:
                cur = conn.cursor()
                cur.execute(query)
                if expected == "any" and cur.rowcount == -1:
                    ui.progress("success!", finish=True)
                    return
                result = list(cur.fetchall())
            finally:
                # Every attempt opens its own connection; release it so that
                # retries do not pile up open sockets.
                try:
                    conn.close()
                except Exception:
                    # Best effort: a failed close must not change the outcome
                    # of the attempt.
                    pass
            if expected == "any" or result == expected:
                if print_result:
                    say(f"query result: {result}")
                else:
                    ui.progress("success!", finish=True)
                return
            else:
                say(f"host={host} port={port} did not return rows matching {expected} got: {result}"
                    )
        except Exception as e:
            # Deliberately broad: any failure just means "not ready yet".
            ui.progress(" " + str(int(remaining)))
            error = e
    ui.progress(finish=True)
    raise Failed(f"never got correct result for {args}: {error}") from error
예제 #18
0
def wait_for_pg(
    timeout_secs: int,
    query: str,
    dbname: str,
    port: int,
    host: str,
    print_result: bool,
    expected: Union[Iterable[Any], Literal["any"]],
) -> None:
    """Wait for a pg-compatible database (includes materialized)

    On every iteration of ``ui.timeout_loop(timeout_secs)`` a new connection
    is opened, ``query`` is executed and all rows are fetched. The wait ends
    as soon as at least one row matches: every row matches when ``expected``
    is "any", otherwise a row matches when ``list(row) == expected``. Matching
    rows are printed when ``print_result`` is set. The connection is closed
    after each attempt. Any exception raised by an attempt is remembered and
    the attempt is retried.

    Raises Failed, chained to the last recorded error if there was one, when
    no attempt yields a matching row.
    """
    args = f"dbname={dbname} host={host} port={port} user=ignored"
    ui.progress(f"waiting for {args} to handle {query!r}", "C")
    error = None
    # Rows are compared as lists below, so normalise a tuple expectation.
    if isinstance(expected, tuple):
        expected = list(expected)
    for remaining in ui.timeout_loop(timeout_secs):
        try:
            conn = pg8000.connect(database=dbname,
                                  host=host,
                                  port=port,
                                  user="******",
                                  timeout=1)
            try:
                cur = conn.cursor()
                cur.execute(query)
                result = cur.fetchall()
            finally:
                # Every attempt opens its own connection; release it so that
                # retries do not pile up open sockets.
                try:
                    conn.close()
                except Exception:
                    # Best effort: a failed close must not change the outcome
                    # of the attempt.
                    pass
            found_result = False
            for row in result:
                if expected == "any" or list(row) == expected:
                    if not found_result:
                        found_result = True
                        ui.progress(" up and responding!", finish=True)
                        if print_result:
                            say("query result:")
                    if print_result:
                        print(" ".join([str(r) for r in row]))
            if found_result:
                return
            else:
                say(f"host={host} port={port} did not return any row matching {expected} got: {result}"
                    )
        except Exception as e:
            # Deliberately broad: any failure just means "not ready yet".
            ui.progress(" " + str(int(remaining)))
            error = e
    ui.progress(finish=True)
    raise Failed(f"never got correct result for {args}: {error}") from error
예제 #19
0
 def run(self, comp: Composition, workflow: Workflow) -> None:
     """Wait for the MySQL service to accept queries.

     Uses ``self._port`` when it is set. Otherwise the port is taken from
     ``comp.find_host_ports(self._service)``, which must return exactly one
     entry. The wait itself is delegated to ``wait_for_mysql``.

     Raises Failed when the port cannot be determined unambiguously.
     """
     port = self._port
     if port is None:
         host_ports = comp.find_host_ports(self._service)
         if len(host_ports) != 1:
             raise Failed(
                 f"Could not unambiguously determine port for {self._service} "
                 f"found: {','.join(host_ports)}")
         port = int(host_ports[0])

     wait_for_mysql(
         user=self._user,
         passwd=self._password,
         host=self._host,
         port=port,
         timeout_secs=self._timeout_secs,
     )
예제 #20
0
 def run(self, comp: Composition, workflow: Workflow) -> None:
     """Wait for the pg-compatible service to answer the configured query.

     Uses ``self._port`` when it is set. Otherwise the port is taken from
     ``comp.find_host_ports(self._service)``, which must return exactly one
     entry. The wait itself is delegated to ``wait_for_pg``.

     Raises Failed when the port cannot be determined unambiguously.
     """
     port = self._port
     if port is None:
         host_ports = comp.find_host_ports(self._service)
         if len(host_ports) != 1:
             raise Failed(
                 f"Unable to unambiguously determine port for {self._service}, "
                 f"found ports: {','.join(host_ports)}")
         port = int(host_ports[0])

     wait_for_pg(
         dbname=self._dbname,
         host=self._host,
         port=port,
         timeout_secs=self._timeout_secs,
         query=self._query,
         expected=self._expected,
         print_result=self._print_result,
     )
예제 #21
0
 def run(self, comp: Composition, workflow: Workflow) -> None:
     """Execute ``self._query`` against the MySQL service.

     Uses ``self._port`` when it is set. Otherwise the port is taken from
     ``comp.find_host_ports(self._service)``, which must return exactly one
     entry. The connection is opened with autocommit and multi-statement
     support, and is closed once the query has run.

     Raises Failed when the port cannot be determined unambiguously.
     """
     if self._port is None:
         ports = comp.find_host_ports(self._service)
         if len(ports) != 1:
             raise Failed(
                 f"Could not unambiguously determine port for {self._service} "
                 f"found: {','.join(ports)}")
         port = int(ports[0])
     else:
         port = self._port
     conn = pymysql.connect(
         user=self._user,
         passwd=self._password,
         host=self._host,
         port=port,
         client_flag=pymysql.constants.CLIENT.MULTI_STATEMENTS,
         autocommit=True,
     )
     try:
         with conn.cursor() as cur:
             cur.execute(self._query)
     finally:
         # Release the connection even when the query raises; autocommit is
         # on, so nothing is pending at this point.
         conn.close()
예제 #22
0
 def run(self, comp: Composition) -> None:
     """Wait until a TCP connection to ``self._host:self._port`` succeeds.

     Each attempt starts a throwaway ubuntu container on the composition's
     default network and lets bash open ``/dev/tcp/<host>/<port>``. Attempts
     are repeated until ``ui.timeout_loop(self._timeout_secs)`` is exhausted.

     Raises Failed if no attempt succeeds.
     """
     ui.progress(
         f"waiting for {self._host}:{self._port}",
         "C",
     )
     # Only ``-t`` is passed: adding ``-i`` makes ``docker run`` refuse to
     # start when stdin is not a TTY (e.g. in automation), and the probe does
     # not read stdin anyway. The command does not change between attempts,
     # so it is built once.
     cmd = f"docker run --rm -t --network {comp.name}_default ubuntu:bionic-20200403".split(
     )
     cmd.extend([
         "timeout",
         str(self._timeout_secs),
         "bash",
         "-c",
         f"cat < /dev/null > /dev/tcp/{self._host}/{self._port}",
     ])
     for remaining in ui.timeout_loop(self._timeout_secs):
         try:
             spawn.capture(cmd, unicode=True, stderr_too=True)
         except subprocess.CalledProcessError:
             ui.progress(" {}".format(int(remaining)))
         else:
             ui.progress(" success!", finish=True)
             return
     raise Failed(f"Unable to connect to {self._host}:{self._port}")
예제 #23
0
 def run(self, services: List[str]) -> None:
     """Invoke ``mzcompose_run`` for the given services.

     The extra arguments come from ``self._compose_args()``. A failing
     command is re-raised as Failed, chained to the original error.
     """
     try:
         compose_args = self._compose_args()
         mzcompose_run(services, args=compose_args)
     except subprocess.CalledProcessError as err:
         raise Failed("error when bringing up all services") from err
예제 #24
0
 def run(self, comp: Composition, workflow: Workflow) -> None:
     """Run ``self._command`` through ``workflow.mzcompose_run``.

     Raises Failed, chained to the underlying CalledProcessError, if the
     command fails.
     """
     try:
         workflow.mzcompose_run(self._command)
     except subprocess.CalledProcessError as e:
         raise Failed(
             "giving up: {}".format(ui.shell_quote(self._command))
         ) from e
예제 #25
0
 def run(self, comp: Composition, workflow: Workflow) -> None:
     """Bring up ``self._services`` through ``workflow.mzcompose_up``.

     Raises Failed, chained to the underlying CalledProcessError, if the
     command fails.
     """
     try:
         workflow.mzcompose_up(self._services)
     except subprocess.CalledProcessError as e:
         services = ", ".join(self._services)
         raise Failed(
             f"ERROR: services didn't come up cleanly: {services}"
         ) from e
예제 #26
0
 def up(self, services: List[str]) -> None:
     """Bring up the given services with ``mzcompose_up``.

     The call is made with ``self._path`` as the working directory (via
     ``cd``). Raises Failed, chained to the underlying CalledProcessError,
     if the command fails.
     """
     with cd(self._path):
         try:
             mzcompose_up(services)
         except subprocess.CalledProcessError as e:
             raise Failed("error when bringing up all services") from e
예제 #27
0
 def run(self, comp: Composition, workflow: Workflow) -> None:
     """Run the netem chaos command returned by ``self.get_cmd()``.

     The command string is split on whitespace and handed to ``spawn.runv``.

     Raises Failed, chained to the underlying CalledProcessError, if the
     command fails.
     """
     try:
         cmd = self.get_cmd().split()
         spawn.runv(cmd)
     except subprocess.CalledProcessError as e:
         # ``stderr`` is None unless the child's stderr was captured; fall
         # back to the exception text so the message is never just "None".
         detail = e.stderr if e.stderr else e
         raise Failed(f"Unable to run netem chaos command: {detail}") from e
예제 #28
0
 def up(self, services: List[str]) -> None:
     """Bring up the given services with ``mzcompose_up``.

     The extra arguments come from ``self._compose_args()``. Raises Failed,
     chained to the underlying CalledProcessError, if the command fails.
     """
     try:
         mzcompose_up(services, args=self._compose_args())
     except subprocess.CalledProcessError as e:
         raise Failed("error when bringing up all services") from e
예제 #29
0
 def confirm_is_running(self, container_id: str) -> None:
     """Check that a container is running.

     The running state is read through ``self.docker_inspect`` and must be
     the single-quoted string ``'true'``.

     Raises Failed otherwise.
     """
     state = self.docker_inspect("{{.State.Running}}", container_id)
     if state == "'true'":
         return
     raise Failed(
         f"chaos-confirm: container {container_id} is not running")
예제 #30
0
 def threaded_netem(self, cmd: List[str]) -> None:
     """Run a netem chaos command given as an argv list.

     Raises Failed, chained to the underlying CalledProcessError, if the
     command fails.
     """
     try:
         spawn.runv(cmd)
     except subprocess.CalledProcessError as e:
         raise Failed(f"Unable to run netem chaos command: {e}") from e