def restart_agent_container(
        self, hosts: typing.Optional[typing.List[str]] = None):
    '''Restart network agent containers on hosts

    Restart docker or podman containers and check network agents are up
    and running after it

    :parm hosts: List of hostnames to start agent on
    :type hosts: list of strings
    '''
    hosts = hosts or self.hosts
    self.assertNotEqual([], hosts, "Host list is empty")
    self.container_name = (
        self.container_name or
        self.get_agent_container_name(self.agent_name))
    if not self.container_name:
        # BUG FIX: container_name is guaranteed empty/falsy here, so the
        # previous message ("Missing container(s): ''") carried no
        # information; report the agent name instead.
        self.skipTest(f"Missing container(s) for agent "
                      f"'{self.agent_name}'")
    for host in hosts:
        ssh_client = topology.get_openstack_node(hostname=host).ssh_client
        sh.execute(f'{self.container_runtime_name} restart '
                   f'{self.container_name}',
                   ssh_client=ssh_client, sudo=True)
def is_destroyed(self, pid, command_filter, hostname):
    '''Tell whether a process has been terminated on a node

    :param pid: Process ID to check if exist and handles specific command
    :type pid: int
    :param command_filter: Pattern to be found in process command details
    :type command_filter: string
    :param hostname: Hostname of the node to look for PID on
    :type hostname: string
    :return: True when the PID is gone or belongs to another command
    '''
    node = topology.get_openstack_node(hostname=hostname)
    matching = sh.list_processes(
        ssh_client=node.ssh_client).with_attributes(pid=pid)
    if not matching:
        LOG.debug(f'No PID {pid} has been found in process list')
        return True
    try:
        cmdline = sh.execute(f'cat /proc/{pid}/cmdline',
                             ssh_client=node.ssh_client)
    except sh.ShellCommandFailed:
        # The process disappeared between listing and inspection
        LOG.debug(f'Process {pid} has been terminated right after the'
                  f' process list has been collected')
        return True
    if command_filter not in cmdline.stdout:
        # PID was recycled by an unrelated process
        LOG.debug(f'Different process with same PID {pid} exist')
        return True
    return False
def get_amphora_compute_node(loadbalancer_id: str,
                             lb_port: int,
                             lb_protocol: str,
                             ip_address: str) -> (
        topology.OpenStackTopologyNode):
    """Gets the compute node which hosts the LB amphora

    This function finds the Overcloud compute node which hosts
    the amphora. In case there are more than 1 amphora
    (e.g. if the LB's topology is Active/standby), so the compute node
    which hosts the master amphora will be returned.

    :param loadbalancer_id (str): The loadbalancer ID.
    :param lb_port (int): The loadbalancer port.
    :param lb_protocol (str): The loadbalancer protocol.
    :param ip_address (str): The ip adress of the loadbalancer.
    :return (TripleoTopologyNode): The compute node which hosts the Amphora.
    :raises ValueError: when no amphora exists for the loadbalancer.
    """
    amphorae = list_amphorae(loadbalancer_id)
    # BUG FIX: an empty amphora list previously surfaced as a bare
    # IndexError on amphorae[0]; fail with a meaningful message instead.
    if not amphorae:
        raise ValueError(
            f'No amphora found for loadbalancer {loadbalancer_id}')
    if len(amphorae) > 1:  # For a high available LB
        amphora = get_master_amphora(amphorae=amphorae,
                                     lb_port=lb_port,
                                     lb_protocol=lb_protocol,
                                     ip_address=ip_address)
    else:
        amphora = amphorae[0]
    server = nova.get_server(amphora['compute_id'])
    hostname = getattr(server, 'OS-EXT-SRV-ATTR:hypervisor_hostname')
    return topology.get_openstack_node(hostname=hostname)
def _get_router_ips_from_namespaces(self, hostname):
    '''Collect global-scope IP addresses from the router namespaces

    Asserts every expected router namespace (qrouter-*, plus snat-* for
    distributed routers) exists on the host, then gathers their
    global-scope addresses.
    '''
    host = topology.get_openstack_node(hostname=hostname)
    expected_namespaces = ["qrouter-%s" % self.router['id']]
    if self.router.get('distributed'):
        expected_namespaces.append("snat-%s" % self.router['id'])
    present_namespaces = ip.list_network_namespaces(
        ssh_client=host.ssh_client)
    addresses = []
    for namespace in expected_namespaces:
        self.assertIn(namespace, present_namespaces)
        addresses += ip.list_ip_addresses(
            scope='global', network_namespace=namespace,
            ssh_client=host.ssh_client)
    return addresses
def test_router_not_created_on_compute_if_no_instance_connected(self):
    '''Test that no router namespace is created for DVR on compute node

    Namespace should be only created if there is VM with router that is
    set as a default gateway. Need to verify that there will be no
    namespace created on the compute node where VM is connected to the
    external network. The same network is used as the default gateway for
    the router
    '''
    expected_absent = (
        f'qrouter-{self.router_stack.gateway_details["id"]}')
    hypervisor = topology.get_openstack_node(
        hostname=self.server_stack.hypervisor_host)
    present = ip.list_network_namespaces(
        ssh_client=hypervisor.ssh_client)
    self.assertNotIn(expected_absent, present)
def _check_routers_namespace_on_host(self, hostname, state="master"):
    '''Assert the router namespace on a host matches the HA state

    The master node must expose every router IP inside the qrouter
    namespace; any other state must be missing at least one of them.
    '''
    namespace = "qrouter-%s" % self.router['id']
    node = topology.get_openstack_node(hostname=hostname)
    self.assertIn(
        namespace,
        ip.list_network_namespaces(ssh_client=node.ssh_client))
    found_ips = ip.list_ip_addresses(
        scope='global', network_namespace=namespace,
        ssh_client=node.ssh_client)
    missing_ips = set(self.router_ips) - set(found_ips)
    if state == "master":
        self.assertFalse(missing_ips)
    else:
        self.assertTrue(missing_ips)
def kill_pids(self, host, pids):
    '''Kill processes with specific PIDs on the host

    :param host: Hostname of the node to kill processes on
    :type host: string
    :param pids: List of PIDs to be killed
    :type pids: list of int
    '''
    ssh_client = topology.get_openstack_node(hostname=host).ssh_client
    pid_args = ' '.join(str(pid) for pid in pids)
    # SIGTERM the whole batch, then poll with `kill -0` until every PID
    # reports "No such process"
    sh.execute(f'kill -15 {pid_args}', ssh_client=ssh_client, sudo=True)
    for _ in tobiko.retry(timeout=60, interval=2):
        statuses = sh.execute(f'kill -0 {pid_args}',
                              ssh_client=ssh_client,
                              expect_exit_status=None,
                              sudo=True).stderr.strip().split('\n')
        every_pid_reported = len(statuses) == len(pids)
        all_terminated = all(
            'No such process' in status for status in statuses)
        if all_terminated and every_pid_reported:
            break
def _do_agent_action(self, action: str,
                     hosts: typing.Optional[typing.List[str]] = None):
    '''Do action on network agent on hosts

    It ensures that given operation, like START, STOP or RESTART of the
    service is done using systemd service or container.

    :parm action: String with action to do, it can be one of the
        following: start, stop, restart
    :parm hosts: List of hostnames to do action on
    :type hosts: list of strings
    '''
    hosts = hosts or self.hosts
    self.assertNotEqual([], hosts, "Host list is empty")
    for host in hosts:
        ssh_client = topology.get_openstack_node(hostname=host).ssh_client
        if topology.check_systemd_monitors_agent(host, self.agent_name):
            # Agent runs as a systemd unit on this host
            LOG.debug(f"{action} service '{self.service_name}' on "
                      f"host '{host}'...")
            sh.execute(f"systemctl {action} {self.service_name}",
                       ssh_client=ssh_client, sudo=True)
            LOG.debug(f"{action} of the service '{self.service_name}' "
                      f"on host '{host}' done.")
            continue
        # Otherwise the agent runs inside a docker/podman container
        if self.container_name == '':
            self.container_name = self.get_agent_container_name(
                self.agent_name)
        LOG.debug(f'{action} container {self.container_name} on '
                  f"host '{host}'...")
        sh.execute(f'{self.container_runtime_name} {action} '
                   f'{self.container_name}',
                   ssh_client=ssh_client, sudo=True)
        LOG.debug(f'{action} of the container {self.container_name} '
                  f"on host '{host}' done.")
def list_pids(self, host, command_filter, process_name):
    '''Search for PIDs matched with filter and process name

    :param host: Hostname of the node to search processes on
    :type host: string
    :param command_filter: Regex to be found in process command details
    :type command_filter: string
    :param process_name: Name of the executable in process list
    :type process_name: string
    :return: list of matching PIDs
    '''
    ssh_client = topology.get_openstack_node(hostname=host).ssh_client
    candidates = sh.list_processes(command=process_name,
                                   ssh_client=ssh_client)
    matching_pids = []
    for candidate in candidates:
        try:
            cmdline = sh.execute(f'cat /proc/{candidate.pid}/cmdline',
                                 ssh_client=ssh_client).stdout
        except sh.ShellCommandFailed:
            # Process exited between listing and inspection
            LOG.debug(f'Process {candidate.pid} has been terminated right '
                      f'after the process list has been collected')
            continue
        if re.search(command_filter, cmdline):
            matching_pids.append(candidate.pid)
    return matching_pids
def ovn_dbs_are_synchronized(test_case):
    '''Verify OVN NB/SB databases are synchronized across controllers

    Uses the pacemaker-elected OVN DB master node as reference: its DBs
    must report 'state: active', every other controller's DBs must report
    'state: backup' and show the same contents as the master.

    :param test_case: test case instance providing assert* methods
    '''
    # NOTE(review): lazy import — presumably to avoid a hard tripleo
    # dependency at module import time; confirm against module layout.
    from tobiko.tripleo import containers
    # declare commands
    runtime_name = containers.get_container_runtime_name()
    search_container_cmd = (
        "%s ps --format '{{.Names}}' -f name=ovn-dbs-bundle" % runtime_name)
    container_cmd_prefix = ('%s exec -uroot {container}' % runtime_name)
    ovndb_sync_cmd = ('ovs-appctl -t /var/run/openvswitch/{ovndb_ctl_file} '
                      'ovsdb-server/sync-status')
    ovndb_show_cmd = '{ovndb} show'
    ovndb_ctl_file_dict = {'nb': 'ovnnb_db.ctl', 'sb': 'ovnsb_db.ctl'}
    ovndb_dict = {'nb': 'ovn-nbctl', 'sb': 'ovn-sbctl'}
    expected_state_active_str = 'state: active'
    expected_state_backup_str = 'state: backup'
    # use ovn master db as a reference; pacemaker must report exactly one
    ovn_master_node_name = pacemaker.get_ovn_db_master_node()
    test_case.assertEqual(1, len(ovn_master_node_name))
    ovn_master_node = topology.get_openstack_node(ovn_master_node_name[0])
    ovn_master_dbs_show_dict = {}
    # obtained the container name
    container_name = sh.execute(
        search_container_cmd,
        ssh_client=ovn_master_node.ssh_client,
        sudo=True).stdout.splitlines()[0]
    for db in ('nb', 'sb'):
        # check its synchronization is active
        sync_cmd = (' '.join(
            (container_cmd_prefix, ovndb_sync_cmd)).format(
                container=container_name,
                ovndb_ctl_file=ovndb_ctl_file_dict[db]))
        sync_status = sh.execute(sync_cmd,
                                 ssh_client=ovn_master_node.ssh_client,
                                 sudo=True).stdout
        test_case.assertIn(expected_state_active_str, sync_status)
        # obtain nb and sb show output
        show_cmd = (' '.join(
            (container_cmd_prefix, ovndb_show_cmd)).format(
                container=container_name, ovndb=ovndb_dict[db]))
        ovn_db_show = sh.execute(show_cmd,
                                 ssh_client=ovn_master_node.ssh_client,
                                 sudo=True).stdout
        ovn_master_dbs_show_dict[db] = build_ovn_db_show_dict(ovn_db_show)
    # ovn dbs are located on the controller nodes
    for node in topology.list_openstack_nodes(group='controller'):
        if node.name == ovn_master_node.name:
            # master node is the reference and do not need to be checked again
            continue
        container_name = sh.execute(search_container_cmd,
                                    ssh_client=node.ssh_client,
                                    sudo=True).stdout.splitlines()[0]
        # verify ovn nb and sb dbs are synchronized
        ovn_dbs_show_dict = {}
        for db in ('nb', 'sb'):
            # check its synchronization is active
            sync_cmd = (' '.join(
                (container_cmd_prefix, ovndb_sync_cmd)).format(
                    container=container_name,
                    ovndb_ctl_file=ovndb_ctl_file_dict[db]))
            sync_status = sh.execute(sync_cmd,
                                     ssh_client=node.ssh_client,
                                     sudo=True).stdout
            test_case.assertIn(expected_state_backup_str, sync_status)
            # obtain nb and sb show output
            show_cmd = (' '.join(
                (container_cmd_prefix, ovndb_show_cmd)).format(
                    container=container_name, ovndb=ovndb_dict[db]))
            ovn_db_show = sh.execute(show_cmd,
                                     ssh_client=node.ssh_client,
                                     sudo=True).stdout
            ovn_dbs_show_dict[db] = build_ovn_db_show_dict(ovn_db_show)
            # backup DB must have the same entries as the master DB
            test_case.assertEqual(len(ovn_dbs_show_dict[db]),
                                  len(ovn_master_dbs_show_dict[db]))
            for key in ovn_dbs_show_dict[db]:
                # sorted() makes the comparison order-insensitive
                test_case.assertEqual(
                    sorted(ovn_dbs_show_dict[db][key]),
                    sorted(ovn_master_dbs_show_dict[db][key]))
    LOG.info("All OVN DBs are synchronized")
def _delete_bridges(self, hostname, bridges):
    '''Delete OVS bridges on a host and record them for later restore

    :param hostname: hostname of the node to delete bridges on
    :param bridges: iterable of OVS bridge names to delete
    '''
    # The node lookup is loop-invariant: resolve it once instead of
    # once per bridge.
    agent_host = topology.get_openstack_node(hostname=hostname)
    for br_name in bridges:
        sh.execute("sudo ovs-vsctl del-br %s" % br_name,
                   ssh_client=agent_host.ssh_client)
        self.deleted_bridges[hostname].add(br_name)
def _create_bridge(self, hostname, bridges):
    '''Create OVS bridges on a host (no-op for already-existing ones)

    :param hostname: hostname of the node to create bridges on
    :param bridges: iterable of OVS bridge names to create
    '''
    # The node lookup is loop-invariant: resolve it once instead of
    # once per bridge.
    agent_host = topology.get_openstack_node(hostname=hostname)
    for br_name in bridges:
        # --may-exist makes the command idempotent
        sh.execute("sudo ovs-vsctl --may-exist add-br %s" % br_name,
                   ssh_client=agent_host.ssh_client)
def kill_ovn_controller(self,
                        hosts: typing.Optional[typing.List[str]] = None,
                        timeout=60, interval=5):
    '''Stop OVN controller container by killing ovn-controller process
    running into it

    Docker/Podman service should restart it automatically

    :parm hosts: List of hostnames to stop agent on
    :type hosts: list of strings
    :param timeout: Time to wait OVN controller is recovered
    :type timeout: int
    :param interval: Time to wait between attempts
    :type interval: int
    '''
    hosts = hosts or self.hosts
    self.assertNotEqual([], hosts, "Host list is empty")
    if self.container_name == '':
        self.container_name = self.get_agent_container_name(
            self.agent_name)
    if not self.container_name:
        self.skipTest(f"Missing container(s): '{self.container_name}'")
    for host in hosts:
        ssh_client = topology.get_openstack_node(hostname=host).ssh_client
        pid = None
        # Probe both known pid-file locations; presumably the path
        # depends on the deployed OVN packaging — confirm per release.
        for directory in ('ovn', 'openvswitch'):
            try:
                pid = sh.execute(f'{self.container_runtime_name} exec '
                                 f'-uroot {self.container_name} cat '
                                 f'/run/{directory}/ovn-controller.pid',
                                 ssh_client=ssh_client,
                                 sudo=True).stdout.splitlines()[0]
            except sh.ShellCommandFailed:
                LOG.debug(f'/run/{directory}/ovn-controller.pid cannot '
                          f'be accessed')
            else:
                # pid file found and read — stop probing
                LOG.debug(f'/run/{directory}/ovn-controller.pid returned '
                          f'pid {pid}')
                break
        self.assertIsNotNone(pid)
        LOG.debug(f'Killing process {pid} from container '
                  f'{self.container_name} on host {host}')
        # Kill ovn-controller inside the container; the container runtime
        # is expected to restart it
        sh.execute(f'{self.container_runtime_name} exec -uroot '
                   f'{self.container_name} kill {pid}',
                   ssh_client=ssh_client, sudo=True)
        LOG.debug(f'Container {self.container_name} has been killed '
                  f"on host '{host}'...")
        # Schedule auto-restart of service at the end of this test case
        self.addCleanup(self.start_agent, hosts=[host, ])
        # Verify the container is restarted automatically
        for attempt in tobiko.retry(timeout=timeout, interval=interval):
            search_running_ovn_cont = (f"{self.container_runtime_name} ps "
                                       "--format '{{.Names}}'"
                                       f" -f name={self.container_name}")
            output = sh.execute(search_running_ovn_cont,
                                ssh_client=ssh_client,
                                sudo=True).stdout.splitlines()
            if self.container_name in output:
                LOG.debug(f'{self.container_name} successfully restarted')
                break
            # raises once timeout/interval budget is exhausted
            attempt.check_limits()