Ejemplo n.º 1
0
    def test_keepalived_failover(self):
        # Kills the 'keepalived' processes of the HA router on the host of
        # the active L3 agent, then checks that the server is still pingable
        # and that the list of 'keepalived' PIDs has changed.
        router_id = self.ha_stack.network_stack.gateway_id
        self.agents = [self.wait_for_active_ha_l3_agent()]
        pids_before_kill = self.get_cmd_pids(
            'keepalived', router_id, min_pids_per_host=2)
        ping.ping_until_received(self.ha_stack.ip_address).assert_replied()
        active_host = self.agents[0]['host']

        # The 'keepalived-state-change' process has to be UP before the
        # 'keepalived' process gets killed, as the agent status can break
        # otherwise. It is considered UP once two consecutive listings of
        # its PIDs are equal.
        state_change_pattern = f'neutron-keepalived-state-change.*{router_id}'
        previous_pids = {}
        for _ in tobiko.retry(timeout=120., interval=5.):
            current_pids = self.get_cmd_pids(
                '/usr/bin/python', state_change_pattern, min_pids_per_host=1)
            if previous_pids == current_pids:
                break
            previous_pids = current_pids

        self.kill_pids(active_host, pids_before_kill[active_host])
        ping.ping_until_received(self.ha_stack.ip_address).assert_replied()

        # 'keepalived' must have been spawned back after being killed, hence
        # the PIDs listed now have to differ from the ones listed before
        pids_after_kill = self.get_cmd_pids(
            'keepalived', router_id, min_pids_per_host=2)
        self.assertNotEqual(pids_before_kill, pids_after_kill)
Ejemplo n.º 2
0
def wait_for_processes(timeout: tobiko.Seconds = None,
                       sleep_interval: tobiko.Seconds = None,
                       ssh_client: ssh.SSHClientType = None,
                       is_cirros: bool = None,
                       **list_params):
    """Wait until no process matching the given criteria is listed.

    Processes are listed with `list_processes` on every retry attempt and
    the function returns as soon as the listing is empty.

    :param timeout: retry timeout
    :param sleep_interval: time to wait between attempts (the retry loop
        uses 5 seconds as its default interval)
    :param ssh_client: forwarded to `list_processes` and to the host name
        lookup
    :param is_cirros: forwarded to `list_processes`
    :param list_params: extra keyword arguments forwarded to
        `list_processes`
    :raises PsWaitTimeout: when processes are still listed on the last
        retry attempt
    """
    for attempt in tobiko.retry(timeout=timeout,
                                interval=sleep_interval,
                                default_interval=5.):
        processes = list_processes(ssh_client=ssh_client,
                                   is_cirros=is_cirros,
                                   **list_params)
        if not processes:
            break

        hostname = _hostname.get_hostname(ssh_client=ssh_client)
        process_lines = '\n'.join(
            '    {pid} {command}'.format(pid=process.pid,
                                         command=process.command)
            for process in processes)

        if attempt.is_last:
            raise PsWaitTimeout(timeout=timeout, hostname=hostname,
                                processes=process_lines)
        # The process list is interpolated into the message. Writing a
        # '\n'.join(...) call right after the message literal would make
        # Python concatenate both literals first and use the whole text as
        # the join separator.
        LOG.debug(f"Waiting for process(es) on host {hostname}...\n"
                  f"{process_lines}")
Ejemplo n.º 3
0
 def test_port_ips(self, ip_version: typing.Optional[int] = None):
     """Checks port IPS has been assigned to server via DHCP protocol"""
     port_ips = set(
         neutron.list_device_ip_addresses(
             device_id=self.stack.server_id,
             network_id=self.stack.network_stack.network_id,
             need_dhcp=self.stack.need_dhcp,
             ip_version=ip_version))

     if not port_ips:
         # There is nothing to compare: skip the test case
         if ip_version:
             self.skipTest(f"Server has any port IPv{ip_version} address to be"
                           " tested")
         else:
             self.skipTest("Server has any port IP address to be tested")
         return

     # Neutron port IPs and VM port IPs have to match. A VM connected to the
     # external network that has just been created may need some time to
     # receive its IPv6 address, therefore the comparison is retried.
     for attempt in tobiko.retry(timeout=60., interval=4.):
         vm_ips = ip.list_ip_addresses(scope='global',
                                       ssh_client=self.stack.ssh_client)
         # IPs of the server that don't belong to the port are ignored
         server_ips = set(vm_ips) & port_ips
         LOG.debug("Neutron IPs and VM IPs should match...")
         try:
             self.assertEqual(
                 port_ips, server_ips,
                 f"Server {self.stack.server_id} is missing port "
                 f"IP(s): {port_ips - server_ips}")
         except self.failureException:
             attempt.check_limits()
         else:
             break
Ejemplo n.º 4
0
    def wait_for_members_to_be_reachable(self,
                                         interval: tobiko.Seconds = None,
                                         timeout: tobiko.Seconds = None):
        """Wait until both member servers pass the balancing check.

        Every member is checked on its own with
        `octavia.check_members_balanced`. Members that already passed are not
        checked again on following attempts.

        :param interval: retry interval (5 seconds when not given)
        :param timeout: retry timeout (the `wait_timeout` of the first member
            when not given)
        :raises sh.ShellCommandFailed: when a check still fails on the last
            retry attempt
        :raises RuntimeError: when the retry loop ends without success
        """
        members = [self.server_stack, self.other_server_stack]
        if not members:
            return

        # Number of members already reached from localhost
        reached_count = 0
        for attempt in tobiko.retry(timeout=timeout,
                                    interval=interval,
                                    default_interval=5.,
                                    default_timeout=members[0].wait_timeout):
            pending_members = members[reached_count:]
            try:
                for member in pending_members:
                    octavia.check_members_balanced(
                        members_count=1,
                        ip_address=member.ip_address,
                        protocol=self.lb_protocol,
                        port=self.lb_port,
                        requests_count=1)
                    # This member will not be checked again
                    reached_count += 1
            except sh.ShellCommandFailed:
                if attempt.is_last:
                    raise
                LOG.info(
                    "Waiting for members to have HTTP service available...")
            else:
                break
        else:
            raise RuntimeError("Members couldn't be reached!")
Ejemplo n.º 5
0
    def wait_until_stack_deleted(self,
                                 check=True,
                                 cached=True,
                                 timeout: tobiko.Seconds = None,
                                 interval: tobiko.Seconds = None):
        """Wait until the deleted stack is not reported anymore.

        Every retry attempt calls `wait_for_delete_complete` with the time
        left and the interval of the attempt. The method returns as soon as
        that call gives back `None`.

        :param check: forwarded to `wait_for_delete_complete`
        :param cached: forwarded to `wait_for_delete_complete` on the first
            attempt; the following attempts always pass `False`
        :param timeout: retry timeout (defaults to `self.wait_timeout`)
        :param interval: retry interval (defaults to `self.wait_interval`)
        :raises HeatStackDeletionFailed: when the stack is still reported on
            the last retry attempt
        """
        retry_loop = tobiko.retry(timeout=timeout,
                                  interval=interval,
                                  default_timeout=self.wait_timeout,
                                  default_interval=self.wait_interval)
        for attempt in retry_loop:
            # The stack status is refreshed on every attempt
            stack = self.wait_for_delete_complete(
                check=check,
                cached=cached,
                timeout=attempt.time_left,
                interval=attempt.interval)
            if stack is None:
                LOG.debug(f"Stack {self.stack_name} disappeared")
                return

            assert stack.stack_status == DELETE_COMPLETE
            if attempt.is_last:
                raise HeatStackDeletionFailed(
                    name=self.stack_name,
                    observed=stack.stack_status,
                    expected={DELETE_COMPLETE},
                    status_reason=stack.stack_status_reason)

            cached = False
            LOG.debug("Waiting for deleted stack to disappear: '%s'",
                      self.stack_name)

        # Reached only when the retry loop ends without returning or raising
        raise RuntimeError("Retry look broken itself")
Ejemplo n.º 6
0
    def create_stack(self, retry: tobiko.Retry = None) -> stacks.Stack:
        """Create the stack, deleting it and retrying after invalid results.

        When the `TOBIKO_PREVENT_CREATE` boolean environment option is set,
        no creation is attempted and the result of `validate_created_stack`
        is returned instead.

        :param retry: retry parameters forwarded to `tobiko.retry`
        :returns: the stack returned by `try_create_stack` (or by
            `validate_created_stack`)
        :raises InvalidStackError: when the creation still fails on the last
            retry attempt
        """
        if config.get_bool_env('TOBIKO_PREVENT_CREATE'):
            return self.validate_created_stack()

        for attempt in tobiko.retry(retry,
                                    count=self.retry_count,
                                    timeout=self.retry_timeout,
                                    interval=0.):
            try:
                return self.try_create_stack()
            except InvalidStackError:
                LOG.exception(f"Error creating stack '{self.stack_name}'",
                              exc_info=1)
                if attempt.is_last:
                    raise

                self.delete_stack()

                # A random sleep time makes conflicting concurrent creations
                # less probable to occur
                delay = random_sleep_time(min_time=self.min_retry_interval,
                                          max_time=self.max_retry_interval)
                LOG.debug(
                    f"Failed creating stack '{self.stack_name}' "
                    f"(attempt {attempt.number} of "
                    f"{attempt.count}). It will retry after "
                    f"{delay} seconds",
                    exc_info=1)
                time.sleep(delay)

        # Reached only when the retry loop ends without returning or raising
        raise RuntimeError('Retry loop broken')
Ejemplo n.º 7
0
    def test_retry_with_timeout_and_big_interval(self):
        # Iterates a retry loop having a 9 seconds timeout and a 3 seconds
        # interval with the time functions patched, then verifies the error
        # raised, the attempts produced and the sleep calls performed.
        # NOTE(review): how much the mocked clock advances per attempt is
        # defined by `patch_time`, which is not visible here - confirm the
        # expected elapsed times and sleep durations against it.
        mock_time = self.patch_time()
        attempts = []

        try:
            # The loop body never breaks: the loop can only end by raising
            for attempt in tobiko.retry(timeout=9., interval=3.):
                attempts.append(attempt)
        except tobiko.RetryTimeLimitError as ex:
            # The error message must embed the details of the last attempt
            self.assertEqual(
                "Retry time limit exceeded "
                f"({attempt.details})", str(ex))
        else:
            self.fail("RetryTimeLimitError not raised")

        # Four attempts are expected, numbered from 1, with these elapsed
        # times
        expected = [
            tobiko.retry_attempt(number=i + 1,
                                 timeout=9.,
                                 interval=3.,
                                 start_time=mock_time.start_time,
                                 elapsed_time=elapsed_time)
            for i, elapsed_time in enumerate([0., 4., 7., 10.])
        ]
        self.assertEqual(expected, attempts)
        # Sleep calls expected between the attempts, in order
        mock_time.sleep.assert_has_calls(
            [mock.call(2.), mock.call(1.),
             mock.call(1.)])
Ejemplo n.º 8
0
    def setUp(self):
        """Wait for the Octavia resources to be ready to balance traffic.

        It waits for the members to be active, for the Octavia service and
        for the members to be reachable; it then retries the members
        balancing check for up to 5 minutes.
        """
        # pylint: disable=no-member
        super(OctaviaBasicFaultTest, self).setUp()

        # Wait for Octavia objects to be active
        first_member = self.listener_stack.server_stack.stack_name
        second_member = self.listener_stack.other_server_stack.stack_name
        LOG.info(f'Waiting for member {first_member} and '
                 f'for member {second_member} to be created...')
        self.listener_stack.wait_for_active_members()

        self.loadbalancer_stack.wait_for_octavia_service()

        self.listener_stack.wait_for_members_to_be_reachable()

        # Octavia resources are known to be still provisioning, therefore
        # these exceptions are tolerated for 5 minutes
        tolerated_errors = (octavia.RoundRobinException,
                            octavia.TrafficTimeoutError,
                            sh.ShellCommandFailed)
        for attempt in tobiko.retry(timeout=300.):
            try:
                octavia.check_members_balanced(
                    pool_id=self.listener_stack.pool_id,
                    ip_address=self.loadbalancer_stack.floating_ip_address,
                    lb_algorithm=self.listener_stack.lb_algorithm,
                    protocol=self.listener_stack.lb_protocol,
                    port=self.listener_stack.lb_port)
            except tolerated_errors:
                LOG.exception(f"Traffic didn't reach all members after "
                              f"#{attempt.number} attempts and "
                              f"{attempt.elapsed_time} seconds")
                if attempt.is_last:
                    raise
            else:
                break
Ejemplo n.º 9
0
 def run_operation(self):
     """Execute the reboot command on the remote host via SSH.

     The SSH connection and the command execution are retried until they
     succeed or no time is left for the attempt. The SSH client is closed
     after every attempt, whatever its outcome.

     :raises RebootHostTimeoutError: when an attempt fails and it has no
         time left
     """
     # Reset the state before trying to reboot
     self.is_rebooted = False
     self.start_time = None
     for attempt in tobiko.retry(
             timeout=self.timeout,
             default_timeout=self.default_wait_timeout,
             default_count=self.default_wait_count,
             default_interval=self.default_wait_interval):
         try:
             # The connection gets the time left for this attempt and it
             # is tried only once, as retrying is done by this loop
             channel = self.ssh_client.connect(
                 connection_timeout=attempt.time_left, retry_count=1)
             LOG.info("Executing reboot command on host "
                      f"'{self.hostname}' (command='{self.command}')... ")
             # The start time is recorded just before executing the command
             self.start_time = tobiko.time()
             channel.exec_command(str(self.command))
         except Exception as ex:
             if attempt.time_left > 0.:
                 # There is still time: log it and go for another attempt
                 LOG.debug(f"Unable to reboot remote host "
                           f"(time_left={attempt.time_left}): {ex}")
             else:
                 LOG.exception(f"Unable to reboot remote host: {ex}")
                 raise RebootHostTimeoutError(
                     hostname=self.hostname or self.ssh_client.host,
                     timeout=attempt.timeout) from ex
         else:
             # The command has been sent: stop retrying
             LOG.info(f"Host '{self.hostname}' is rebooting "
                      f"(command='{self.command}').")
             break
         finally:
             # Ensure we close connection after rebooting command
             self.ssh_client.close()
Ejemplo n.º 10
0
    def wait_processes_destroyed(self, command_filter, pids_per_host,
                                 timeout=120, interval=2):
        '''Wait until every given process is terminated on its host

        Each PID is checked on its own retry loop until `is_destroyed`
        reports it as terminated. As a different process could be spawned
        with the same PID, `command_filter` is given to `is_destroyed` along
        with the PID.

        :param command_filter: Pattern to be found in process command details
        :type command_filter: string
        :param pids_per_host: Dictionary with hostnames as a key and list of
                PIDs as a value
        :type pids_per_host: dict
        :param timeout: Time to wait till each process will be terminated
        :type timeout: int
        :param interval: Time to sleep between attempts
        :type interval: int
        '''
        LOG.debug(f'Waiting for processes to be finished: {pids_per_host}')
        for host, pids in pids_per_host.items():
            for pid in pids:
                for _ in tobiko.retry(timeout=timeout, interval=interval):
                    LOG.debug(f'Check if {pid} has been terminated on {host}')
                    if not self.is_destroyed(pid, command_filter, host):
                        continue
                    LOG.debug(f'Process {pid} finished on {host}')
                    break
Ejemplo n.º 11
0
    def test_0_vlan_ip_addresses(self):
        """Check Nova server VLAN port IP addresses"""
        self.stack.ensure_server_status('ACTIVE')
        expected = set(self.stack.list_vlan_fixed_ips())
        for attempt in tobiko.retry():
            actual = set(
                ip.list_ip_addresses(device=self.stack.vlan_device,
                                     ssh_client=self.stack.ssh_client,
                                     scope='global'))
            # An address that is not expected is an immediate failure
            unexpected = actual - expected
            if unexpected:
                self.fail("Unexpected IP address assigned to VLAN port: "
                          f"{unexpected}")

            # Expected addresses may need some time to get assigned, so the
            # check is retried while any of them is missing
            missing = expected - actual
            if not missing:
                break
            # The messages report the missing addresses; at this point
            # `unexpected` is known to be empty
            if attempt.is_last:
                self.fail("IP addresses not assigned to VLAN port: "
                          f"{missing}")
            LOG.debug("IP addresses still not assigned to VLAN port: "
                      f"{missing}")
        else:
            raise RuntimeError("Broken retry loop")
        self.assertEqual(expected, actual)
Ejemplo n.º 12
0
    def _stop_octavia_main_services(self, controller: OpenStackTopologyNode,
                                    excluded_services: typing.List[str]):
        """Stop the Octavia services on a controller and verify traffic.

        Every service in `octavia.OCTAVIA_SERVICES` that is not listed in
        `excluded_services` is stopped with `systemctl stop`. The method then
        asserts that none of the stopped services is listed among the Octavia
        services of the controller, waits for the Octavia service and finally
        checks that traffic is balanced among the members.

        :param controller: node where the services are stopped
        :param excluded_services: names of the services to be left untouched
        """
        # Select the services to stop
        services_to_stop = octavia.OCTAVIA_SERVICES
        if excluded_services:
            services_to_stop = [
                name for name in services_to_stop
                if name not in excluded_services]

        # Stop the selected services
        for service in services_to_stop:
            sh.execute(f"systemctl stop {service}",
                       ssh_client=controller.ssh_client,
                       sudo=True)
            LOG.info(f"Stopping {service} on {controller.name}")

        # Make sure the services have been stopped
        octavia_active_units = self._list_octavia_services(
            controller.ssh_client)
        for service in services_to_stop:
            self.assertTrue(
                service not in octavia_active_units,
                f'{service} was not stopped on {controller.name}')

        self.loadbalancer_stack.wait_for_octavia_service()

        # Octavia services are known to be stopping and restarting, so the
        # OctaviaClientException is tolerated for 30 seconds
        for attempt in tobiko.retry(timeout=30.):
            try:
                octavia.check_members_balanced(
                    pool_id=self.listener_stack.pool_id,
                    ip_address=self.loadbalancer_stack.floating_ip_address,
                    lb_algorithm=self.listener_stack.lb_algorithm,
                    protocol=self.listener_stack.lb_protocol,
                    port=self.listener_stack.lb_port)
            except octavia.OctaviaClientException:
                LOG.exception(f"Octavia service was unavailable after "
                              f"#{attempt.number} attempts and "
                              f"{attempt.elapsed_time} seconds")
                if attempt.is_last:
                    raise
            else:
                break
Ejemplo n.º 13
0
    def create_client(self):  # noqa: C901
        """Create a podman client for the host of `self.ssh_client`.

        The user name and the host name are read from the connect parameters
        of the SSH client. When `podman_version_3()` is true, a background
        SSH tunnel forwarding the local socket file
        `/tmp/podman.sock_<host>` to `/run/podman/podman.sock` is started
        unless one is already listed by `ps -ef`, and a
        `podman.PodmanClient` bound to that local socket is created.
        Otherwise a `_podman1.Client` is created.

        Connection refused/reset errors are retried for up to 60 seconds,
        every 5 seconds.

        :returns: the client that has been created
        """
        for _ in tobiko.retry(timeout=60., interval=5.):
            try:
                podman_remote_socket = self.discover_podman_socket()
                username = self.ssh_client.connect_parameters['username']
                host = self.ssh_client.connect_parameters["hostname"]
                socket = podman_remote_socket
                podman_remote_socket_uri = f'unix:/tmp/podman.sock_{host}'

                remote_uri = f'ssh://{username}@{host}{socket}'

                if podman_version_3():
                    # check if a ssh tunnel exists, if not create one
                    psall = str(subprocess.check_output(('ps', '-ef')))
                    if f'ssh -L /tmp/podman.sock_{host}' not in psall:
                        # remove the socket file left by a previous tunnel
                        if os.path.exists(f"/tmp/podman.sock_{host}"):
                            subprocess.call(
                                ['rm', '-f', f'/tmp/podman.sock_{host}'])
                        # start a background  ssh tunnel with the remote host
                        subprocess.call([
                            'ssh', '-L', f'/tmp/podman.sock_{host}:'
                            f'/run/podman/podman.sock', host, '-N', '-f'
                        ])
                        # wait for the tunnel to create the local socket file
                        for _ in tobiko.retry(timeout=60., interval=1.):
                            if os.path.exists(f'/tmp/podman.sock_{host}'):
                                break
                    client = podman.PodmanClient(
                        base_url=podman_remote_socket_uri)
                    if client.ping():
                        LOG.info('container_client is online')

                else:
                    client = _podman1.Client(  # pylint: disable=E1101
                        uri=podman_remote_socket_uri,
                        remote_uri=remote_uri,
                        identity_file='~/.ssh/id_rsa')
                    if client.system.ping():
                        LOG.info('container_client is online')
                return client
            except (ConnectionRefusedError, ConnectionResetError):
                # The enclosing retry loop performs the next attempt. Calling
                # this method recursively from here would start a nested
                # retry loop whose resulting client would be discarded.
                LOG.debug('Unable to connect to podman service, retrying...',
                          exc_info=1)
Ejemplo n.º 14
0
    def communicate(self,
                    stdin=None,
                    stdout=True,
                    stderr=True,
                    timeout: tobiko.Seconds = None,
                    receive_all=False,
                    buffer_size=None):
        """Exchange data with the process over its standard streams.

        It loops, writing `stdin` data to the process STDIN and reading from
        its STDOUT and STDERR when they are ready, until
        `_is_communicating` reports there is nothing more to do.

        :param stdin: data to be written to the process STDIN; when falsy
            the STDIN stream is not selected
        :param stdout: when truthy the STDOUT stream is selected for reading
        :param stderr: when truthy the STDERR stream is selected for reading
        :param timeout: time limit of the retry loop, converted with
            `tobiko.to_seconds`
        :param receive_all: forwarded as `receive` to `_is_communicating`
        :param buffer_size: forwarded to the STDOUT/STDERR read helpers
        """
        timeout = tobiko.to_seconds(timeout)

        # Avoid waiting for data in the first loop
        poll_interval = 0.
        # Only the requested streams that are still open take part to the
        # communication
        streams = _io.select_opened_files([
            stdin and self.stdin, stdout and self.stdout, stderr
            and self.stderr
        ])
        for attempt in tobiko.retry(timeout=timeout):
            if not self._is_communicating(
                    streams=streams, send=stdin, receive=receive_all):
                break

            # Remove closed streams
            streams = _io.select_opened_files(streams)

            # Select ready streams
            read_ready, write_ready = _io.select_files(files=streams,
                                                       timeout=poll_interval)
            if read_ready or write_ready:
                # Avoid waiting for data the next time
                poll_interval = 0.
                if self.stdin in write_ready:
                    # Write data to remote STDIN
                    # NOTE(review): the helper presumably returns the data
                    # still to be written - confirm
                    stdin = self._write_to_stdin(stdin)
                    if not stdin:
                        # No data is left: stop selecting STDIN
                        streams.remove(self.stdin)
                if self.stdout in read_ready:
                    # Read data from remote STDOUT
                    stdout = self._read_from_stdout(buffer_size=buffer_size)
                    if not stdout:
                        # A falsy read result stops the selection of STDOUT
                        streams.remove(self.stdout)
                if self.stderr in read_ready:
                    # Read data from remote STDERR
                    stderr = self._read_from_stderr(buffer_size=buffer_size)
                    if not stderr:
                        # A falsy read result stops the selection of STDERR
                        streams.remove(self.stderr)
            else:
                # No stream is ready: verify the timeout before waiting
                self._check_communicate_timeout(attempt=attempt,
                                                timeout=timeout)
                # Wait for data in the following loops
                poll_interval = self.parameters.poll_interval
                LOG.debug(f"Waiting for process data {poll_interval} "
                          f"seconds... \n"
                          f"  command: {self.command}\n"
                          f"  attempt: {attempt.details}\n"
                          f"  streams: {streams}")
Ejemplo n.º 15
0
 def wait_for_metadata_status(self, count=None, timeout=60., interval=2.,
                              is_reachable: typing.Optional[bool] = None):
     """Wait for the metadata service to match the expected reachability.

     :param count: maximum number of retry attempts
     :param timeout: retry timeout
     :param interval: time to wait between attempts
     :param is_reachable: expected status, forwarded to
         `assert_metadata_is_reachable`
     """
     # NOTE(review): when `is_reachable` is None nothing is verified and the
     # loop keeps iterating until the retry limits end it - confirm this is
     # what callers expect.
     for attempt in tobiko.retry(timeout=timeout, interval=interval,
                                 count=count):
         if is_reachable is None:
             continue
         try:
             self.assert_metadata_is_reachable(is_reachable)
         except self.failureException:
             # re-raises failureException when reaching retry limits
             attempt.check_limits()
         else:
             break
Ejemplo n.º 16
0
 def request_number(self, timeout=30.) -> int:
     """Receive a number from the server listening on the socket file.

     The connection to the Unix socket returned by `get_sock_file` is
     retried while it is refused or the file does not exist.

     :param timeout: retry timeout for the connection
     :returns: the received message (up to 4096 bytes) converted to integer
     :raises RuntimeError: when the connection still fails on the last
         retry attempt
     """
     with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as connection:
         for attempt in tobiko.retry(timeout=timeout):
             try:
                 connection.connect(get_sock_file())
             except (ConnectionRefusedError, FileNotFoundError) as ex:
                 if attempt.is_last:
                     raise RuntimeError('Server not running') from ex
             else:
                 break
         return int(connection.recv(4096))
Ejemplo n.º 17
0
    def wait_for_active_ha_l3_agent(self) -> AgentType:
        """Wait until exactly one L3 agent is active for the HA router.

        The agents hosting the router are listed every 5 seconds, for up to
        180 seconds, until one and only one of them has `ha_state` equal to
        `'active'`.

        :returns: the active agent
        """
        router_id = self.ha_stack.network_stack.gateway_id
        for attempt in tobiko.retry(timeout=180., interval=5.):
            hosting_agents = neutron.list_l3_agent_hosting_routers(router_id)
            try:
                active_agent = hosting_agents.with_items(
                    ha_state='active').unique
            except (tobiko.MultipleObjectsFound, tobiko.ObjectNotFound):
                # None or more than one active agent: check the retry
                # limits and try again
                attempt.check_limits()
            else:
                break

        return active_agent
Ejemplo n.º 18
0
    def wait_for_stack_status(
            self,
            expected_status: typing.Container[str],
            check=True,
            cached=True,
            timeout: tobiko.Seconds = None,
            interval: tobiko.Seconds = None) \
            -> typing.Optional[stacks.Stack]:
        """Waits for the stack to reach the given status.

        The waiting ends when the stack status is one of the expected ones,
        when it is not an `*_IN_PROGRESS` status, or on the last retry
        attempt.

        :param expected_status: stack statuses to wait for
        :param check: when true the final status is verified, raising an
            error when it is not an expected one
        :param cached: when true the first attempt uses `self.stack`, if it
            is set, instead of getting the stack again
        :param timeout: retry timeout (defaults to `self.wait_timeout`)
        :param interval: retry interval (defaults to `self.wait_interval`)
        :returns: the last stack that has been obtained (it can be `None`)
        :raises HeatStackNotFound: when `check` is true, no stack has been
            obtained and `DELETE_COMPLETE` is not an expected status
        """
        for attempt in tobiko.retry(timeout=timeout,
                                    interval=interval,
                                    default_timeout=self.wait_timeout,
                                    default_interval=self.wait_interval):
            if cached:
                # The cached stack can be used on the first attempt only
                cached = False
                stack = self.stack or self.get_stack()
            else:
                stack = self.get_stack()
            # An object without `stack_status` (as `None`) is handled as a
            # stack in DELETE_COMPLETE status
            stack_status = getattr(stack, 'stack_status', DELETE_COMPLETE)
            if stack_status in expected_status:
                LOG.debug(f"Stack '{self.stack_name}' reached expected "
                          f"status: '{stack_status}'")
                break

            # A status that is not in progress is not going to be waited for
            if not stack_status.endswith('_IN_PROGRESS'):
                LOG.warning(f"Stack '{self.stack_name}' reached unexpected "
                            f"status: '{stack_status}'")
                break

            if attempt.is_last:
                LOG.warning(f"Timed out waiting for stack '{self.stack_name}' "
                            f"status to change from '{stack_status}' to "
                            f"'{expected_status}'.")
                break

            LOG.debug(f"Waiting for stack '{self.stack_name}' status to "
                      f"change from '{stack_status}' to "
                      f"'{expected_status}'...")
        else:
            # Reached only when the retry loop ends without breaking
            raise RuntimeError('Retry loop broken')

        if stack is not None:
            self._log_stack_status(stack)

        if check:
            if stack is None:
                # A missing stack is fine only when its deletion is expected
                if DELETE_COMPLETE not in expected_status:
                    raise HeatStackNotFound(name=self.stack_name)
            else:
                check_stack_status(stack, expected_status)

        return stack
Ejemplo n.º 19
0
def ensure_nova_quota_limits(project: keystone.ProjectType = None,
                             user: keystone.UserType = None,
                             client: _client.NovaClientType = None,
                             retry_timeout: tobiko.Seconds = None,
                             retry_interval: tobiko.Seconds = None,
                             **required_quotas: int):
    """Increase the Nova quota limits until the required quotas fit.

    Nothing is done when no quota is required. When a user is given, the
    limits of the project are ensured first, using the same retry
    parameters, and those of the user are ensured afterwards.

    :param project: project the quota limits belong to
    :param user: optional user the quota limits belong to
    :param client: Nova client
    :param retry_timeout: retry timeout (60 seconds when not given)
    :param retry_interval: retry interval (3 seconds when not given)
    :param required_quotas: required quantities, forwarded to
        `get_nova_quota_limits_increase`
    :raises EnsureNovaQuotaLimitsError: when the limits still have to be
        increased on the last retry attempt
    """
    if not required_quotas:
        return

    client = _client.nova_client(client)
    project = keystone.get_project_id(project=project,
                                      session=client.client.session)
    if user:
        user = keystone.get_user_id(user=user) or None
    else:
        user = None
    if user:
        # Must increase project limits before user ones. The retry
        # parameters given by the caller apply to the project too.
        ensure_nova_quota_limits(project=project,
                                 client=client,
                                 retry_timeout=retry_timeout,
                                 retry_interval=retry_interval,
                                 **required_quotas)

    for attempt in tobiko.retry(timeout=retry_timeout,
                                interval=retry_interval,
                                default_timeout=60.,
                                default_interval=3.):
        # The extra increase being requested shrinks as attempts go on
        actual_limits, expected_limits = get_nova_quota_limits_increase(
            project=project,
            user=user,
            client=client,
            extra_increase=10 // attempt.number,
            **required_quotas)
        if expected_limits:
            if attempt.is_last:
                raise EnsureNovaQuotaLimitsError(
                    project=project,
                    actual_limits=actual_limits,
                    expected_limits=expected_limits)
            LOG.info(f"Increase Nova quota limit(s) (project={project}, "
                     f"user={user}): {actual_limits} -> {expected_limits}...")
            try:
                set_nova_quota_set(project=project,
                                   user=user,
                                   client=client,
                                   **expected_limits)
            except Exception:
                if attempt.is_last:
                    raise
                # The limits are evaluated again on the next attempt
                LOG.exception("Error increasing Nova quota set limits: "
                              f"{expected_limits}")
        else:
            LOG.debug(f"Required Nova quota limits are OK: {required_quotas}")
            break
    else:
        raise RuntimeError("Broken retry loop")
Ejemplo n.º 20
0
    def test_reboot_amphora_compute_node(self):
        # Reboots the compute node obtained from
        # `octavia.get_amphora_compute_node` for the load balancer, waits
        # for the load balancer and its members to be active again and then
        # verifies that traffic is balanced among the members.
        amphora_compute_host = octavia.get_amphora_compute_node(
            loadbalancer_id=self.loadbalancer_stack.loadbalancer_id,
            lb_port=self.listener_stack.lb_port,
            lb_protocol=self.listener_stack.lb_protocol,
            ip_address=self.loadbalancer_stack.floating_ip_address)

        LOG.debug('Rebooting compute node...')

        # Reboot Amphora's compute node will initiate a failover
        amphora_compute_host.reboot_overcloud_node()

        LOG.debug('Compute node has been rebooted')

        # Wait for the LB to be updated
        try:
            self.loadbalancer_stack.wait_for_update_loadbalancer(timeout=30)

        except tobiko.RetryTimeLimitError:
            # Timing out here is tolerated: the reason is given by the
            # message logged below
            LOG.info('The restarted servers reached ACTIVE status after the'
                     ' LB finished its update process, hence no exception is'
                     ' being raised even though the update timeout was'
                     ' reached.')

        self.loadbalancer_stack.wait_for_active_loadbalancer()

        LOG.debug(f'Load Balancer {self.loadbalancer_stack.loadbalancer_id} is'
                  f' ACTIVE')

        # Wait for Octavia objects' provisioning status to be ACTIVE
        self.listener_stack.wait_for_active_members()

        # Verify Octavia functionality
        # For 5 minutes we ignore specific exceptions as we know
        # that Octavia resources are being provisioned/migrated
        for attempt in tobiko.retry(timeout=300.):
            try:
                octavia.check_members_balanced(
                    pool_id=self.listener_stack.pool_id,
                    ip_address=self.loadbalancer_stack.floating_ip_address,
                    lb_algorithm=self.listener_stack.lb_algorithm,
                    protocol=self.listener_stack.lb_protocol,
                    port=self.listener_stack.lb_port)
                break
            except (octavia.RoundRobinException, octavia.TrafficTimeoutError,
                    sh.ShellCommandFailed):
                LOG.exception(f"Traffic didn't reach all members after "
                              f"#{attempt.number} attempts and "
                              f"{attempt.elapsed_time} seconds")
                # The error is raised again only on the last attempt
                if attempt.is_last:
                    raise
Ejemplo n.º 21
0
def check_computes_vms_running_via_virsh():
    """Check via virsh that all VMs on the compute nodes are running.

    For every node of the 'compute' group the VM ids are read from the
    'vm_id' column returned by get_compute_vms_df(); each VM is then polled
    with check_vm_running_via_virsh() inside a tobiko retry loop (120
    seconds timeout, 5 seconds interval) until it is reported as running.
    """
    compute_nodes = topology.list_openstack_nodes(group='compute')
    for compute in compute_nodes:
        fqdn = get_fqdn_from_topology_node(compute)
        # One retry policy per compute node, iterated once for each VM.
        # NOTE(review): presumably iterating it raises once the timeout
        # expires - confirm against tobiko.retry.
        polling = tobiko.retry(timeout=120, interval=5)
        vm_ids = get_compute_vms_df(fqdn)['vm_id'].to_list()
        for vm_id in vm_ids:
            for _ in polling:
                if not check_vm_running_via_virsh(compute, vm_id):
                    LOG.info(f"{vm_id} is not in running state on "
                             f"{compute.hostname}")
                    continue
                LOG.info(f"{vm_id} is running ok on "
                         f"{compute.hostname}")
                break
Ejemplo n.º 22
0
 def get_client(self, ssh_client):
     """Return the client built by self._get_client(), retrying on errors.

     The call is attempted inside a tobiko retry loop (60 seconds timeout,
     5 seconds interval). After every failed attempt but the last one the
     default SSH port forward manager is reset before trying again; the
     exception raised by the last attempt is propagated to the caller.

     :param ssh_client: forwarded as-is to self._get_client().
     :raises RuntimeError: if the retry loop ends without a result.
     """
     for attempt in tobiko.retry(timeout=60.0, interval=5.0):
         try:
             return self._get_client(ssh_client=ssh_client)
         # TODO chose a better exception type
         except Exception:
             if attempt.is_last:
                 raise
             LOG.debug('Unable to connect to docker server', exc_info=1)
             ssh.reset_default_ssh_port_forward_manager()
     # Only reached when the retry loop is exhausted without returning or
     # re-raising, which is not expected to happen.
     raise RuntimeError("Broken retry loop")
Ejemplo n.º 23
0
def kill_rabbitmq_service():
    """Kill rabbit on a random node and wait until pacemaker sees it down.

    The node is picked at random from the 'messaging' group on composable
    roles environments, from the 'controller' group otherwise. After the
    kill command has been executed, pacemaker is polled (30 seconds
    timeout, 5 seconds interval) and the function returns as soon as the
    rabbitmq resource is no longer reported as healthy.
    """
    group = ('messaging' if tripleo_topology.is_composable_roles_env()
             else 'controller')
    candidates = topology.list_openstack_nodes(group=group)
    node = random.choice(candidates)
    sh.execute(kill_rabbit, ssh_client=node.ssh_client)
    LOG.info('kill rabbit: {} on server: {}'.format(kill_rabbit, node.name))
    # NOTE(review): presumably the retry loop raises when the timeout
    # expires while the resource is still healthy - confirm.
    for _ in tobiko.retry(timeout=30, interval=5):
        resources = pacemaker.PacemakerResourcesStatus()
        healthy = resources.rabbitmq_resource_healthy()
        if not healthy:
            return
Ejemplo n.º 24
0
    def test_retry_when_succeed(self):
        """Leaving the retry loop at once yields one attempt and no sleep."""
        mock_time = self.patch_time()

        recorded = []
        for attempt in tobiko.retry():
            recorded.append(attempt)
            break  # leaving the loop marks the attempt as successful

        # Only the very first attempt is expected, with no time elapsed
        first_attempt = tobiko.retry_attempt(number=1,
                                             start_time=mock_time.start_time,
                                             elapsed_time=0.)
        self.assertEqual([first_attempt], recorded)
        mock_time.sleep.assert_not_called()
Ejemplo n.º 25
0
    def create_process(self):
        """Execute command on a remote host using SSH client.

        On every attempt the SSH client is connected, a session is opened
        on its transport and the command is executed there, prefixed with
        one ``NAME=value`` assignment per environment variable when an
        environment is configured. Any error closes the SSH client and the
        whole sequence is retried according to the retry parameters.

        :returns: the session the command has been started on.
        :raises ShellTimeoutExpired: when the retry time limit is reached.
            Other errors raised by ``attempt.check_limits()`` are not
            handled here and propagate to the caller.
        """
        command = str(self.command)
        ssh_client = self.ssh_client
        parameters = self.parameters

        tobiko.check_valid_type(ssh_client, ssh.SSHClientFixture)
        tobiko.check_valid_type(parameters, SSHShellProcessParameters)
        environment = parameters.environment

        for attempt in tobiko.retry(
                timeout=self.parameters.timeout,
                default_count=self.parameters.retry_count,
                default_interval=self.parameters.retry_interval,
                default_timeout=self.parameters.retry_timeout):

            timeout = attempt.time_left
            details = (f"command='{command}', "
                       f"login={ssh_client.login}, "
                       f"timeout={timeout}, "
                       f"attempt={attempt}, "
                       f"environment={environment}")
            LOG.debug(f"Create remote process... ({details})")
            try:
                client = ssh_client.connect()
                process = client.get_transport().open_session()
                # Build the command line from the unmodified base command
                # on every attempt: prefixing 'command' in place would
                # prepend the variables once more on each retry.
                remote_command = command
                if environment:
                    # NOTE(review): the guard checks parameters.environment
                    # while the variables are read from self.environment;
                    # presumably both refer to the same mapping - confirm.
                    variables = " ".join(
                        f"{name}={shlex.quote(value)}"
                        for name, value in self.environment.items())
                    remote_command = variables + " " + command
                process.exec_command(remote_command)
                LOG.debug(f"Remote process created. ({details})")
                return process
            except Exception:
                # Before doing anything else cleanup SSH connection
                ssh_client.close()
                LOG.debug(f"Error creating remote process. ({details})",
                          exc_info=1)
            try:
                # Raises when no further attempt is allowed
                attempt.check_limits()
            except tobiko.RetryTimeLimitError as ex:
                LOG.debug(f"Timed out creating remote process. ({details})")
                raise _exception.ShellTimeoutExpired(command=command,
                                                     stdin=None,
                                                     stdout=None,
                                                     stderr=None,
                                                     timeout=timeout) from ex
Ejemplo n.º 26
0
    def test_2_delete_server(self):
        """Delete an ACTIVE server and verify it becomes unreachable.

        After requesting the deletion the server is fetched again every 3
        seconds (60 seconds timeout) until nova reports it as not found.
        """
        server = self.ensure_server(status='ACTIVE')
        self.stack.assert_is_reachable()

        nova.delete_server(server.id)
        for _ in tobiko.retry(timeout=60., interval=3.):
            try:
                server = nova.get_server(server_id=server.id)
            except nova.ServerNotFoundError:
                # The server is gone: stop polling
                LOG.debug(f"Server '{server.id}' deleted")
                break
            # Still there: report its current status and poll again
            LOG.debug(f"Waiting for server deletion:\n"
                      f" - server.id='{server.id}'"
                      f" - server.status='{server.status}'")
        self.stack.assert_is_unreachable()
Ejemplo n.º 27
0
def request_galera_sst():
    """Remove one galera grastate and check an SST is requested afterwards.

    The date returned by remove_one_grastate_galera() is compared with the
    last timestamp found in the output of the galera_sst_request command,
    which is executed on the same node every 5 seconds (30 seconds timeout)
    until it produces some output.

    :raises TimestampException: if the bootstrap date is later than the
        date of the last SST request found.
    """
    node, date = remove_one_grastate_galera()
    bootstrap_date = datetime.strptime(date, '%a %b %d %H:%M:%S %Y')
    for _ in tobiko.retry(timeout=30, interval=5):
        sst_req = sh.execute(galera_sst_request,
                             ssh_client=node.ssh_client).stdout
        if sst_req:
            break
    # Only the most recent timestamp found in the output is relevant
    timestamps = re.findall(
        r"\d{4}-\d{,2}-\d{,2}\s*\d{,2}:\d{,2}:\d{,2}", sst_req)
    sst_date = datetime.strptime(timestamps[-1], '%Y-%m-%d %H:%M:%S')
    if bootstrap_date > sst_date:
        raise TimestampException
Ejemplo n.º 28
0
def kill_all_galera_services():
    """Kill galera on every node and wait until pacemaker sees it down.

    The nodes are taken from the 'database' group on composable roles
    environments, from the 'controller' group otherwise. Once the kill
    command has been executed on all of them, pacemaker is polled (30
    seconds timeout, 5 seconds interval) and the function returns as soon
    as the galera resource is no longer reported as healthy.
    """
    group = ('database' if tripleo_topology.is_composable_roles_env()
             else 'controller')
    for node in topology.list_openstack_nodes(group=group):
        sh.execute(kill_galera, ssh_client=node.ssh_client)
        LOG.info('kill galera: {} on server: {}'.format(
            kill_galera, node.name))
    # NOTE(review): presumably the retry loop raises when the timeout
    # expires while the resource is still healthy - confirm.
    for _ in tobiko.retry(timeout=30, interval=5):
        resources = pacemaker.PacemakerResourcesStatus()
        healthy = resources.galera_resource_healthy()
        if not healthy:
            return
Ejemplo n.º 29
0
def get_console_output(server: typing.Optional[ServerType] = None,
                       server_id: typing.Optional[str] = None,
                       timeout: tobiko.Seconds = None,
                       interval: tobiko.Seconds = None,
                       length: typing.Optional[int] = None,
                       client: NovaClientType = None) -> \
        typing.Optional[str]:
    """Return the console output of a server, waiting for it if needed.

    The console output is requested repeatedly (by default every 5 seconds
    for at most 60 seconds) until a non-empty one is obtained.

    :param server: server to get the ID from when server_id is not given.
    :param server_id: ID of the server.
    :param timeout: retry timeout (defaults to 60 seconds).
    :param interval: retry interval (defaults to 5 seconds).
    :param length: requested output length, capped to
        MAX_SERVER_CONSOLE_OUTPUT_LENGTH, which is also the default.
    :param client: forwarded to nova_client().
    :returns: the console output, or None when the server is not ACTIVE
        or when no output has been produced by the last attempt.
    """
    length = (MAX_SERVER_CONSOLE_OUTPUT_LENGTH
              if length is None
              else min(length, MAX_SERVER_CONSOLE_OUTPUT_LENGTH))

    server_id = get_server_id(server=server, server_id=server_id)

    retry = tobiko.retry(timeout=timeout,
                         interval=interval,
                         default_timeout=60.,
                         default_interval=5.)
    for attempt in retry:
        try:
            output = nova_client(client).servers.get_console_output(
                server=server_id, length=length)
        except (TypeError, novaclient.exceptions.NotFound):
            # Only active servers have console output
            server = get_server(server_id=server_id)
            if server.status != 'ACTIVE':
                LOG.debug(f"Server '{server_id}' has no console output "
                          f"(status = '{server.status}').")
                return None
            # For some reason it could happen resulting body cannot be
            # translated to json object and it is converted to None
            LOG.exception(f"Error getting server '{server_id}' console "
                          "output")
        else:
            if output:
                LOG.debug(f"got server '{server_id}' console output "
                          f"(length = {len(output)}).")
                return output

        if attempt.is_last:
            # Give up quietly instead of letting the retry loop fail
            LOG.info(f"No console output produced by server '{server_id}') "
                     f" after {attempt.elapsed_time} seconds")
            return None

        LOG.debug(f"Waiting for server '{server_id}' console output...")

    return None
Ejemplo n.º 30
0
 def execute(self,
             retry_count: int = None,
             retry_timeout: tobiko.Seconds = None,
             retry_interval: tobiko.Seconds = None) -> \
         sh.ShellExecuteResult:
     """Start the process and wait for it until it exits with status 0.

     The process is started and waited for inside a tobiko retry loop
     (a single attempt by default); it is stopped after every attempt
     whose exit status is not 0.

     :param retry_count: maximum number of attempts (defaults to 1).
     :param retry_timeout: retry timeout.
     :param retry_interval: interval between attempts.
     :returns: the result of the first attempt exiting with status 0.
     :raises RuntimeError: if the retry loop ends without a result.
     """
     attempts = tobiko.retry(count=retry_count,
                             timeout=retry_timeout,
                             interval=retry_interval,
                             default_count=1)
     for attempt in attempts:
         self.start()
         # NOTE(review): presumably check=True makes wait() raise on
         # failure, so only the last attempt reports errors - confirm.
         result = self.wait(check=attempt.is_last)
         if result.exit_status == 0:
             return result
         self.stop()
     # Only reached when the retry loop is exhausted without a success
     raise RuntimeError("Retry loop broken")