def revert_snapshot(self, name, skip_timesync=False, skip_slaves_check=False):
        if not self.d_env.has_snapshot(name):
            return False

        logger.info("We have snapshot with such name: {:s}".format(name))

        logger.info("Reverting the snapshot '{0}' ....".format(name))
        self.d_env.revert(name)

        logger.info("Resuming the snapshot '{0}' ....".format(name))
        self.resume_environment()

        if not skip_timesync:
            self.sync_time()
        else:
            self.sync_time(["admin"])
        try:
            with QuietLogger(upper_log_level=logging.CRITICAL):
                # TODO(astudenov): add timeout_msg
                wait_pass(
                    self.fuel_web.client.get_releases,
                    expected=(exceptions.RetriableConnectionFailure, exceptions.UnknownConnectionError),
                    timeout=300,
                )
        except exceptions.Unauthorized:
            self.set_admin_keystone_password()
            self.fuel_web.get_nailgun_version()

        if not skip_slaves_check:
            # TODO(astudenov): add timeout_msg
            wait_pass(lambda: self.check_slaves_are_ready(), timeout=60 * 6)
        return True
    def wait_for_provisioning(self,
                              timeout=settings.WAIT_FOR_PROVISIONING_TIMEOUT):
        # TODO(astudenov): add timeout_msg
        wait_pass(lambda: tcp_ping_(
            self.d_env.nodes().admin.get_ip_address_by_network_name('admin'),
            22), timeout=timeout)
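Several calls above carry a "TODO(astudenov): add timeout_msg" note. As a sketch of what resolving it could look like, assuming wait_pass() accepts the same timeout_msg keyword that wait() is given elsewhere in these examples, the two retries in revert_snapshot might read:

        # Sketch only: assumes wait_pass() accepts timeout_msg like wait().
        wait_pass(
            self.fuel_web.client.get_releases,
            expected=(exceptions.RetriableConnectionFailure,
                      exceptions.UnknownConnectionError),
            timeout=300,
            timeout_msg='Nailgun releases API did not become reachable')

        wait_pass(self.check_slaves_are_ready, timeout=60 * 6,
                  timeout_msg='Some slave nodes did not become ready')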
Example #3
    def revert_snapshot(self, name, skip_timesync=False,
                        skip_slaves_check=False):
        if not self.d_env.has_snapshot(name):
            return False

        logger.info('Found snapshot with name: {:s}'.format(name))

        logger.info("Reverting the snapshot '{0}' ....".format(name))
        self.d_env.revert(name)

        logger.info("Resuming the snapshot '{0}' ....".format(name))
        self.resume_environment()

        if not skip_timesync:
            self.sync_time()
        else:
            self.sync_time(['admin'])
        try:
            with QuietLogger(upper_log_level=logging.CRITICAL):
                # TODO(astudenov): add timeout_msg
                wait_pass(
                    self.fuel_web.client.get_releases,
                    expected=(
                        exceptions.RetriableConnectionFailure,
                        exceptions.UnknownConnectionError),
                    timeout=300)
        except exceptions.Unauthorized:
            self.set_admin_keystone_password()
            self.fuel_web.get_nailgun_version()

        if not skip_slaves_check:
            # TODO(astudenov): add timeout_msg
            wait_pass(lambda: self.check_slaves_are_ready(), timeout=60 * 6)
        return True
Example #4
    def bootstrap_nodes(self, devops_nodes, timeout=settings.BOOTSTRAP_TIMEOUT,
                        skip_timesync=False):
        """Lists registered nailgun nodes
        Start vms and wait until they are registered on nailgun.
        :rtype : List of registered nailgun nodes
        """
        # self.dhcrelay_check()

        for node in devops_nodes:
            logger.info("Bootstrapping node: {}".format(node.name))
            node.start()
            # TODO(aglarendil): LP#1317213 temporary sleep
            # remove after better fix is applied
            time.sleep(5)

        with TimeStat("wait_for_nodes_to_start_and_register_in_nailgun"):
            wait(lambda: all(self.nailgun_nodes(devops_nodes)), 15, timeout,
                 timeout_msg='Bootstrap timeout for nodes: {}'
                             ''.format([node.name for node in devops_nodes]))

        wait_pass(
            lambda: checkers.validate_minimal_amount_nodes(
                nodes=self.nailgun_nodes(devops_nodes),
                expected_amount=len(devops_nodes)
            ),
            timeout=30)

        if not skip_timesync:
            self.sync_time()

        return self.nailgun_nodes(devops_nodes)
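For context, a hypothetical call site for this helper in a fuel-qa test case; the slave slice and the skip_timesync value here are illustrative, not taken from the surrounding examples:

        # Hypothetical usage: boot the first three slave VMs and wait until
        # Nailgun has registered all of them before assigning roles.
        nodes = self.env.bootstrap_nodes(
            self.env.d_env.nodes().slaves[:3], skip_timesync=True)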
Example #5
    def wait_for_provisioning(self,
                              timeout=settings.WAIT_FOR_PROVISIONING_TIMEOUT):
        # TODO(astudenov): add timeout_msg
        wait_pass(lambda: tcp_ping_(
            self.d_env.nodes().admin.get_ip_address_by_network_name('admin'),
            22), timeout=timeout)

    def wait_nailgun_available(self):
        """Check status for Nailgun"""

        fuel_web = self.manager.fuel_web

        wait_pass(fuel_web.get_nailgun_version,
                  timeout=60 * 20)
    def bootstrap_nodes(self, devops_nodes, timeout=settings.BOOTSTRAP_TIMEOUT,
                        skip_timesync=False):
        """Lists registered nailgun nodes
        Start vms and wait until they are registered on nailgun.
        :rtype : List of registered nailgun nodes
        """
        # self.dhcrelay_check()

        for node in devops_nodes:
            logger.info("Bootstrapping node: {}".format(node.name))
            node.start()
            # TODO(aglarendil): LP#1317213 temporary sleep
            # remove after better fix is applied
            time.sleep(5)

        with TimeStat("wait_for_nodes_to_start_and_register_in_nailgun"):
            wait(lambda: all(self.nailgun_nodes(devops_nodes)), 15, timeout,
                 timeout_msg='Bootstrap timeout for nodes: {}'
                             ''.format([node.name for node in devops_nodes]))

        wait_pass(
            lambda: checkers.validate_minimal_amount_nodes(
                nodes=self.nailgun_nodes(devops_nodes),
                expected_amount=len(devops_nodes)
            ),
            timeout=30)

        if not skip_timesync:
            self.sync_time()

        return self.nailgun_nodes(devops_nodes)
    def wait_check_network(self,
                           kube_host_ip,
                           works=True,
                           timeout=120,
                           interval=5):
        helpers.wait_pass(
            lambda: self.check_network(kube_host_ip, works=works),
            timeout=timeout,
            interval=interval)
Example #9
def wait_check_network(k8sclient,
                       namespace='default',
                       works=True,
                       timeout=120,
                       interval=5,
                       netchecker_pod_port=NETCHECKER_NODE_PORT):
    helpers.wait_pass(
        lambda: check_network(k8sclient,
                              netchecker_pod_port=netchecker_pod_port,
                              namespace=namespace,
                              works=works),
        timeout=timeout,
        interval=interval)
Example #10
    def reboot_hw_node(self, ssh, salt, node):
        """Reboot the given node and wait for it to start back

        :param ssh: UnderlaySSHManager, tcp-qa SSH manager instance
        :param salt: SaltManager, tcp-qa Salt manager instance
        :param node: str, name of the node to reboot
        """
        LOG.info("Sending reboot command to '{}' node.".format(node))
        remote = ssh.remote(node_name=node)
        remote.execute_async("/sbin/shutdown -r now")

        # Wait for restarted node to boot and become accessible
        helpers.wait_pass(lambda: salt.local(node, "test.ping", timeout=5),
                          timeout=60 * 10,
                          interval=5)
    def test_daemonset_rollout_rollingupdate(self, underlay,
                                             k8scluster, config, show_step):
        """Rollback a daemonset using updateStrategy type: RollingUpdate

        Scenario:
            1. Deploy k8s using fuel-ccp-installer
            2. Create a DaemonSet for nginx with image version 1_10 and
               update strategy RollingUpdate
            3. Wait until nginx pods are created and become 'ready'
            4. Check that the image version in the nginx pods is 1_10
               Check that the image version in the nginx daemonset is 1_10
            5. Change nginx image version to 1_11 using YAML
            6. Wait for 10 seconds (to check that there were
               no automatic updates of the nginx pods)
            7. Check that the image version in the nginx daemonset
               is updated to 1_11
               Wait up to ~120 sec for the image version
               in the nginx pods to change to 1_11
            8. Roll back the nginx daemonset:
               kubectl rollout undo daemonset/nginx
            9. Check that the image version in the nginx daemonset is
               downgraded to 1_10
               Wait up to ~120 sec for the image version
               in the nginx pods to be downgraded to 1_10

        Duration: 3000 seconds
        """

        self.test_daemonset_rollingupdate(k8scluster, show_step)

        k8sclient = k8scluster.api

        show_step(8)
        cmd = "kubectl rollout undo daemonset/nginx"
        underlay.check_call(cmd,
                            host=config.k8s.kube_host)

        # STEP #9
        show_step(9)
        self.check_nginx_ds_image(k8sclient, self.from_nginx_image)
        # Pods should return to the original image version
        helpers.wait_pass(
            lambda: self.check_nginx_pods_image(
                k8sclient,
                self.from_nginx_image),
            timeout=2 * 60
        )
Example #12
    def test_daemonset_rollout_rollingupdate(self, underlay, k8scluster,
                                             config, show_step):
        """Rollback a daemonset using updateStrategy type: RollingUpdate

        Scenario:
            1. Deploy k8s using fuel-ccp-installer
            2. Create a DaemonSet for nginx with image version 1_10 and
               update strategy RollingUpdate
            3. Wait until nginx pods are created and become 'ready'
            4. Check that the image version in the nginx pods is 1_10
               Check that the image version in the nginx daemonset is 1_10
            5. Change nginx image version to 1_11 using YAML
            6. Wait for 10 seconds (to check that there were
               no automatic updates of the nginx pods)
            7. Check that the image version in the nginx daemonset
               is updated to 1_11
               Wait up to ~120 sec for the image version
               in the nginx pods to change to 1_11
            8. Roll back the nginx daemonset:
               kubectl rollout undo daemonset/nginx
            9. Check that the image version in the nginx daemonset is
               downgraded to 1_10
               Wait up to ~120 sec for the image version
               in the nginx pods to be downgraded to 1_10

        Duration: 3000 seconds
        """

        self.test_daemonset_rollingupdate(k8scluster, show_step)

        k8sclient = k8scluster.api

        show_step(8)
        cmd = "kubectl rollout undo daemonset/nginx"
        underlay.check_call(cmd, host=config.k8s.kube_host)

        # STEP #9
        show_step(9)
        self.check_nginx_ds_image(k8sclient, self.from_nginx_image)
        # Pods should return to the original image version
        helpers.wait_pass(lambda: self.check_nginx_pods_image(
            k8sclient, self.from_nginx_image),
                          timeout=2 * 60)
def rabbit_client(underlay, config, os_deployed):
    """Deploy openstack
    """
    host = config.k8s.kube_host
    remote = underlay.remote(host=host)
    rabbit_port = ''.join(remote.execute(
        "kubectl get service --namespace ccp rabbitmq -o yaml |"
        " awk '/nodePort: / {print $NF}'")['stdout'])
    client = helpers.wait_pass(lambda: rabbit.RabbitClient(host, rabbit_port),
                               interval=60, timeout=360)
    return client
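The awk pipeline above scrapes the nodePort out of a full YAML dump of the service. An equivalent query that asks kubectl for just that field (a sketch, assuming the cluster's kubectl supports JSONPath output) would be:

    # Sketch: fetch the nodePort directly instead of grepping the YAML dump.
    rabbit_port = ''.join(remote.execute(
        "kubectl get service --namespace ccp rabbitmq"
        " -o jsonpath='{.spec.ports[0].nodePort}'")['stdout']).strip()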
Example #14
    def test_wait_pass(self, sleep):
        predicate = mock.Mock(return_value=True)
        result = helpers.wait_pass(predicate)
        self.assertTrue(result)

        predicate.reset_mock()
        predicate.side_effect = ValueError

        self.assertRaises(error.TimeoutError,
                          helpers.wait_pass,
                          predicate,
                          timeout=1)
Example #15
    def test_wait_pass(self, sleep):
        predicate = mock.Mock(return_value=True)
        result = helpers.wait_pass(predicate)
        self.assertTrue(result)

        predicate.reset_mock()
        predicate.side_effect = ValueError

        self.assertRaises(
            error.TimeoutError,
            helpers.wait_pass,
            predicate, timeout=1)
Example #16
    def test_wait_pass(self, sleep, time):
        predicate = mock.Mock(return_value=True)

        result = helpers.wait_pass(predicate)
        self.assertTrue(result)
        time.assert_called_once()
        sleep.assert_not_called()

        time.reset_mock()
        time.return_value = 1
        sleep.reset_mock()
        predicate.reset_mock()
        predicate.side_effect = ValueError
        self.assertRaises(ValueError, helpers.wait_pass, predicate, timeout=-1)
        sleep.assert_not_called()
        time.assert_has_calls([mock.call(), mock.call()])
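Taken together, these unit tests pin down the contract of helpers.wait_pass: return the predicate's result as soon as a call succeeds, keep retrying while it raises one of the expected exceptions, and give up once the timeout elapses. The failure mode differs between library versions: Examples #14 and #15 expect error.TimeoutError, while Example #16 shows that, at least with a non-positive timeout, the predicate's own exception propagates. A minimal illustrative sketch of the first variant (not the library's actual implementation):

import time


def wait_pass_sketch(predicate, expected=Exception, interval=5, timeout=60):
    """Illustration of the wait_pass contract implied by the tests above.

    Repeatedly call ``predicate``, swallowing ``expected`` exceptions;
    return its result on the first successful call, or raise TimeoutError
    once ``timeout`` seconds have passed without a success.
    """
    deadline = time.time() + timeout
    while True:
        try:
            return predicate()
        except expected:
            if time.time() > deadline:
                raise TimeoutError(
                    'Predicate kept failing for {} seconds'.format(timeout))
            time.sleep(interval)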
    def create_component_and_env_configdb(self):
        """ Install and check ConfigDB

        Scenario:
            1. Revert snapshot empty
            2. Install configDB extension
            3. Create components
            4. Create environment with component
            5. Get and check created data
            6. Make snapshot

        Duration: 5 min
        Snapshot: create_component_and_env_configdb
        """

        self.check_run("create_component_and_env_configdb")
        self.show_step(1)
        self.env.revert_snapshot("empty")
        self.show_step(2)
        install_configdb()

        logger.debug("Waiting for ConfigDB")
        wait_pass(lambda: self.fuel_web.client.get_components(), timeout=45)

        logger.debug("Get env and component data")
        components = self.fuel_web.client.get_components()
        envs = self.fuel_web.client.get_environments()

        assert_false(components, "Components is not empty after tuningbox installation")
        assert_false(envs, "Environments is not empty after tuningbox installation")

        # Uploaded data
        component = {
            "name": "comp1",
            "resource_definitions": [
                {"name": self.RESOURCE_NAME_1, "content": {}},
                {"name": self.SLASHED_RESOURCE, "content": {}},
            ],
        }

        environment = {"name": "env1", "components": ["comp1"], "hierarchy_levels": ["nodes"]}
        self.show_step(3)
        self.fuel_web.client.create_component(component)
        self.show_step(4)
        self.fuel_web.client.create_environment(environment)
        self.show_step(5)
        comp = self.fuel_web.client.get_components(comp_id=1)
        env = self.fuel_web.client.get_environments(env_id=1)

        expected_comp = {
            "resource_definitions": [
                {"content": {}, "component_id": 1, "id": 1, "name": self.RESOURCE_NAME_1},
                {"content": {}, "component_id": 1, "id": 2, "name": self.SLASHED_RESOURCE},
            ],
            "id": 1,
            "name": "comp1",
        }
        expected_env = {"hierarchy_levels": ["nodes"], "id": 1, "components": [1]}
        logger.debug("Compare original component with " "received component from API")
        assert_equal(comp, expected_comp)
        logger.debug("Compare original env with received env from API")
        assert_equal(env, expected_env)
        self.show_step(6)
        self.env.make_snapshot("create_component_and_env_configdb", is_make=True)
Example #18
    def test_calico_route_recovery(self, show_step, config, underlay,
                                   k8s_deployed):
        """Test for deploying k8s environment with Calico plugin and check
           that local routes are recovered by felix after removal

        Scenario:
            1. Install k8s with Calico network plugin.
            2. Run netchecker-server service.
            3. Run netchecker-agent daemon set.
            4. Get network verification status. Check status is 'OK'.
            5. Remove local route to netchecker-agent pod on the first node
            6. Check that the route is automatically recovered
            7. Get network verification status. Check status is 'OK'.

        Duration: 3000 seconds
        """

        # STEP #1
        show_step(1)
        k8sclient = k8s_deployed.api
        assert k8sclient.nodes.list() is not None, "Can not get nodes list"

        # STEP #2
        show_step(2)
        netchecker.start_server(k8s=k8s_deployed, config=config)
        LOG.info("Waiting for netchecker server is running")
        netchecker.wait_check_network(k8sclient, works=True, timeout=300)

        # STEP #3
        show_step(3)
        netchecker.start_agent(k8s=k8s_deployed, config=config)

        # STEP #4
        show_step(4)
        netchecker.wait_check_network(k8sclient, works=True, timeout=300)

        # STEP #5
        show_step(5)
        first_node = k8sclient.nodes.list()[0]
        first_node_ips = [
            addr.address for addr in first_node.status.addresses
            if 'IP' in addr.type
        ]
        assert len(first_node_ips) > 0, "Couldn't find first k8s node IP!"
        first_node_names = [
            name for name in underlay.node_names()
            if name.startswith(first_node.name)
        ]
        assert len(first_node_names) == 1, "Couldn't find first k8s node " \
                                           "hostname in SSH config!"
        first_node_name = first_node_names.pop()

        target_pod_ip = None

        for pod in k8sclient.pods.list():
            if pod.status.host_ip not in first_node_ips:
                continue
            # TODO: get pods by daemonset with name 'netchecker-agent'
            if 'netchecker-agent-' in pod.name and 'hostnet' not in pod.name:
                target_pod_ip = pod.status.pod_ip

        assert target_pod_ip is not None, "Could not find netchecker pod IP!"

        route_del_cmd = 'ip route delete {0}'.format(target_pod_ip)
        underlay.sudo_check_call(cmd=route_del_cmd, node_name=first_node_name)
        LOG.debug('Removed local route to pod IP {0} on node {1}'.format(
            target_pod_ip, first_node.name))

        # STEP #6
        show_step(6)
        route_chk_cmd = 'ip route list | grep -q "{0}"'.format(target_pod_ip)
        helpers.wait_pass(lambda: underlay.sudo_check_call(
            cmd=route_chk_cmd, node_name=first_node_name),
                          timeout=120,
                          interval=2)
        pod_ping_cmd = 'sleep 3 && ping -q -c 1 -w 3 {0}'.format(target_pod_ip)
        underlay.sudo_check_call(cmd=pod_ping_cmd, node_name=first_node_name)
        LOG.debug('Local route to pod IP {0} on node {1} is '
                  'recovered'.format(target_pod_ip, first_node.name))

        # STEP #7
        show_step(7)
        netchecker.wait_check_network(k8sclient, works=True)
Example #19
    def test_maas_scheduled_backup_restore(
            self,
            underlay_actions,
            salt_actions,
            reclass_actions,
            show_step,
            cleanup_actions):
        """Test scheduled backup restore of MAAS data

        Scenario:
            1. Update MAAS backup schedule to run every 5 minutes
            2. Apply 'backupninja' state on the backupninja client node
            3. Wait until backup creation is triggered by schedule
            4. Wait until backup creation is finished
            5. Verify that MAAS backup is created on backupninja server node
            6. Delete/change some MAAS data
            7. Restore the backup
            8. Verify that MAAS data backup is restored
            9. Verify MAAS services after restore

        Duration: ~ 3 min
        """
        salt = salt_actions
        ssh = underlay_actions
        reclass = reclass_actions

        sm = salt.local("I@salt:master", "test.ping")['return'][0].keys()[0]
        server = salt.local(
            "I@backupninja:server", "test.ping")['return'][0].keys()[0]

        # Re-configure backup schedule
        show_step(1)
        self.update_backup_schedule(reclass, self.MAAS_YAML)

        # Apply 'backupninja' state on backupninja client node
        show_step(2)
        salt.enforce_state("I@backupninja:client", "backupninja")

        # Wait until backup is triggered by schedule
        show_step(3)
        helpers.wait_pass(
            lambda: ssh.check_call(
                cmd="pgrep backupninja && echo OK", node_name=sm),
            timeout=60 * 11,
            interval=5)

        # Wait until backup is finished
        show_step(4)
        ssh.check_call(
            cmd="while pgrep backupninja > /dev/null; do sleep 2; done",
            node_name=sm,
            timeout=60 * 5)

        # Verify that backup is created and all pieces of data are rsynced
        # to backupninja server
        show_step(5)
        self.check_backup(
            ssh, server, self.BCKP_SERVER_DIR, sm, self.MAAS_BACKUP_DIRS)

        # Simulate loss/change of some MAAS data
        show_step(6)
        self.delete_files(ssh, sm, self.MAAS_DIRS, self.MAAS_FILE_TO_DELETE)
        hashes = self.update_files(
            ssh, sm, self.MAAS_DIRS, self.MAAS_FILE_TO_UPDATE)

        # Restore the backup
        show_step(7)
        salt.enforce_state("I@maas:region", "maas.region")

        # Verify that all pieces of lost/changed data are restored
        show_step(8)
        self.verify_restored_data(
            ssh,
            sm,
            self.MAAS_DIRS,
            self.MAAS_FILE_TO_DELETE,
            self.MAAS_FILE_TO_UPDATE,
            hashes)

        # Verify that MAAS services are up and running after restore
        show_step(9)
        statuses = self.get_maas_svc_status(salt, sm)
        assert all(statuses.values()), (
            "Not all MAAS services are active after restore. Please check the "
            "affected services (marked as 'False' below):\n{}".format(statuses)
        )
    def negative_auto_cic_maintenance_mode(self):
        """Check negative scenario for auto maintenance mode

        Scenario:
            1. Revert snapshot
            2. Disable UMM
            3. Change UMM.CONF
            4. Unexpected reboot
            5. Check that the controller does not switch to maintenance mode
            6. Check that the controller becomes available

        Duration 85m
        """
        self.env.revert_snapshot('cic_maintenance_mode')

        cluster_id = self.fuel_web.get_last_created_cluster()

        # Select a non-primary controller
        regular_ctrl = self.fuel_web.get_nailgun_node_by_name("slave-02")
        dregular_ctrl = self.fuel_web.get_devops_node_by_nailgun_node(
            regular_ctrl)
        _ip = regular_ctrl['ip']
        _id = regular_ctrl['id']

        asserts.assert_true('True' in check_available_mode(_ip),
                            "Maintenance mode is not available")
        logger.info('Disable UMM on node-{0}'.format(_id))

        change_config(_ip, umm=False, reboot_count=0)

        asserts.assert_false('True' in check_available_mode(_ip),
                             "Maintenance mode should not be available")

        logger.info('Unexpected reboot on node-{0}'.format(_id))

        self.ssh_manager.check_call(ip=_ip, command='reboot >/dev/null & ')

        wait(
            lambda: not checkers.check_ping(self.env.get_admin_node_ip(), _ip),
            timeout=60 * 10,
            timeout_msg='Node {} still responds to ping'.format(
                dregular_ctrl.name))

        # The node does not have enough time to report the offline status
        # after the reboot, so just wait.

        asserts.assert_true(
            checkers.check_ping(self.env.get_admin_node_ip(),
                                _ip,
                                deadline=600),
            "Host {0} is not reachable by ping during 600 sec".format(_ip))

        self.fuel_web.wait_node_is_online(dregular_ctrl)

        logger.info('Check that node-{0} is not in maintenance mode after'
                    ' the unexpected reboot'.format(_id))

        wait(lambda: tcp_ping(_ip, 22),
             timeout=60 * 10,
             timeout_msg='Node {} is still not available via SSH'.format(
                 dregular_ctrl.name))

        asserts.assert_false('True' in check_auto_mode(_ip),
                             "Maintenance mode should not switched")

        # Wait until MySQL Galera is UP on some controller
        self.fuel_web.wait_mysql_galera_is_up([dregular_ctrl.name])

        # Wait until Cinder services UP on a controller
        self.fuel_web.wait_cinder_is_up([dregular_ctrl.name])

        # Wait until RabbitMQ cluster is UP
        wait_pass(lambda: self.fuel_web.run_single_ostf_test(
            cluster_id,
            test_sets=['ha'],
            test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
                'RabbitMQ availability')),
                  timeout=1500)
        logger.info('RabbitMQ cluster is available')

        # TODO(astudenov): add timeout_msg
        wait_pass(lambda: self.fuel_web.run_single_ostf_test(
            cluster_id,
            test_sets=['sanity'],
            test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
                'Check that required services are running')),
                  timeout=1500)
        logger.info("Required services are running")

        try:
            self.fuel_web.run_ostf(cluster_id,
                                   test_sets=['smoke', 'sanity', 'ha'])
        except AssertionError:
            logger.debug("Test failed from first probe,"
                         " we sleep 600 second try one more time"
                         " and if it fails again - test will fails ")
            time.sleep(600)
            self.fuel_web.run_ostf(cluster_id,
                                   test_sets=['smoke', 'sanity', 'ha'])
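The try/except AssertionError block above, repeated verbatim in several of these examples, retries the full OSTF run once after a 10-minute pause. A small helper could factor that pattern out; a hypothetical refactor sketch, not part of fuel-qa:

    def run_ostf_with_one_retry(self, cluster_id, test_sets, delay=600):
        """Run OSTF; on AssertionError wait `delay` seconds and retry once.

        A second failure propagates, just like in the inline pattern above.
        """
        try:
            self.fuel_web.run_ostf(cluster_id, test_sets=test_sets)
        except AssertionError:
            logger.debug("OSTF failed on the first attempt; sleeping {0} "
                         "seconds before the single retry".format(delay))
            time.sleep(delay)
            self.fuel_web.run_ostf(cluster_id, test_sets=test_sets)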
    def test_daemonset_rollingupdate(self, k8scluster, show_step):
        """Update a daemonset using updateStrategy type: RollingUpdate

        Scenario:
            1. Deploy k8s using fuel-ccp-installer
            2. Create a DaemonSet for nginx with image version 1_10 and
               update strategy RollingUpdate
            3. Wait until nginx pods are created and become 'ready'
            4. Check that the image version in the nginx pods is 1_10
               Check that the image version in the nginx daemonset is 1_10
            5. Change nginx image version to 1_11 using YAML
            6. Wait for 10 seconds (to check that there were
               no automatic updates of the nginx pods)
            7. Check that the image version in the nginx daemonset
               is updated to 1_11
               Wait up to ~120 sec for the image version in the nginx pods
               to change to 1_11

        Duration: 3000 seconds
        """

        # STEP #1
        show_step(1)
        k8sclient = k8scluster.api
        assert k8sclient.nodes.list() is not None, "Can not get nodes list"

        # STEP #2
        show_step(2)
        nginx_spec = self.get_nginx_spec()
        nginx_spec['spec']['template']['spec']['containers'][0][
            'image'] = self.from_nginx_image
        k8sclient.daemonsets.create(body=nginx_spec)

        # STEP #3
        show_step(3)
        time.sleep(3)
        self.wait_nginx_pods_ready(k8sclient)

        # STEP #4
        show_step(4)
        self.check_nginx_pods_image(k8sclient, self.from_nginx_image)
        self.check_nginx_ds_image(k8sclient, self.from_nginx_image)

        # STEP #5
        show_step(5)
        nginx_spec['spec']['template']['spec']['containers'][0][
            'image'] = self.to_nginx_image
        k8sclient.daemonsets.update(body=nginx_spec,
                                    name=nginx_spec['metadata']['name'])

        # STEP #6
        show_step(6)
        time.sleep(10)

        # STEP #7
        show_step(7)
        # DaemonSet should have new image version
        self.check_nginx_ds_image(k8sclient, self.to_nginx_image)
        # Pods should have new image version
        helpers.wait_pass(
            lambda: self.check_nginx_pods_image(
                k8sclient,
                self.to_nginx_image),
            timeout=2 * 60)
def wait_running(kube_host_ip, timeout=120, interval=5):
    helpers.wait_pass(
        lambda: get_status(kube_host_ip),
        timeout=timeout, interval=interval)
    def wait_netchecker_running(kube_host_ip, timeout=120, interval=5):
        helpers.wait_pass(
            lambda: TestFuelCCPNetChecker.get_netchecker_status(kube_host_ip),
            timeout=timeout,
            interval=interval)
Example #24
        interface = self.get_interface_by_nailgun_network_name(name)
        return interface.address_set.first().ip_address

    def remote(self, network_name, login, password=None, private_keys=None):
        """Create SSH-connection to the network

        :rtype : SSHClient
        """
        return SSHClient(
            self.get_ip_address_by_network_name(network_name),
            username=login,
            password=password, private_keys=private_keys)

    def await(self, network_name, timeout=120, by_port=22):
        wait_pass(
            lambda: tcp_ping_(
                self.get_ip_address_by_network_name(network_name), by_port),
            timeout=timeout)

    # NEW
    def add_interfaces(self, interfaces):
        for interface in interfaces:
            label = interface['label']
            l2_network_device_name = interface.get('l2_network_device')
            interface_model = interface.get('interface_model')
            self.add_interface(
                label=label,
                l2_network_device_name=l2_network_device_name,
                interface_model=interface_model)

    # NEW
    def add_interface(self, label, l2_network_device_name, interface_model):
Example #25
    def test_daemonset_rollingupdate(self, k8scluster, show_step):
        """Update a daemonset using updateStrategy type: RollingUpdate

        Scenario:
            1. Deploy k8s using fuel-ccp-installer
            2. Create a DaemonSet for nginx with image version 1_10 and
               update strategy RollingUpdate
            3. Wait until nginx pods are created and become 'ready'
            4. Check that the image version in the nginx pods is 1_10
               Check that the image version in the nginx daemonset is 1_10
            5. Change nginx image version to 1_11 using YAML
            6. Wait for 10 seconds (to check that there were
               no automatic updates of the nginx pods)
            7. Check that the image version in the nginx daemonset
               is updated to 1_11
               Wait up to ~120 sec for the image version in the nginx pods
               to change to 1_11

        Duration: 3000 seconds
        """

        # STEP #1
        show_step(1)
        k8sclient = k8scluster.api
        assert k8sclient.nodes.list() is not None, "Can not get nodes list"

        # STEP #2
        show_step(2)
        nginx_spec = self.get_nginx_spec()
        nginx_spec['spec']['template']['spec']['containers'][0][
            'image'] = self.from_nginx_image
        k8sclient.daemonsets.create(body=nginx_spec)

        # STEP #3
        show_step(3)
        time.sleep(3)
        self.wait_nginx_pods_ready(k8sclient)

        # STEP #4
        show_step(4)
        self.check_nginx_pods_image(k8sclient, self.from_nginx_image)
        self.check_nginx_ds_image(k8sclient, self.from_nginx_image)

        # STEP #5
        show_step(5)
        nginx_spec['spec']['template']['spec']['containers'][0][
            'image'] = self.to_nginx_image
        k8sclient.daemonsets.update(body=nginx_spec,
                                    name=nginx_spec['metadata']['name'])

        # STEP #6
        show_step(6)
        time.sleep(10)

        # STEP #7
        show_step(7)
        # DaemonSet should have new image version
        self.check_nginx_ds_image(k8sclient, self.to_nginx_image)
        # Pods should have new image version
        helpers.wait_pass(lambda: self.check_nginx_pods_image(
            k8sclient, self.to_nginx_image),
                          timeout=2 * 60)
    def manual_cic_maintenance_mode(self):
        """Check manual maintenance mode for controller

        Scenario:
            1. Revert snapshot
            2. Switch into maintenance mode
            3. Wait until the controller reboots
            4. Exit maintenance mode
            5. Check that the controller becomes available

        Duration 155m
        """
        self.env.revert_snapshot('cic_maintenance_mode')

        cluster_id = self.fuel_web.get_last_created_cluster()

        # Select a non-primary controller
        regular_ctrl = self.fuel_web.get_nailgun_node_by_name("slave-02")
        dregular_ctrl = self.fuel_web.get_devops_node_by_nailgun_node(
            regular_ctrl)
        _ip = regular_ctrl['ip']
        _id = regular_ctrl['id']
        logger.info('Maintenance mode for node-{0}'.format(_id))
        asserts.assert_true('True' in check_available_mode(_ip),
                            "Maintenance mode is not available")
        self.ssh_manager.execute_on_remote(
            ip=_ip,
            cmd="umm on")

        self.fuel_web.wait_node_is_offline(dregular_ctrl)

        asserts.assert_true(
            checkers.check_ping(self.env.get_admin_node_ip(),
                                _ip,
                                deadline=600),
            "Host {0} is not reachable by ping during 600 sec"
            .format(_ip))

        asserts.assert_true('True' in check_auto_mode(_ip),
                            "Maintenance mode is not switched on")

        self.ssh_manager.execute_on_remote(
            ip=_ip,
            cmd="umm off")

        self.fuel_web.wait_node_is_online(dregular_ctrl)

        # Wait until Cinder services UP on a controller
        self.fuel_web.wait_cinder_is_up(
            [dregular_ctrl.name])

        # Wait until RabbitMQ cluster is UP
        wait_pass(lambda:
                  self.fuel_web.run_single_ostf_test(
                      cluster_id, test_sets=['ha'],
                      test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
                          'RabbitMQ availability')),
                  timeout=1500)
        logger.info('RabbitMQ cluster is available')

        wait_pass(lambda:
                  self.fuel_web.run_single_ostf_test(
                      cluster_id, test_sets=['sanity'],
                      test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
                          'Check that required services are running')),
                  timeout=1500)
        logger.info("Required services are running")

        # TODO(astudenov): add timeout_msg
        try:
            self.fuel_web.run_ostf(cluster_id,
                                   test_sets=['smoke', 'sanity', 'ha'])
        except AssertionError:
            logger.debug("Test failed from first probe,"
                         " we sleep 600 second try one more time"
                         " and if it fails again - test will fails ")
            time.sleep(600)
            self.fuel_web.run_ostf(cluster_id,
                                   test_sets=['smoke', 'sanity', 'ha'])
    def negative_auto_cic_maintenance_mode(self):
        """Check negative scenario for auto maintenance mode

        Scenario:
            1. Revert snapshot
            2. Disable UMM
            3. Change UMM.CONF
            4. Unexpected reboot
            5. Check that the controller does not switch to maintenance mode
            6. Check that the controller becomes available

        Duration 85m
        """
        self.env.revert_snapshot('cic_maintenance_mode')

        cluster_id = self.fuel_web.get_last_created_cluster()

        # Select a non-primary controller
        regular_ctrl = self.fuel_web.get_nailgun_node_by_name("slave-02")
        dregular_ctrl = self.fuel_web.get_devops_node_by_nailgun_node(
            regular_ctrl)
        _ip = regular_ctrl['ip']
        _id = regular_ctrl['id']

        asserts.assert_true('True' in check_available_mode(_ip),
                            "Maintenance mode is not available")
        logger.info('Disable UMM on node-{0}'.format(_id))

        change_config(_ip, umm=False, reboot_count=0)

        asserts.assert_false('True' in check_available_mode(_ip),
                             "Maintenance mode should not be available")

        command = 'reboot --force >/dev/null & '

        logger.info('Unexpected reboot on node-{0}'
                    .format(_id))

        self.ssh_manager.execute_on_remote(
            ip=_ip,
            cmd=command)

        wait(lambda:
             not checkers.check_ping(self.env.get_admin_node_ip(),
                                     _ip),
             timeout=60 * 10,
             timeout_msg='Node {} still responds to ping'.format(
                 dregular_ctrl.name))

        # The node does not have enough time to report the offline status
        # after 'reboot --force', so just wait.

        asserts.assert_true(
            checkers.check_ping(self.env.get_admin_node_ip(),
                                _ip,
                                deadline=600),
            "Host {0} is not reachable by ping during 600 sec"
            .format(_ip))

        self.fuel_web.wait_node_is_online(dregular_ctrl)

        logger.info('Check that node-{0} is not in maintenance mode after'
                    ' the unexpected reboot'.format(_id))

        wait(lambda: tcp_ping(_ip, 22),
             timeout=60 * 10,
             timeout_msg='Node {} is still not available via SSH'.format(
                 dregular_ctrl.name))

        asserts.assert_false('True' in check_auto_mode(_ip),
                             "Maintenance mode should not switched")

        # Wait until MySQL Galera is UP on some controller
        self.fuel_web.wait_mysql_galera_is_up(
            [dregular_ctrl.name])

        # Wait until Cinder services UP on a controller
        self.fuel_web.wait_cinder_is_up(
            [dregular_ctrl.name])

        # Wait until RabbitMQ cluster is UP
        wait_pass(lambda:
                  self.fuel_web.run_single_ostf_test(
                      cluster_id, test_sets=['ha'],
                      test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
                          'RabbitMQ availability')),
                  timeout=1500)
        logger.info('RabbitMQ cluster is available')

        # TODO(astudenov): add timeout_msg
        wait_pass(lambda:
                  self.fuel_web.run_single_ostf_test(
                      cluster_id, test_sets=['sanity'],
                      test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
                          'Check that required services are running')),
                  timeout=1500)
        logger.info("Required services are running")

        try:
            self.fuel_web.run_ostf(cluster_id,
                                   test_sets=['smoke', 'sanity', 'ha'])
        except AssertionError:
            logger.debug("Test failed from first probe,"
                         " we sleep 600 second try one more time"
                         " and if it fails again - test will fails ")
            time.sleep(600)
            self.fuel_web.run_ostf(cluster_id,
                                   test_sets=['smoke', 'sanity', 'ha'])
Example #28
    def test_restart_osd_node(self, salt_actions, drivetrain_actions,
                              underlay_actions, show_step):
        """Verify that Ceph OSD node is not affected by system restart

        Scenario:
        1. Find Ceph OSD nodes
        2. Check Ceph cluster health before node restart (skipped until
            PROD-31374 is fixed)
        3. Restart 1 Ceph OSD node
        4. Check Ceph cluster health after node restart (skipped until
            PROD-31374 is fixed)
        5. Run Tempest smoke test suite
        6. Run test_ceph_status.py::test_ceph_osd and
            test_services.py::test_check_services[osd] sanity tests

        Duration: ~9 min
        """
        salt = salt_actions
        ssh = underlay_actions
        dt = drivetrain_actions

        # Find Ceph OSD nodes
        show_step(1)
        tgt = "I@ceph:osd"
        osd_hosts = salt.local(tgt, "test.ping")['return'][0].keys()
        # Select a node for the test
        osd_host = osd_hosts[0]

        # Check Ceph cluster health before node restart
        show_step(2)
        ceph_health = self.get_ceph_health(ssh, osd_hosts)
        # FIXME: uncomment the check once PROD-31374 is fixed
        # status = all(
        #     ["OK" in status for node, status in ceph_health.items()])
        # assert status, "Ceph health is not OK: {0}".format(ceph_health)

        # Restart a Ceph OSD node
        show_step(3)
        LOG.info("Sending reboot command to '{}' node.".format(osd_host))
        remote = ssh.remote(node_name=osd_host)
        remote.execute_async("/sbin/shutdown -r now")

        # Wait for restarted node to boot and become accessible
        helpers.wait_pass(remote.reconnect, timeout=60 * 3, interval=5)
        echo_request = "echo"
        echo_response = salt.local(osd_host, "test.echo",
                                   echo_request)['return'][0]
        assert echo_request == echo_response[osd_host], (
            "Minion on node '{}' node is not responding after node "
            "reboot.".format(osd_host))
        LOG.info("'{}' node is back after reboot.".format(osd_host))

        # Check Ceph cluster health after node restart
        show_step(4)
        ceph_health = self.get_ceph_health(ssh, osd_hosts)  # noqa
        # FIXME: uncomment the check once PROD-31374 is fixed
        # status = all(
        #     ["OK" in status for node, status in ceph_health.items()])
        # assert status, "Ceph health is not OK: {0}".format(ceph_health)

        # Run Tempest smoke test suite
        show_step(5)
        status = dt.start_job_on_cid_jenkins(
            job_name=self.TEMPEST_JOB_NAME,
            job_parameters=self.TEMPEST_JOB_PARAMETERS,
            start_timeout=self.JENKINS_START_TIMEOUT,
            build_timeout=self.JENKINS_BUILD_TIMEOUT)

        assert status == 'SUCCESS', (
            "'{0}' job run status is {1} after executing Tempest smoke "
            "tests".format(self.TEMPEST_JOB_NAME, status))

        # Run Sanity test
        show_step(6)
        status = dt.start_job_on_cid_jenkins(
            job_name=self.SANITY_JOB_NAME,
            job_parameters=self.SANITY_JOB_PARAMETERS,
            start_timeout=self.JENKINS_START_TIMEOUT,
            build_timeout=self.JENKINS_BUILD_TIMEOUT)
        assert status == 'SUCCESS', (
            "'{0}' job run status is {1} after executing selected sanity "
            "tests".format(self.SANITY_JOB_NAME, status))
    def test_daemonset_multirollout_rollingupdate_revision(self,
                                                           config,
                                                           k8scluster,
                                                           show_step,
                                                           underlay):
        """Rollout a daemonset using updateStrategy type: RollingUpdate and
            --to-revision argument

        Scenario:
            1. Deploy k8s using fuel-ccp-installer
            2. Create a DaemonSet for nginx with image version 1_10 and
               update strategy RollingUpdate
            3. Wait until nginx pods are created and become 'ready'
            4. Check that the image version in the nginx pods is 1_10
               Check that the image version in the nginx daemonset is 1_10
            5. Change nginx image version to 1_11 using YAML
            6. Check that the image version in the nginx daemonset
               is updated to 1_11
               Wait up to ~120 sec for the image version in the nginx pods
               to change to 1_11
            7. Change nginx image version to 1_12 using YAML
            8. Check that the image version in the nginx daemonset
               is updated to 1_12
               Wait up to ~120 sec for the image version in the nginx pods
               to change to 1_12
            9. Get revision #1 and check that it contains image
               version 1_10
            10. Get revision #2 and check that it contains image
                version 1_11
            11. Get revision #3 and check that it contains image
                version 1_12
            12. Roll back the nginx daemonset to revision #1:
                kubectl rollout undo daemonset/nginx --to-revision=1
            13. Check that the image version in the nginx daemonset
                is updated to 1_10
                Wait up to ~120 sec for the image version in the nginx pods
                to change to 1_10
            14. Roll back the nginx daemonset:
                kubectl rollout undo daemonset/nginx
            15. Check that the image version in the nginx daemonset
                is updated to 1_12
                Wait up to ~120 sec for the image version in the nginx pods
                to change to 1_12

        Duration: 3000 seconds
        """

        # STEP #1
        show_step(1)
        k8sclient = k8scluster.api
        assert k8sclient.nodes.list() is not None, "Can not get nodes list"

        # STEP #2
        show_step(2)
        nginx_spec = self.get_nginx_spec()
        nginx_spec['spec']['template']['spec']['containers'][0][
            'image'] = self.from_nginx_image
        k8sclient.daemonsets.create(body=nginx_spec)

        # STEP #3
        show_step(3)
        time.sleep(3)
        self.wait_nginx_pods_ready(k8sclient)

        # STEP #4
        show_step(4)
        self.check_nginx_pods_image(k8sclient, self.from_nginx_image)
        self.check_nginx_ds_image(k8sclient, self.from_nginx_image)

        # STEP #5
        show_step(5)
        nginx_spec['spec']['template']['spec']['containers'][0][
            'image'] = self.to_nginx_image
        k8sclient.daemonsets.update(body=nginx_spec,
                                    name=nginx_spec['metadata']['name'])

        # STEP #6
        show_step(6)

        # DaemonSet should have new image version
        self.check_nginx_ds_image(k8sclient, self.to_nginx_image)
        # Pods should have new image version
        helpers.wait_pass(
            lambda: self.check_nginx_pods_image(
                k8sclient,
                self.to_nginx_image),
            timeout=2 * 60)

        # STEP #7
        show_step(7)
        nginx_spec['spec']['template']['spec']['containers'][0][
            'image'] = self.to_nginx_image_1_12
        k8sclient.daemonsets.update(body=nginx_spec,
                                    name=nginx_spec['metadata']['name'])

        # STEP #8
        show_step(8)

        # DaemonSet should have new image version
        self.check_nginx_ds_image(k8sclient, self.to_nginx_image_1_12)
        # Pods should have new image version
        helpers.wait_pass(
            lambda: self.check_nginx_pods_image(
                k8sclient,
                self.to_nginx_image_1_12),
            timeout=2 * 60)

        # STEP #9
        show_step(9)
        self.check_nginx_revision_image(config=config.k8s.kube_host,
                                        underlay=underlay,
                                        revision="1",
                                        nginx_image=self.from_nginx_image)

        # STEP #10
        show_step(10)
        self.check_nginx_revision_image(config=config.k8s.kube_host,
                                        underlay=underlay,
                                        revision="2",
                                        nginx_image=self.to_nginx_image)

        # STEP #11
        show_step(11)
        self.check_nginx_revision_image(config=config.k8s.kube_host,
                                        underlay=underlay,
                                        revision="3",
                                        nginx_image=self.to_nginx_image_1_12)

        # STEP #12
        show_step(12)
        cmd = "kubectl rollout undo daemonset/nginx --to-revision=1"
        underlay.check_call(cmd,
                            host=config.k8s.kube_host)

        # STEP #13
        show_step(13)
        self.check_nginx_ds_image(k8sclient, self.to_nginx_image)
        # Pods should have old image version
        helpers.wait_pass(
            lambda: self.check_nginx_pods_image(
                k8sclient,
                self.from_nginx_image),
            timeout=2 * 60
        )

        # STEP #14
        show_step(14)
        cmd = "kubectl rollout undo daemonset/nginx"
        underlay.check_call(cmd,
                            host=config.k8s.kube_host)

        # STEP #15
        show_step(15)
        self.check_nginx_ds_image(k8sclient, self.from_nginx_image)
        # Pods should have new image version
        helpers.wait_pass(
            lambda: self.check_nginx_pods_image(
                k8sclient,
                self.to_nginx_image_1_12),
            timeout=2 * 60
        )
def wait_check_network(kube_host_ip, works=True, timeout=120, interval=5):
    helpers.wait_pass(lambda: check_network(kube_host_ip, works=works),
                      timeout=timeout, interval=interval)
Example #31
    def test_calico_route_recovery(self, show_step, config, underlay,
                                   k8s_deployed):
        """Test for deploying k8s environment with Calico plugin and check
           that local routes are recovered by felix after removal

        Scenario:
            1. Check k8s installation.
            2. Check netchecker-server service.
            3. Check netchecker-agent daemon set.
            4. Get network verification status. Expected status is 'OK'.
            5. Get metrics from netchecker.
            6. Remove local route to netchecker-agent pod on the first node.
            7. Check that the route is automatically recovered.
            8. Get network verification status. Expected status is 'OK'.

        Duration: 3000 seconds
        """

        show_step(1)
        nch = netchecker.Netchecker(k8s_deployed.api)

        show_step(2)
        nch.wait_netchecker_pods_running('netchecker-server')

        show_step(3)
        nch.wait_netchecker_pods_running('netchecker-agent')

        show_step(4)
        nch.wait_check_network(works=True)

        show_step(5)
        res = nch.get_metric()

        assert res.status_code == 200, 'Unexpected response code {}'\
            .format(res)
        metrics = [
            'ncagent_error_count_total', 'ncagent_http_probe_code',
            'ncagent_http_probe_connect_time_ms',
            'ncagent_http_probe_connection_result',
            'ncagent_http_probe_content_transfer_time_ms',
            'ncagent_http_probe_dns_lookup_time_ms',
            'ncagent_http_probe_server_processing_time_ms',
            'ncagent_http_probe_tcp_connection_time_ms',
            'ncagent_http_probe_total_time_ms', 'ncagent_report_count_total'
        ]
        for metric in metrics:
            assert metric in res.text.strip(), \
                'Mandatory metric {0} is missing in {1}'.format(
                    metric, res.text)

        show_step(6)
        first_node = k8s_deployed.api.nodes.list()[0]
        first_node_ips = [
            addr.address for addr in first_node.read().status.addresses
            if 'IP' in addr.type
        ]
        assert len(first_node_ips) > 0, "Couldn't find first k8s node IP!"
        first_node_names = [
            name for name in underlay.node_names()
            if name.startswith(first_node.name)
        ]
        first_node_name = first_node_names[0]

        target_pod_ip = None

        for pod in k8s_deployed.api.pods.list(namespace='netchecker'):
            LOG.debug('NC pod IP: {0}'.format(pod.read().status.pod_ip))
            if pod.read().status.host_ip not in first_node_ips:
                continue
            # TODO: get pods by daemonset with name 'netchecker-agent'
            if 'netchecker-agent-' in pod.name and 'hostnet' not in pod.name:
                target_pod_ip = pod.read().status.pod_ip

        assert target_pod_ip is not None, "Could not find netchecker pod IP!"

        route_del_cmd = 'ip route delete {0}'.format(target_pod_ip)
        underlay.sudo_check_call(cmd=route_del_cmd, node_name=first_node_name)
        LOG.debug('Removed local route to pod IP {0} on node {1}'.format(
            target_pod_ip, first_node.name))

        show_step(7)
        route_chk_cmd = 'ip route list | grep -q "{0}"'.format(target_pod_ip)
        helpers.wait_pass(lambda: underlay.sudo_check_call(
            cmd=route_chk_cmd, node_name=first_node_name),
                          timeout=120,
                          interval=2)
        pod_ping_cmd = 'sleep 120 && ping -q -c 1 -w 3 {0}'.format(
            target_pod_ip)
        underlay.sudo_check_call(cmd=pod_ping_cmd, node_name=first_node_name)
        LOG.debug('Local route to pod IP {0} on node {1} is '
                  'recovered'.format(target_pod_ip, first_node.name))

        show_step(8)
        nch.wait_check_network(works=True)
Example #32
    def deploy_env_with_public_api(self):
        """Deploy environment with enabled DMZ network for API.

        Scenario:
            1. Revert snapshot with ready master node
            2. Create new environment
            3. Run network verification
            4. Deploy the environment
            5. Run network verification
            6. Run OSTF
            7. Reboot cluster nodes
            8. Run OSTF
            9. Create environment snapshot deploy_env_with_public_api

        Duration 120m
        Snapshot deploy_env_with_public_api
        """

        asserts.assert_true(settings.ENABLE_DMZ,
                            "ENABLE_DMZ variable wasn't exported")
        self.check_run('deploy_env_with_public_api')

        self.show_step(1)
        self.env.revert_snapshot('ready_with_5_slaves')

        self.show_step(2)
        cluster_id = self.fuel_web.create_cluster(
            name=self.__class__.__name__,
        )

        self.fuel_web.update_nodes(
            cluster_id,
            {
                'slave-01': ['controller'],
                'slave-02': ['compute'],
                'slave-03': ['cinder'],
            },
            update_interfaces=False
        )

        network_template = utils.get_network_template('public_api')
        self.fuel_web.client.upload_network_template(
            cluster_id=cluster_id, network_template=network_template)

        net = self.fuel_web.get_network_pool('os-api')
        nodegroup = self.fuel_web.get_nodegroup(cluster_id)
        os_api_template = {
            "group_id": nodegroup['id'],
            "name": 'os-api',
            "cidr": net['network'],
            "gateway": net['gateway'],
            "meta": {
                'notation': 'cidr',
                'render_type': None,
                'map_priority': 2,
                'configurable': True,
                'use_gateway': True,
                'name': 'os-api',
                'cidr': net['network'],
                'vlan_start': None,
                'vips': ['haproxy']
            }
        }
        self.fuel_web.client.add_network_group(os_api_template)

        logger.debug('Networks: {0}'.format(
            self.fuel_web.client.get_network_groups()))

        self.show_step(3)
        self.fuel_web.verify_network(cluster_id)

        self.show_step(4)
        self.fuel_web.deploy_cluster_wait(cluster_id, timeout=180 * 60)

        self.show_step(5)
        self.fuel_web.verify_network(cluster_id)

        self.show_step(6)
        self.fuel_web.run_ostf(cluster_id=cluster_id)

        self.show_step(7)
        nodes = self.fuel_web.client.list_cluster_nodes(cluster_id)
        self.fuel_web.warm_restart_nodes(
            self.fuel_web.get_devops_nodes_by_nailgun_nodes(nodes))

        controller = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
            cluster_id=cluster_id,
            roles=['controller']
        )[0]
        controller_devops = \
            self.fuel_web.get_devops_node_by_nailgun_node(controller)

        # Wait until MySQL Galera is UP on some controller
        self.fuel_web.wait_mysql_galera_is_up([controller_devops.name])

        # Wait until Cinder services UP on a controller
        self.fuel_web.wait_cinder_is_up([controller_devops.name])

        wait_pass(
            lambda: self.fuel_web.run_ostf(cluster_id,
                                           test_sets=['sanity', 'smoke']),
            interval=10,
            timeout=12 * 60
        )

        self.show_step(8)
        self.fuel_web.run_ostf(cluster_id=cluster_id)

        self.show_step(9)
        self.env.make_snapshot('deploy_env_with_public_api', is_make=True)
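
# --- Illustrative sketch, not part of the scraped example above ---
# Every snippet on this page leans on wait_pass() to retry a callable until
# it stops raising. A minimal stand-in with the same call shape (callable,
# expected exceptions, timeout, interval) could look like this; the real
# devops.helpers implementation may differ in details.
import time


def wait_pass_sketch(func, expected=Exception, timeout=300, interval=3):
    """Call ``func`` until it succeeds or ``timeout`` seconds have passed."""
    deadline = time.time() + timeout
    while True:
        try:
            return func()
        except expected:
            # Only the expected exception types are swallowed and retried;
            # anything else propagates immediately, as in the snippets above.
            if time.time() >= deadline:
                raise
            time.sleep(interval)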
    def manual_cic_maintenance_mode(self):
        """Check manual maintenance mode for controller

        Scenario:
            1. Revert snapshot
            2. Switch in maintenance mode
            3. Wait until controller is rebooting
            4. Exit maintenance mode
            5. Check the controller become available

        Duration 155m
        """
        self.env.revert_snapshot('cic_maintenance_mode')

        cluster_id = self.fuel_web.get_last_created_cluster()

        # Select a non-primary controller
        regular_ctrl = self.fuel_web.get_nailgun_node_by_name("slave-02")
        dregular_ctrl = self.fuel_web.get_devops_node_by_nailgun_node(
            regular_ctrl)
        _ip = regular_ctrl['ip']
        _id = regular_ctrl['id']
        logger.info('Maintenance mode for node-{0}'.format(_id))
        asserts.assert_true('True' in check_available_mode(_ip),
                            "Maintenance mode is not available")
        self.ssh_manager.check_call(ip=_ip, command="umm on", expected=[-1])

        self.fuel_web.wait_node_is_offline(dregular_ctrl)

        asserts.assert_true(
            checkers.check_ping(self.env.get_admin_node_ip(),
                                _ip,
                                deadline=600),
            "Host {0} is not reachable by ping during 600 sec".format(_ip))

        asserts.assert_true('True' in check_auto_mode(_ip),
                            "Maintenance mode is not switched on")

        self.ssh_manager.check_call(ip=_ip, command="umm off")

        self.fuel_web.wait_node_is_online(dregular_ctrl)

        # Wait until Cinder services UP on a controller
        self.fuel_web.wait_cinder_is_up([dregular_ctrl.name])

        # Wait until RabbitMQ cluster is UP
        wait_pass(lambda: self.fuel_web.run_single_ostf_test(
            cluster_id,
            test_sets=['ha'],
            test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
                'RabbitMQ availability')),
                  timeout=1500)
        logger.info('RabbitMQ cluster is available')

        wait_pass(lambda: self.fuel_web.run_single_ostf_test(
            cluster_id,
            test_sets=['sanity'],
            test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
                'Check that required services are running')),
                  timeout=1500)
        logger.info("Required services are running")

        # TODO(astudenov): add timeout_msg
        try:
            self.fuel_web.run_ostf(cluster_id,
                                   test_sets=['smoke', 'sanity', 'ha'])
        except AssertionError:
            logger.debug("Test failed from first probe,"
                         " we sleep 600 second try one more time"
                         " and if it fails again - test will fails ")
            time.sleep(600)
            self.fuel_web.run_ostf(cluster_id,
                                   test_sets=['smoke', 'sanity', 'ha'])
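
# --- Illustrative sketch, not part of the scraped examples ---
# The "run OSTF, sleep 600 s, retry once" block above reappears verbatim in
# other tests on this page. A small hypothetical wrapper keeps that policy in
# one place; the names below are illustrative, not from the original code.
import time


def run_ostf_with_one_retry(fuel_web, cluster_id,
                            test_sets=('smoke', 'sanity', 'ha'),
                            pause=600):
    """Run OSTF once and retry a single time after ``pause`` seconds."""
    try:
        fuel_web.run_ostf(cluster_id, test_sets=list(test_sets))
    except AssertionError:
        # Give the cluster time to settle before the final attempt;
        # a second failure propagates and fails the calling test.
        time.sleep(pause)
        fuel_web.run_ostf(cluster_id, test_sets=list(test_sets))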
    def wait_nailgun_available(self):
        """Check status for Nailgun"""

        fuel_web = self.manager.fuel_web

        wait_pass(fuel_web.get_nailgun_version, timeout=60 * 20)
Example #35
    def await(self, network_name, timeout=120, by_port=22):
        wait_pass(
            lambda: tcp_ping_(
                self.get_ip_address_by_network_name(network_name), by_port),
            timeout=timeout)
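    # Illustrative usage, assuming this method lives on a devops node object:
    #     node.await('admin', timeout=600)   # wait for SSH on the admin net
    # Note that 'await' became a reserved word in Python 3.7, so this snippet
    # only runs as-is on older interpreters (or after a rename).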
Example #36
    def test_salt_master_scheduled_backup_restore(
            self,
            underlay_actions,
            salt_actions,
            reclass_actions,
            show_step,
            precreate_sm_test_files,
            cleanup_actions):
        """Test scheduled backup restore of Salt master data

        Scenario:
            1. Update Salt master backup schedule to run every 5 minutes
            2. Apply 'backupninja' state on the backupninja client node
            3. Wait until backup creation is triggered by schedule
            4. Wait until backup creation is finished
            5. Verify that Salt master backup is created on backupninja server
               node
            6. Delete/change some reclass data
            7. Restore the backup
            8. Verify that Salt master data backup is restored
            9. Verify that minions are responding

        Duration: ~ 3 min
        """
        salt = salt_actions
        ssh = underlay_actions
        reclass = reclass_actions

        sm = salt.local("I@salt:master", "test.ping")['return'][0].keys()[0]
        server = salt.local(
            "I@backupninja:server", "test.ping")['return'][0].keys()[0]

        # Re-configure backup schedule
        show_step(1)
        self.update_backup_schedule(reclass, self.SM_YAML)

        # Apply 'backupninja' state on backupninja client node
        show_step(2)
        salt.enforce_state("I@backupninja:client", "backupninja")

        # Wait until backup is triggered by schedule
        show_step(3)
        helpers.wait_pass(
            lambda: ssh.check_call(
                cmd="pgrep backupninja && echo OK", node_name=sm),
            timeout=60 * 11,
            interval=5)

        # Wait until backup is finished
        show_step(4)
        ssh.check_call(
            cmd="while pgrep backupninja > /dev/null; do sleep 2; done",
            node_name=sm,
            timeout=60 * 5)

        # Verify that backup is created and all pieces of data are rsynced
        # to backupninja server
        show_step(5)
        self.check_backup(
            ssh, server, self.BCKP_SERVER_DIR, sm, self.SM_BACKUP_DIRS)

        # Simulate loss/change of some reclass data
        show_step(6)
        self.delete_files(ssh, sm, self.SM_DIRS, self.SM_FILE_TO_DELETE)
        hashes = self.update_files(
            ssh, sm, self.SM_DIRS, self.SM_FILE_TO_UPDATE)

        # Restore the backup
        show_step(7)
        ssh.check_call(
            "salt-call state.sls salt.master.restore,salt.minion.restore",
            node_name=sm,
            raise_on_err=False,
            timeout=60 * 4)

        # Verify that all pieces of lost/changed data are restored
        show_step(8)
        self.verify_restored_data(
            ssh,
            sm,
            self.SM_DIRS,
            self.SM_FILE_TO_DELETE,
            self.SM_FILE_TO_UPDATE,
            hashes)

        # Ping minions
        show_step(9)
        salt.local('*', "test.ping", timeout=30)
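
# --- Illustrative sketch, not part of the scraped example above ---
# Steps 3-4 above first wait for the scheduled backupninja run to start and
# then for it to finish. A hypothetical helper built from the same two shell
# probes could read as follows; ``ssh`` and ``helpers`` are assumed to be the
# same objects/module used in the test above.
def wait_backupninja_run(ssh, helpers, node_name,
                         start_timeout=60 * 11, finish_timeout=60 * 5):
    """Block until a scheduled backupninja run has started and completed."""
    # Poll until the scheduled job shows up in the process list.
    helpers.wait_pass(
        lambda: ssh.check_call(
            cmd="pgrep backupninja && echo OK", node_name=node_name),
        timeout=start_timeout,
        interval=5)
    # Then block while the process is still running.
    ssh.check_call(
        cmd="while pgrep backupninja > /dev/null; do sleep 2; done",
        node_name=node_name,
        timeout=finish_timeout)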
    def create_component_and_env_configdb(self):
        """ Install and check ConfigDB

        Scenario:
            1. Revert snapshot empty
            2. Install configDB extension
            3. Create components
            4. Create environment with component
            5. Get and check created data
            6. Make snapshot

        Duration: 5 min
        Snapshot: create_component_and_env_configdb
        """

        self.check_run('create_component_and_env_configdb')
        self.show_step(1)
        self.env.revert_snapshot('empty')
        self.show_step(2)
        install_configdb()

        logger.debug('Waiting for ConfigDB')
        wait_pass(lambda: self.fuel_web.client.get_components(), timeout=45)

        logger.debug('Get env and component data')
        components = self.fuel_web.client.get_components()
        envs = self.fuel_web.client.get_environments()

        assert_false(components,
                     "Components are not empty after tuningbox installation")
        assert_false(envs,
                     "Environments are not empty after tuningbox installation")

        # Uploaded data
        component = {
            "name":
            "comp1",
            "resource_definitions": [{
                "name": self.RESOURCE_NAME_1,
                "content": {}
            }, {
                "name": self.SLASHED_RESOURCE,
                "content": {}
            }]
        }

        environment = {
            "name": "env1",
            "components": ["comp1"],
            "hierarchy_levels": ["nodes"]
        }
        self.show_step(3)
        self.fuel_web.client.create_component(component)
        self.show_step(4)
        self.fuel_web.client.create_environment(environment)
        self.show_step(5)
        comp = self.fuel_web.client.get_components(comp_id=1)
        env = self.fuel_web.client.get_environments(env_id=1)

        expected_comp = {
            'resource_definitions': [{
                'content': {},
                'component_id': 1,
                'id': 1,
                'name': self.RESOURCE_NAME_1
            }, {
                'content': {},
                'component_id': 1,
                'id': 2,
                'name': self.SLASHED_RESOURCE
            }],
            'id': 1,
            'name': "comp1"
        }
        expected_env = {
            'hierarchy_levels': ["nodes"],
            'id': 1,
            'components': [1]
        }
        logger.debug('Compare original component with '
                     'received component from API')
        assert_equal(comp, expected_comp)
        logger.debug('Compare original env with received env from API')
        assert_equal(env, expected_env)
        self.show_step(6)
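        # make_snapshot(..., is_make=True) together with check_run() at the
        # top of the test is assumed to act as an idempotency guard: if the
        # snapshot already exists the test is skipped, otherwise it runs once
        # and saves the snapshot for later tests to revert to.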
        self.env.make_snapshot('create_component_and_env_configdb',
                               is_make=True)
Example #38
    def auto_cic_maintenance_mode(self):
        """Check auto maintenance mode for controller

        Scenario:
            1. Revert snapshot
            2. Unexpected reboot
            3. Wait until controller is switching in maintenance mode
            4. Exit maintenance mode
            5. Check the controller become available

        Duration 155m
        """
        self.env.revert_snapshot('cic_maintenance_mode')

        cluster_id = self.fuel_web.get_last_created_cluster()

        # Select a non-primary controller
        regular_ctrl = self.fuel_web.get_nailgun_node_by_name("slave-02")
        dregular_ctrl = self.fuel_web.get_devops_node_by_nailgun_node(
            regular_ctrl)
        _ip = regular_ctrl['ip']
        _id = regular_ctrl['id']

        asserts.assert_true('True' in check_available_mode(_ip),
                            "Maintenance mode is not available")

        change_config(_ip, reboot_count=0)
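        # The call above is assumed to set the UMM unclean-reboot threshold
        # to zero, so the very next unexpected reboot pushes the controller
        # into auto maintenance mode.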

        logger.info('Change UMM.CONF on node-{0}'.format(_id))

        logger.info('Unexpected reboot on node-{0}'.format(_id))

        command = 'reboot --force >/dev/null & '

        self.ssh_manager.execute_on_remote(ip=_ip, cmd=command)

        wait(
            lambda: not checkers.check_ping(self.env.get_admin_node_ip(), _ip),
            timeout=60 * 10,
            timeout_msg='Node {} still responds to ping'.format(
                dregular_ctrl.name))

        self.fuel_web.wait_node_is_offline(dregular_ctrl)

        logger.info('Check that node-{0} is in maintenance mode after'
                    ' unexpected reboot'.format(_id))
        asserts.assert_true(
            checkers.check_ping(self.env.get_admin_node_ip(),
                                _ip,
                                deadline=600),
            "Host {0} is not reachable by ping during 600 sec".format(_ip))

        asserts.assert_true('True' in check_auto_mode(_ip),
                            "Maintenance mode is not switched on")

        logger.info('Turn off maintenance mode')
        self.ssh_manager.execute_on_remote(ip=_ip, cmd="umm off")
        time.sleep(30)

        change_config(_ip)
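        # change_config() with no extra arguments is assumed to restore the
        # default UMM.CONF values that were changed at the start of the test.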

        self.fuel_web.wait_node_is_online(dregular_ctrl)

        # Wait until MySQL Galera is UP on some controller
        self.fuel_web.wait_mysql_galera_is_up([dregular_ctrl.name])

        # Wait until Cinder services UP on a controller
        self.fuel_web.wait_cinder_is_up([dregular_ctrl.name])

        # Wait until RabbitMQ cluster is UP
        wait_pass(lambda: self.fuel_web.run_single_ostf_test(
            cluster_id,
            test_sets=['ha'],
            test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
                'RabbitMQ availability')),
                  timeout=1500)
        logger.info('RabbitMQ cluster is available')

        # Wait until all OpenStack services are UP
        wait_pass(lambda: self.fuel_web.run_single_ostf_test(
            cluster_id,
            test_sets=['sanity'],
            test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
                'Check that required services are running')),
                  timeout=1500)
        logger.info("Required services are running")

        try:
            self.fuel_web.run_ostf(cluster_id,
                                   test_sets=['smoke', 'sanity', 'ha'])
        except AssertionError:
            logger.debug("Test failed from first probe,"
                         " we sleep 600 second try one more time"
                         " and if it fails again - test will fails ")
            time.sleep(600)
            self.fuel_web.run_ostf(cluster_id,
                                   test_sets=['smoke', 'sanity', 'ha'])