def upgrade_rollback_new_deploy(self):
        """After rollback delete existing cluster and deploy new one,

        Scenario:
        1. Revert "upgrade_rollback_ceph_ha" snapshot.
        2. Delete cluster and wait until nodes are bootstrapped.
        3. Create new cluster with NeutronVLAN + Ceph.
        4. Add 3 controllers.
        5. Add 2 compute + ceph nodes.
        6. Deploy cluster.
        7. Verify networks.
        8. Run OSTF.

        Duration: TODO
        """
        self.show_step(1)
        self.env.revert_snapshot(self.snapshot_name, skip_timesync=True)

        self.show_step(2)
        cluster_id = self.fuel_web.get_last_created_cluster()
        devops_nodes = self.fuel_web.get_devops_nodes_by_nailgun_nodes(
            self.fuel_web.client.list_cluster_nodes(cluster_id=cluster_id))
        self.fuel_web.client.delete_cluster(cluster_id)
        wait(lambda: not any([cluster['id'] == cluster_id for cluster in
                              self.fuel_web.client.list_clusters()]),
             timeout=60 * 10)
        self.env.bootstrap_nodes(devops_nodes)

        self.show_step(3)
        cluster_settings = {
            'net_provider': settings.NEUTRON,
            'net_segment_type': settings.NEUTRON_SEGMENT['vlan'],
            'volumes_lvm': False,
            'volumes_ceph': True,
            'images_ceph': True,
            'objects_ceph': True,
            'ephemeral_ceph': True,
        }
        cluster_settings.update(self.cluster_creds)

        cluster_id = self.fuel_web.create_cluster(
            name=self.upgrade_rollback_new_deploy.__name__,
            mode=settings.DEPLOYMENT_MODE,
            settings=cluster_settings)

        self.show_step(4)
        self.show_step(5)
        self.fuel_web.update_nodes(
            cluster_id,
            {'slave-01': ['controller'],
             'slave-02': ['controller'],
             'slave-03': ['controller'],
             'slave-04': ['compute', 'ceph-osd'],
             'slave-05': ['compute', 'ceph-osd']})
        self.show_step(6)
        self.fuel_web.deploy_cluster_wait(cluster_id)
        self.show_step(7)
        self.fuel_web.verify_network(cluster_id)
        self.show_step(8)
        self.fuel_web.run_ostf(cluster_id)
Example #2
    def setup_environment(self, custom=settings.CUSTOM_ENV,
                          build_images=settings.BUILD_IMAGES,
                          iso_connect_as=settings.ADMIN_BOOT_DEVICE,
                          security=settings.SECURITY_TEST):
        # Create environment and start the Fuel master node
        admin = self.d_env.nodes().admin
        self.d_env.start([admin])

        logger.info("Waiting for admin node to start up")
        wait(lambda: admin.driver.node_active(admin), 60)
        logger.info("Proceed with installation")
        # update network parameters at boot screen
        admin.send_keys(self.get_keys(admin, custom=custom,
                                      build_images=build_images,
                                      iso_connect_as=iso_connect_as))
        if settings.SHOW_FUELMENU:
            self.wait_for_fuelmenu()
        else:
            self.wait_for_provisioning()

        self.set_admin_ssh_password()

        self.wait_for_external_config()
        if custom:
            self.setup_customisation()
        if security:
            nessus_node = NessusActions(self.d_env)
            nessus_node.add_nessus_node()
        # wait while installation complete

        self.admin_actions.modify_configs(self.d_env.router())
        self.kill_wait_for_external_config()
        self.wait_bootstrap()
        self.admin_actions.wait_for_fuel_ready()
Example #3
def wait_phrase_in_log(node_ssh, timeout, interval, phrase, log_path):
    cmd = "grep '{0}' '{1}'".format(phrase, log_path)
    wait(
        lambda: not node_ssh.execute(cmd)['exit_code'], interval=interval,
        timeout=timeout,
        timeout_msg="The phrase {0} not found in {1} file on "
                    "remote node".format(phrase, log_path))
Example #4
    def test_cobbler_alive(self):
        """Test current installation has correctly setup cobbler

        API and cobbler HTTP server are alive

        Scenario:
            1. Revert snapshot "empty"
            2. Test cobbler API and HTTP server by sending HTTP requests

        """
        if OPENSTACK_RELEASE_CENTOS not in OPENSTACK_RELEASE:
            raise SkipTest()
        self.env.revert_snapshot("empty")
        wait(
            lambda: http(host=self.env.get_admin_node_ip(), url='/cobbler_api',
                         waited_code=501),
            timeout=60
        )
        server = xmlrpclib.Server(
            'http://%s/cobbler_api' % self.env.get_admin_node_ip())

        config = self.env.get_fuel_settings()
        username = config['cobbler']['user']
        password = config['cobbler']['password']

        # raises an error if something isn't right
        server.login(username, password)
Example #5
    def wait_for_slave_network_down(node_ip, timeout=10 * 20):
        """Wait for a target node network down.

        :param node_ip: IP address of target node.
        :param timeout: Timeout for wait function.
        """
        wait(lambda: not tcp_ping(node_ip, 22), interval=1, timeout=timeout,
             timeout_msg="Node didn't go offline")
Example #6
    def separate_db_service_controller_shutdown(self):
        """Shutdown primary controller node

        Scenario:
            1. Revert snapshot separate_db_service
            2. Shutdown primary controller node
            3. Wait rabbit and db are operational
            4. Run OSTF

        Duration 30m
        """
        self.env.revert_snapshot("separate_db_service")
        cluster_id = self.fuel_web.get_last_created_cluster()
        # shutdown primary controller
        controller = self.fuel_web.get_nailgun_primary_node(
            self.env.d_env.nodes().slaves[0])
        logger.debug(
            "controller with primary role is {}".format(controller.name))
        controller.destroy()
        wait(lambda: not self.fuel_web.get_nailgun_node_by_devops_node(
            controller)['online'], timeout=60 * 5)

        self.fuel_web.assert_ha_services_ready(cluster_id)
        self.fuel_web.assert_os_services_ready(cluster_id, timeout=15 * 60,
                                               should_fail=1)
        self.fuel_web.run_ostf(
            cluster_id=cluster_id, should_fail=1)
Example #7
def admin_change_config(admin_node,
                        hostname=MASTER_FQDN,
                        dns1=MASTER_DNS,
                        admin_centos_version=7,
                        static_interface='eth0'
                        ):
    """Change master node configuration via kernel param

    :param admin_node: Node
    :param hostname: String
    :param dns1: String
    :rtype: None
    """
    admin_net = admin_node.environment.get_network(name='admin')
    keys = get_keys(
        ip=admin_node.get_ip_address_by_network_name('admin'),
        mask=admin_net.netmask,
        gw=admin_net.default_gw,
        hostname=hostname,
        nat_interface='',
        dns1=dns1,
        showmenu='no',
        build_images=0,
        centos_version=admin_centos_version,
        static_interface=static_interface)

    print("Waiting for admin node to start up")
    wait(lambda: admin_node.driver.node_active(admin_node), 60)
    print("Proceed with installation")
    admin_node.send_keys(keys)
Example #8
    def get_last_record(self,
                        serie,
                        conditions=None,
                        updated_after=0,
                        timeout=2 * 60):
        conditions = " and {}".format(conditions) if conditions else ""
        query = ("select * from \"{serie}\" "
                 "where time > {updated_after} {conditions} "
                 "order by time desc limit 1").format(
                     serie=serie,
                     conditions=conditions,
                     updated_after=updated_after)

        data = []

        def _get_data():
            result = self._make_query(query)
            try:
                data.append(result[0][serie][0])
                return True
            except IndexError:
                return False

        helpers.wait(
            _get_data,
            timeout=timeout,
            interval=timeout / 10,
            timeout_msg="Timeout waiting data for query `{}`".format(query))
        return data[-1]
Example #9
    def ha_check_monit(self):
        """Verify monit restarted nova
         service if it was killed

        Scenario:
            1. SSH to every compute node in cluster
            2. Kill nova-compute service
            3. Check service is restarted by monit

        Snapshot ha_check_monit

        """
        self.env.revert_snapshot("deploy_ha")
        for devops_node in self.env.nodes().slaves[3:5]:
            remote = self.fuel_web.get_ssh_for_node(devops_node.name)
            remote.execute("kill -9 `pgrep nova-compute`")
            wait(
                lambda: len(remote.execute('pgrep nova-compute')['stdout'])
                == 1, timeout=120)
            assert_true(len(remote.execute('pgrep nova-compute')['stdout'])
                        == 1, 'Nova service was not restarted')
            assert_true(len(remote.execute(
                "grep \"nova-compute.*trying to restart\" "
                "/var/log/monit.log")['stdout']) > 0,
                'Nova service was not restarted')
Example #10
 def create_volume(self, size=1):
     volume = self.cinder.volumes.create(size)
     helpers.wait(
         lambda: self.cinder.volumes.get(volume.id).status == "available",
         timeout=100)
     logger.info("Created volume")
     return self.cinder.volumes.get(volume.id)
Example #11
    def wait_for_slave_provision(node_ip, timeout=10 * 60):
        """Wait for a target node provision.

        :param node_ip: IP address of target node.
        :param timeout: Timeout for wait function.
        """
        wait(lambda: tcp_ping(node_ip, 22), timeout=timeout,
             timeout_msg="Node didn't appear in network")
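Together with Example #5, the helper above covers both halves of a reboot check. A hedged sketch, assuming both functions are in scope and node_ip points at a slave that is being rebooted (the address below is illustrative):

node_ip = '10.109.0.5'  # hypothetical slave address
# SSH should first disappear while the node goes down...
wait_for_slave_network_down(node_ip, timeout=10 * 20)
# ...and come back once the node is provisioned again.
wait_for_slave_provision(node_ip, timeout=10 * 60)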
Example #12
def check_mysql(remote, node_name):
    check_cmd = 'pkill -0 -x mysqld'
    check_crm_cmd = ('crm resource status clone_p_mysql |'
                     ' grep -q "is running on: $HOSTNAME"')
    check_galera_cmd = ("mysql --connect_timeout=5 -sse \"SELECT"
                        " VARIABLE_VALUE FROM"
                        " information_schema.GLOBAL_STATUS"
                        " WHERE VARIABLE_NAME"
                        " = 'wsrep_local_state_comment';\"")
    try:
        wait(lambda: remote.execute(check_cmd)['exit_code'] == 0,
             timeout=300)
        logger.info('MySQL daemon is started on {0}'.format(node_name))
    except TimeoutError:
        logger.error('MySQL daemon is down on {0}'.format(node_name))
        raise
    _wait(lambda: assert_equal(remote.execute(check_crm_cmd)['exit_code'], 0,
                               'MySQL resource is NOT running on {0}'.format(
                                   node_name)), timeout=60)
    try:
        wait(lambda: ''.join(remote.execute(
            check_galera_cmd)['stdout']).rstrip() == 'Synced', timeout=600)
    except TimeoutError:
        logger.error('galera status is {0}'.format(''.join(remote.execute(
            check_galera_cmd)['stdout']).rstrip()))
        raise
Example #13
    def check_instance_connectivity(remote, dhcp_namespace, instance_ip,
                                    instance_keypair):
        cmd_check_ns = 'ip netns list'
        namespaces = [
            l.strip() for l in remote.check_call(cmd_check_ns).stdout]
        logger.debug('Net namespaces on remote: {0}.'.format(namespaces))
        assert_true(dhcp_namespace in namespaces,
                    "Network namespace '{0}' doesn't exist on "
                    "remote slave!".format(dhcp_namespace))
        instance_key_path = '/root/.ssh/instancekey_rsa'
        remote.check_call('echo "{0}" > {1} && chmod 400 {1}'.format(
            instance_keypair.private_key, instance_key_path))

        cmd = (". openrc; ip netns exec {0} ssh -i {1}"
               " -o 'StrictHostKeyChecking no'"
               " cirros@{2} \"ping -c 1 {3}\"").format(dhcp_namespace,
                                                       instance_key_path,
                                                       instance_ip,
                                                       settings.PUBLIC_TEST_IP)
        err_msg = ("SSH command:\n{command}\nwas not completed with "
                   "exit code 0 after 3 attempts with 1 minute timeout.")
        wait(lambda: remote.execute(cmd)['exit_code'] == 0,
             interval=60, timeout=3 * 60,
             timeout_msg=err_msg.format(command=cmd))
        res = remote.execute(cmd)
        assert_equal(0, res['exit_code'],
                     'Instance has no connectivity, exit code {0},'
                     'stdout {1}, stderr {2}'.format(res['exit_code'],
                                                     res['stdout'],
                                                     res['stderr']))
Example #14
    def assign_floating_ip(self, srv, use_neutron=False):
        if use_neutron:
            #   Find external net id for tenant
            nets = self.neutron.list_networks()['networks']
            err_msg = "Active external network not found in nets:{}"
            ext_net_ids = [
                net['id'] for net in nets
                if net['router:external'] and net['status'] == "ACTIVE"]
            asserts.assert_true(ext_net_ids, err_msg.format(nets))
            net_id = ext_net_ids[0]
            #   Find instance port
            ports = self.neutron.list_ports(device_id=srv.id)['ports']
            err_msg = "Not found active ports for instance:{}"
            asserts.assert_true(ports, err_msg.format(srv.id))
            port = ports[0]
            #   Create floating IP
            body = {'floatingip': {'floating_network_id': net_id,
                                   'port_id': port['id']}}
            flip = self.neutron.create_floatingip(body)
            #   Wait active state for port
            port_id = flip['floatingip']['port_id']
            state = lambda: self.neutron.show_port(port_id)['port']['status']
            helpers.wait(lambda: state() == "ACTIVE")
            return flip['floatingip']

        fl_ips_pool = self.nova.floating_ip_pools.list()
        if fl_ips_pool:
            floating_ip = self.nova.floating_ips.create(
                pool=fl_ips_pool[0].name)
            self.nova.servers.add_floating_ip(srv, floating_ip)
            return floating_ip
Example #15
    def separate_db_service_restart(self):
        """Restart one database node

        Scenario:
            1. Revert snapshot separate_db_service
            2. Restart db node that is master
            3. Wait galera is up
            4. Run OSTF

        Duration 30m
        """
        self.env.revert_snapshot("separate_db_service")
        cluster_id = self.fuel_web.get_last_created_cluster()
        # restart one db node
        db_node = self.env.d_env.nodes().slaves[3]
        self.fuel_web.warm_restart_nodes([db_node])
        wait(lambda: self.fuel_web.get_nailgun_node_by_devops_node(
            db_node)['online'], timeout=60 * 5)
        # Wait until MySQL Galera is UP on some db node
        self.fuel_web.wait_mysql_galera_is_up(['slave-05'])
        self.fuel_web.assert_ha_services_ready(cluster_id)
        self.fuel_web.assert_os_services_ready(cluster_id, timeout=15 * 60)

        self.fuel_web.run_ostf(
            cluster_id=cluster_id)
Example #16
    def delete_environment(self):
        """Delete existing environment
        and verify nodes return to unallocated state

        Scenario:
            1. Revert "simple flat" environment
            2. Delete environment
            3. Verify nodes return to unallocated pool

        """
        self.env.revert_snapshot("deploy_simple_flat")

        cluster_id = self.fuel_web.get_last_created_cluster()
        self.fuel_web.client.delete_cluster(cluster_id)
        nailgun_nodes = self.fuel_web.client.list_nodes()
        nodes = filter(lambda x: x["pending_deletion"] is True, nailgun_nodes)
        assert_true(
            len(nodes) == 2, "Verify 2 node has pending deletion status"
        )
        wait(
            lambda:
            self.fuel_web.is_node_discovered(nodes[0]) and
            self.fuel_web.is_node_discovered(nodes[1]),
            timeout=10 * 60,
            interval=15
        )
Example #17
 def assert_cli_task_success(
         self, task, remote, timeout=70 * 60, interval=20):
     logger.info('Wait {timeout} seconds for task: {task}'
                 .format(timeout=timeout, task=task))
     start = time.time()
     try:
         wait(
             lambda: self.get_task(
                 remote, task['id'])['status'] != 'running',
             interval=interval,
             timeout=timeout
         )
     except TimeoutError:
         raise TimeoutError(
             "Waiting timeout {timeout} sec was reached for task: {task}"
             .format(task=task["name"], timeout=timeout))
     took = time.time() - start
     task = self.get_task(remote, task['id'])
     logger.info('Task finished in {took} seconds with the result: {task}'
                 .format(took=took, task=task))
     assert_equal(
         task['status'], 'ready',
         "Task '{name}' has incorrect status. {} != {}".format(
             task['status'], 'ready', name=task["name"]
         )
     )
Example #18
    def check_instance_connectivity(cls, remote, dhcp_namespace, instance_ip,
                                    instance_keypair):
        cmd_check_ns = 'ip netns list'
        namespaces = [l.strip() for l in run_on_remote(remote, cmd_check_ns)]
        logger.debug('Net namespaces on remote: {0}.'.format(namespaces))
        assert_true(dhcp_namespace in namespaces,
                    "Network namespace '{0}' doesn't exist on "
                    "remote slave!".format(dhcp_namespace))
        instance_key_path = '/root/.ssh/instancekey_rsa'
        run_on_remote(remote, 'echo "{0}" > {1} && chmod 400 {1}'.format(
            instance_keypair.private_key, instance_key_path))

        cmd = (". openrc; ip netns exec {0} ssh -i {1}"
               " -o 'StrictHostKeyChecking no'"
               " cirros@{2} \"ping -c 1 {3}\"").format(dhcp_namespace,
                                                       instance_key_path,
                                                       instance_ip,
                                                       settings.PUBLIC_TEST_IP)
        wait(lambda: remote.execute(cmd)['exit_code'] == 0, timeout=2 * 60)
        res = remote.execute(cmd)
        assert_equal(0, res['exit_code'],
                     'Instance has no connectivity, exit code {0},'
                     'stdout {1}, stderr {2}'.format(res['exit_code'],
                                                     res['stdout'],
                                                     res['stderr']))
Example #19
 def setup_environment(self, custom=settings.CUSTOM_ENV,
                       build_images=settings.BUILD_IMAGES):
     # start admin node
     admin = self.nodes().admin
     admin.disk_devices.get(device='cdrom').volume.upload(settings.ISO_PATH)
     self.get_virtual_environment().start(self.nodes().admins)
     logger.info("Waiting for admin node to start up")
     wait(lambda: admin.driver.node_active(admin), 60)
     logger.info("Proceed with installation")
     # update network parameters at boot screen
     admin.send_keys(self.get_keys(admin, custom=custom,
                     build_images=build_images))
     if custom:
         self.setup_customisation()
     # wait while installation complete
     admin.await(self.admin_net, timeout=10 * 60)
     self.set_admin_ssh_password()
     self.wait_bootstrap()
     time.sleep(10)
     self.set_admin_keystone_password()
     self.sync_time_admin_node()
     if settings.MULTIPLE_NETWORKS:
         self.describe_second_admin_interface()
         multiple_networks_hacks.configure_second_admin_cobbler(self)
         multiple_networks_hacks.configure_second_dhcrelay(self)
     self.nailgun_actions.set_collector_address(
         settings.FUEL_STATS_HOST,
         settings.FUEL_STATS_PORT,
         settings.FUEL_STATS_SSL)
     if settings.FUEL_STATS_ENABLED:
         self.fuel_web.client.send_fuel_stats(enabled=True)
         logger.info('Enabled sending of statistics to {0}:{1}'.format(
             settings.FUEL_STATS_HOST, settings.FUEL_STATS_PORT
         ))
Example #20
 def _ostf_test_wait(self, cluster_id, timeout):
     wait(
         lambda: all([run['status'] == 'finished'
                      for run in
                      self.client.get_ostf_test_run(cluster_id)]),
         timeout=timeout)
     return self.client.get_ostf_test_run(cluster_id)
Example #21
 def wrapper(*args, **kwargs):
     result = func(*args, **kwargs)
     try:
         if settings.UPLOAD_PATCHSET:
             if not settings.GERRIT_REFSPEC:
                 raise ValueError('REFSPEC should be set for CI tests.')
             logger.info("Uploading new patchset from {0}"
                         .format(settings.GERRIT_REFSPEC))
             remote = SSHClient(args[0].admin_node_ip,
                                username='******',
                                password='******')
             remote.upload(settings.PATCH_PATH.rstrip('/'),
                           '/tmp/fuel-ostf')
             remote.execute('source /opt/fuel_plugins/ostf/bin/activate; '
                            'cd /tmp/fuel-ostf; python setup.py develop')
             remote.execute('/etc/init.d/supervisord restart')
             helpers.wait(
                 lambda: "RUNNING" in
                 remote.execute("supervisorctl status ostf | awk\
                                '{print $2}'")['stdout'][0],
                 timeout=60)
             logger.info("OSTF status: RUNNING")
     except Exception as e:
         logger.error("Could not upload patch set {e}".format(e=e))
         raise
     return result
Example #22
    def cluster_deletion(self):
        """
        Scenario:
            1. Revert snapshot 'prepare_ha_neutron'
            2. Delete cluster via cli
            3. Check cluster absence in the list

        Duration 25m

        """
        self.env.revert_snapshot("prepare_ha_neutron")

        remote = self.env.d_env.get_admin_remote()
        cluster_id = self.fuel_web.get_last_created_cluster()
        assert_true(
            remote.execute('fuel --env {0} env delete'.format(cluster_id))
            ['exit_code'] == 0)
        try:
            wait(lambda:
                 remote.execute(
                     "fuel env |  awk '{print $1}' |  tail -n 1 | grep '^.$'")
                 ['exit_code'] == 1, timeout=60 * 6)
        except TimeoutError:
            raise TimeoutError(
                "cluster {0} was not deleted".format(cluster_id))
        assert_false(
            check_cluster_presence(cluster_id, self.env.postgres_actions),
            "cluster {0} is found".format(cluster_id))
Example #23
    def find(self, key, value):
        LOG.info('Search for {} for {}'.format(key, value))
        search_request_body = '{' +\
            '  "query": {' +\
            '   "simple_query_string": {' +\
            '     "query": "{}",'.format(value) +\
            '     "analyze_wildcard" : "true",' +\
            '     "fields" : ["{}"],'.format(key) +\
            '     "default_operator": "AND"' +\
            '     }' +\
            ' },' +\
            '  "size": 1' +\
            '}'
        LOG.info('Search by {}'.format(search_request_body))

        def is_found():
            def temporary_status():
                res = self.es.search(index='_all', body=search_request_body)
                return res['hits']['total'] != 0
            return temporary_status

        predicate = is_found()
        helpers.wait(predicate, timeout=300,
                     timeout_msg='Timeout waiting for result from Elasticsearch')

        es_raw = self.es.search(index='_all', body=search_request_body)
        if es_raw['timed_out']:
            raise RuntimeError('Elastic search timeout exception')

        return ElasticSearchResult(key, value, es_raw['hits']['total'], es_raw)
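A hedged usage sketch for the find() helper above; es_client stands for an instance of the class this method belongs to, and the field/value pair is illustrative only.

# Hypothetical query: wait until at least one log document matches the
# given field and value, then work with the wrapped Elasticsearch result.
result = es_client.find('programname', 'nova-compute')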
Example #24
    def spawn_three_vms_across_three_virt_nodes(self):
        """Spawn three vm nodes across three slave nodes

        Scenario:
            1. Create cluster
            2. Assign compute and virt roles to three slave nodes
            3. Upload VM configuration for one VM to each slave node
            4. Spawn VMs
            5. Wait till VMs become available for allocation

        Duration: 60m
        """

        self.env.revert_snapshot("ready_with_3_slaves")

        checkers.enable_feature_group(self.env, "advanced")

        cluster_id = self.fuel_web.create_cluster(
            name=self.__class__.__name__,
            mode=settings.DEPLOYMENT_MODE_HA,
            settings={
                'net_provider': 'neutron',
                'net_segment_type': settings.NEUTRON_SEGMENT['tun']
            })

        asserts.assert_true(settings.HARDWARE['slave_node_memory'] >= 1024,
                            "Wrong SLAVE_NODE_MEMORY value: {0}."
                            "Please allocate more than 1024Mb.".
                            format(settings.HARDWARE['slave_node_memory']))

        self.fuel_web.update_nodes(
            cluster_id,
            {
                'slave-01': ['compute', 'virt'],
                'slave-02': ['compute', 'virt'],
                'slave-03': ['compute', 'virt']
            })

        hw_nodes = self.fuel_web.client.list_cluster_nodes(cluster_id)
        for node in hw_nodes:
            self.fuel_web.client.create_vm_nodes(
                node['id'],
                [
                    {
                        "id": 1,
                        "mem": 1,
                        "cpu": 1
                    }
                ])

        self.fuel_web.spawn_vms_wait(cluster_id)
        wait(lambda: len(self.fuel_web.client.list_nodes()) == 6,
             timeout=60 * 120,
             timeout_msg=("Timeout waiting 6 available nodes, "
                          "current nodes: \n{0}" + '\n'.join(
                              ['Name: {0}, status: {1}, online: {2}'.
                               format(i['name'], i['status'], i['online'])
                               for i in self.fuel_web.client.list_nodes()])))

        self.env.make_snapshot("spawn_three_vms_across_three_virt_nodes")
Example #25
    def check_starting_resources(self):
        """Check starting pacemaker resources"""

        logger.info(
            "Waiting {} seconds for changing pacemaker status of {}".format(
                self.pacemaker_restart_timeout,
                self.primary_controller_fqdn))
        time.sleep(self.pacemaker_restart_timeout)

        with self.fuel_web.get_ssh_for_node(
                self.primary_controller.name) as remote:

            def checking_health_disk_attribute_is_not_present():
                logger.info(
                    "Checking for '#health_disk' attribute "
                    "is not present on node {}".format(
                        self.primary_controller_fqdn))
                cibadmin_status_xml = remote.check_call(
                    'cibadmin --query --scope status').stdout_str
                pcs_attribs = get_pacemaker_nodes_attributes(
                    cibadmin_status_xml)
                return '#health_disk' not in pcs_attribs[
                    self.primary_controller_fqdn]

            wait(checking_health_disk_attribute_is_not_present,
                 timeout=self.pcs_check_timeout,
                 timeout_msg="Attribute #health_disk was appeared "
                             "in attributes on node {} in {} seconds".format(
                                 self.primary_controller_fqdn,
                                 self.pcs_check_timeout))

            self.fuel_web.assert_ha_services_ready(self.cluster_id)
Example #26
    def test_wait(self, sleep):

        predicate = mock.Mock(return_value=True)
        result = helpers.wait(predicate, interval=1, timeout=1)

        self.assertTrue(result)
        predicate.assert_called_once()
        sleep.assert_not_called()

        sleep.reset_mock()
        predicate.reset_mock()

        predicate.return_value = 2
        result = helpers.wait(predicate, interval=2, timeout=2)

        self.assertEqual(result, 2)
        predicate.assert_called_once()
        sleep.assert_not_called()

        sleep.reset_mock()
        predicate.reset_mock()
        predicate.return_value = False

        self.assertRaises(
            error.TimeoutError,
            helpers.wait,
            predicate, interval=1, timeout=1)
        predicate.assert_called()
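The test above pins down the contract the rest of these examples rely on: wait() returns the predicate's truthy result as soon as it appears and raises a timeout error once the deadline passes. Below is a minimal sketch of a helper with that behaviour; the real helpers.wait shipped with the devops library has more options, so treat the names and defaults here as assumptions.

import time


class TimeoutError(Exception):
    """Stands in for the error.TimeoutError referenced by the test above."""


def wait(predicate, interval=5, timeout=60, timeout_msg='Waiting timed out'):
    # Poll the predicate until it returns a truthy value and hand that value
    # back to the caller; once the timeout budget is spent, raise instead.
    started_at = time.time()
    while True:
        result = predicate()
        if result:
            return result
        if time.time() - started_at >= timeout:
            raise TimeoutError(timeout_msg)
        time.sleep(interval)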
Example #27
    def separate_rabbit_service_restart(self):
        """Restart one rabbit node

        Scenario:
            1. Revert snapshot separate_rabbit_service
            2. Restart rabbit node that is master
            3. Wait HA is working
            4. Run OSTF

        Duration 30m
        """
        self.env.revert_snapshot("separate_rabbit_service")
        cluster_id = self.fuel_web.get_last_created_cluster()
        # restart rabbit master node
        rabbit_node = self.fuel_web.get_rabbit_master_node(
            self.env.d_env.nodes().slaves[3].name)
        self.fuel_web.warm_restart_nodes([rabbit_node])
        wait(lambda: self.fuel_web.get_nailgun_node_by_devops_node(
            rabbit_node)['online'], timeout=60 * 5)

        self.fuel_web.assert_ha_services_ready(cluster_id)
        self.fuel_web.assert_os_services_ready(cluster_id, timeout=15 * 60)

        self.fuel_web.run_ostf(
            cluster_id=cluster_id)
Example #28
    def simple_flat_node_deletion(self):
        """Remove controller from cluster in simple mode with flat nova-network

         Scenario:
            1. Revert "simple flat" environment
            2. Remove controller nodes
            3. Deploy changes
            4. Verify node returns to unallocated pool

        """
        self.env.revert_snapshot("deploy_simple_flat")

        cluster_id = self.fuel_web.get_last_created_cluster()
        nailgun_nodes = self.fuel_web.update_nodes(
            cluster_id, {'slave-01': ['controller']}, False, True)
        task = self.fuel_web.deploy_cluster(cluster_id)
        self.fuel_web.assert_task_success(task)
        nodes = filter(lambda x: x["pending_deletion"] is True, nailgun_nodes)
        assert_true(
            len(nodes) == 1, "Verify 1 node has pending deletion status"
        )
        wait(
            lambda: self.fuel_web.is_node_discovered(nodes[0]),
            timeout=10 * 60
        )
Example #29
    def update_nodes(self, cluster_id, nodes_dict, pending_addition=True, pending_deletion=False):
        # update nodes in cluster
        nodes_data = []
        for node_name in nodes_dict:
            devops_node = self.environment.get_virtual_environment().node_by_name(node_name)

            wait(lambda: self.get_nailgun_node_by_devops_node(devops_node)["online"], timeout=60 * 2)
            node = self.get_nailgun_node_by_devops_node(devops_node)
            assert_true(node["online"], "Node {} is online".format(node["mac"]))

            node_data = {
                "cluster_id": cluster_id,
                "id": node["id"],
                "pending_addition": pending_addition,
                "pending_deletion": pending_deletion,
                "pending_roles": nodes_dict[node_name],
                "name": "{}_{}".format(node_name, "_".join(nodes_dict[node_name])),
            }
            nodes_data.append(node_data)

        # assume nodes are going to be updated for one cluster only
        cluster_id = nodes_data[-1]["cluster_id"]
        node_ids = [str(node_info["id"]) for node_info in nodes_data]
        self.client.update_nodes(nodes_data)

        nailgun_nodes = self.client.list_cluster_nodes(cluster_id)
        cluster_node_ids = map(lambda _node: str(_node["id"]), nailgun_nodes)
        assert_true(all([node_id in cluster_node_ids for node_id in node_ids]))

        self.update_nodes_interfaces(cluster_id)

        return nailgun_nodes
Example #30
    def ha_haproxy_termination(self):
        """Terminate haproxy on all controllers one by one

        Scenario:
            1. Terminate haproxy
            2. Wait while it is being restarted
            3. Verify it is restarted
            4. Go to another controller
            5. Run OSTF

        Snapshot deploy_ha

        """
        self.env.revert_snapshot("deploy_ha")

        for devops_node in self.env.nodes().slaves[:3]:
            remote = self.fuel_web.get_ssh_for_node(devops_node.name)
            remote.check_call('kill -9 $(pidof haproxy)')

            haproxy_started = lambda: \
                len(remote.check_call(
                    'ps aux | grep "/usr/sbin/haproxy"')['stdout']) == 3
            wait(haproxy_started, timeout=20)
            assert_true(haproxy_started(), 'haproxy was not restarted')

        cluster_id = self.fuel_web.client.get_cluster_id(
            self.__class__.__name__)

        self.fuel_web.run_ostf(
            cluster_id=cluster_id,
            test_sets=['ha', 'smoke', 'sanity'])
Example #31
 def wrapper(*args, **kwargs):
     result = func(*args, **kwargs)
     try:
         if settings.UPLOAD_PATCHSET:
             if not settings.GERRIT_REFSPEC:
                 raise ValueError('REFSPEC should be set for CI tests.')
             logger.info("Uploading new patchset from {0}".format(
                 settings.GERRIT_REFSPEC))
             remote = args[0].environment.get_admin_remote()
             remote.upload(settings.PATCH_PATH.rstrip('/'),
                           '/tmp/fuel-ostf')
             remote.execute('source /opt/fuel_plugins/ostf/bin/activate; '
                            'cd /tmp/fuel-ostf; python setup.py develop')
             remote.execute('/etc/init.d/supervisord restart')
             helpers.wait(lambda: "RUNNING" in remote.execute(
                 "supervisorctl status ostf | awk\
                                '{print $2}'")['stdout'][0],
                          timeout=60)
             logger.info("OSTF status: RUNNING")
     except Exception as e:
         logger.error("Could not upload patch set {e}".format(e=e))
         raise
     return result
Example #32
    def separate_keystone_service_restart(self):
        """Restart one keystone node

        Scenario:
            1. Revert snapshot separate_keystone_service
            2. Restart keystone
            3. Wait HA is working
            4. Run OSTF

        Duration 30m
        """
        self.env.revert_snapshot("separate_keystone_service")
        cluster_id = self.fuel_web.get_last_created_cluster()
        # restart one keystone node
        keystone_node = self.env.d_env.nodes().slaves[3]
        self.fuel_web.warm_restart_nodes([keystone_node])
        wait(lambda: self.fuel_web.get_nailgun_node_by_devops_node(
            keystone_node)['online'],
             timeout=60 * 5)
        self.fuel_web.assert_ha_services_ready(cluster_id)
        self.fuel_web.assert_os_services_ready(cluster_id, timeout=15 * 60)

        self.fuel_web.run_ostf(cluster_id=cluster_id)
Example #33
    def reschedule_router_manually(os_conn, router_id):
        router_l3_agents = os_conn.get_l3_agent_ids(router_id)
        if not router_l3_agents:
            raise NotFound("l3 agent hosting router with id:{}"
                           " not found.".format(router_id))
        l3_agent = router_l3_agents[0]
        logger.debug("l3 agent id is {0}".format(l3_agent))

        another_l3_agents = os_conn.get_available_l3_agents_ids(l3_agent)
        if not another_l3_agents:
            raise NotFound("another l3 agent except l3 agent with id:{}"
                           " not found.".format(l3_agent))
        another_l3_agent = another_l3_agents[0]
        logger.debug("another l3 agent is {0}".format(another_l3_agent))

        os_conn.remove_l3_from_router(l3_agent, router_id)
        os_conn.add_l3_to_router(another_l3_agent, router_id)
        err_msg = ("l3 agent with id:{l3_1} don't start hosting router "
                   "with id:{router} after remove l3 agent with id:{l3_2} "
                   "as a hosting this router during 5 minutes.")
        wait(lambda: os_conn.get_l3_agent_ids(router_id), timeout=60 * 5,
             timeout_msg=err_msg.format(l3_1=l3_agent, router=router_id,
                                        l3_2=another_l3_agent))
Example #34
    def delete_environment(self):
        """Delete existing environment
        and verify nodes return to unallocated state

        Scenario:
            1. Revert "deploy_ha_one_controller" environment
            2. Delete environment
            3. Verify nodes return to unallocated pool

        Duration 15m
        """
        self.env.revert_snapshot("deploy_ha_one_controller_neutron")

        cluster_id = self.fuel_web.get_last_created_cluster()
        self.fuel_web.client.delete_cluster(cluster_id)
        nailgun_nodes = self.fuel_web.client.list_nodes()
        nodes = filter(lambda x: x["pending_deletion"] is True, nailgun_nodes)
        assert_true(
            len(nodes) == 2, "Verify 2 node has pending deletion status")
        wait(lambda: self.fuel_web.is_node_discovered(nodes[0]) and self.
             fuel_web.is_node_discovered(nodes[1]),
             timeout=10 * 60,
             interval=15)
Example #35
    def ha_haproxy_termination(self):
        """Terminate haproxy on all controllers one by one

        Scenario:
            1. Terminate haproxy
            2. Wait while it is being restarted
            3. Verify it is restarted
            4. Go to another controller

        Snapshot deploy_ha

        """
        self.env.revert_snapshot("deploy_ha")

        for devops_node in self.env.nodes().slaves[:3]:
            remote = self.fuel_web.get_ssh_for_node(devops_node.name)
            remote.check_call('kill -9 $(pidof haproxy)')

            haproxy_started = lambda: \
                len(remote.check_call(
                    'ps aux | grep "/usr/sbin/haproxy"')['stdout']) == 3
            wait(haproxy_started, timeout=20)
            assert_true(haproxy_started(), 'haproxy was not restarted')
Example #36
    def bootstrap_nodes(self,
                        devops_nodes,
                        timeout=settings.BOOTSTRAP_TIMEOUT,
                        skip_timesync=False):
        """Lists registered nailgun nodes
        Start vms and wait until they are registered on nailgun.
        :rtype : List of registered nailgun nodes
        """
        # self.dhcrelay_check()

        for node in devops_nodes:
            logger.info("Bootstrapping node: {}".format(node.name))
            node.start()
            # TODO(aglarendil): LP#1317213 temporary sleep
            # remove after better fix is applied
            time.sleep(5)

        with TimeStat("wait_for_nodes_to_start_and_register_in_nailgun"):
            wait(lambda: all(self.nailgun_nodes(devops_nodes)), 15, timeout)

        if not skip_timesync:
            self.sync_time()
        return self.nailgun_nodes(devops_nodes)
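A minimal usage sketch, mirroring the self.env.bootstrap_nodes(devops_nodes) call in the first example on this page; the slave selection is illustrative.

# Hypothetical call from a test case: power on three devops slaves and wait
# until they register in Nailgun, skipping the time sync step.
slaves = self.env.d_env.nodes().slaves[:3]
registered = self.env.bootstrap_nodes(slaves, skip_timesync=True)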
Example #37
    def migrate_server(self, server, host, timeout):
        curr_host = self.get_srv_host_name(server)
        logger.debug("Current compute host is {0}".format(curr_host))
        logger.debug("Start live migration of instance")
        server.live_migrate(host._info['host_name'])
        try:
            helpers.wait(
                lambda: self.get_instance_detail(server).status == "ACTIVE",
                timeout=timeout)
        except TimeoutError:
            logger.debug("Instance do not became active after migration")
            asserts.assert_true(
                self.get_instance_detail(server).status == "ACTIVE",
                "Instance do not become Active after live migration, "
                "current status is {0}".format(
                    self.get_instance_detail(server).status))

        asserts.assert_true(
            self.get_srv_host_name(
                self.get_instance_detail(server)) != curr_host,
            "Server did not migrate")
        server = self.get_instance_detail(server.id)
        return server
Example #38
    def test_wait(self, sleep, time):
        time.return_value = 1
        predicate = mock.Mock(return_value=True)

        result = helpers.wait(predicate, interval=0, timeout=0)

        self.assertTrue(result)
        predicate.assert_called_once()
        time.assert_called_once()
        sleep.assert_not_called()

        time.reset_mock()
        time.return_value = 1
        sleep.reset_mock()
        predicate.reset_mock()
        predicate.return_value = True

        result = helpers.wait(predicate, interval=2, timeout=2)

        self.assertEqual(result, 2)
        predicate.assert_called_once()
        sleep.assert_not_called()
        time.assert_has_calls([mock.call(), mock.call()])

        time.reset_mock()
        time.return_value = 1
        sleep.reset_mock()
        predicate.reset_mock()
        predicate.return_value = False

        self.assertRaises(error.TimeoutError,
                          helpers.wait,
                          predicate,
                          interval=2,
                          timeout=-2)
        sleep.assert_not_called()
        time.assert_has_calls([mock.call(), mock.call()])
Example #39
    def ha_haproxy_termination(self):
        if not self.env.d_env.has_snapshot(self.snapshot_name):
            raise SkipTest()

        self.env.revert_snapshot(self.snapshot_name)

        for devops_node in self.env.d_env.nodes().slaves[:3]:
            with self.fuel_web.get_ssh_for_node(devops_node.name) as remote:
                remote.check_call('kill -9 $(pidof haproxy)')

                def haproxy_started():
                    ret = remote.execute(
                        '[ -f /var/run/haproxy.pid ] && '
                        '[ "$(ps -p $(cat /var/run/haproxy.pid) -o pid=)" == '
                        '"$(pidof haproxy)" ]')
                    return ret['exit_code'] == 0

                wait(haproxy_started, timeout=20)
                assert_true(haproxy_started(), 'haproxy was not restarted')

        cluster_id = self.fuel_web.client.get_cluster_id(
            self.__class__.__name__)

        # sometimes keystone is not available right after haproxy
        # restart thus ostf tests fail with corresponding error
        # about unavailability of the service. In order to consider this
        # we do preliminary execution of sanity set

        # 2 minutes more that enough for keystone to be available
        # after haproxy restart
        timeout = 120

        self.fuel_web.assert_os_services_ready(cluster_id=cluster_id,
                                               timeout=timeout)

        self.fuel_web.run_ostf(cluster_id=cluster_id,
                               test_sets=['ha', 'smoke', 'sanity'])
Example #40
    def cli_cluster_deletion(self):
        """Delete a cluster using Fuel CLI

        Scenario:
            1. Revert snapshot 'cli_selected_nodes_deploy'
            2. Delete cluster via cli
            3. Check cluster absence in the list

        Duration 25m

        """
        self.env.revert_snapshot("cli_selected_nodes_deploy")

        cluster_id = self.fuel_web.get_last_created_cluster()

        nodes = self.fuel_web.client.list_cluster_nodes(cluster_id)
        online_nodes = [node for node in nodes if node['online']]
        if nodes != online_nodes:
            logger.error('Some slaves did not come online after revert! '
                         'Expected {0}, actual {1}'.format(
                             nodes, online_nodes))

        self.ssh_manager.execute_on_remote(
            ip=self.ssh_manager.admin_ip,
            cmd='fuel --env {0} env delete --force'.format(cluster_id))

        wait(lambda: self.ssh_manager.execute_on_remote(
            ip=self.ssh_manager.admin_ip,
            cmd="fuel env |  awk '{print $1}' |  tail -n 1 | "
            "grep '^.$'",
            raise_on_assert=False)['exit_code'] == 1,
             timeout=60 * 10,
             timeout_msg='cluster {0} was not deleted'.format(cluster_id))

        assert_false(
            check_cluster_presence(cluster_id, self.env.postgres_actions),
            "cluster {0} is found".format(cluster_id))
Example #41
    def delete_cluster_with_custom_nodegroup(self):
        """Delete env, check nodes from custom nodegroup can't bootstrap

        Scenario:
        1. Revert snapshot with cluster with nodes in custom nodegroup
        2. Delete cluster
        3. Check nodes from custom nodegroup can't bootstrap
        4. Reset nodes from custom nodegroup
        5. Check nodes from custom nodegroup can't bootstrap

        Duration 15m
        """

        self.show_step(1, initialize=True)
        self.env.revert_snapshot('deploy_controllers_from_custom_nodegroup')
        cluster_id = self.fuel_web.get_last_created_cluster()
        self.fuel_web.assert_nodes_in_ready_state(cluster_id)

        self.show_step(2)
        custom_nodes = self.env.d_env.nodes().slaves[3:6]

        self.fuel_web.delete_env_wait(cluster_id)

        self.show_step(3)
        logger.info('Wait five nodes online for 900 seconds..')
        wait(lambda: len(self.fuel_web.client.list_nodes()) == 5,
             timeout=15 * 60,
             timeout_msg='Timeout while waiting five nodes '
             'to become online')

        logger.info('Wait until all nodes from custom nodegroup '
                    'are in error state..')
        # check all custom in error state
        for slave in custom_nodes:
            wait(lambda: self.fuel_web.get_nailgun_node_by_devops_node(slave)[
                'status'] == 'error',
                 timeout=15 * 60,
                 timeout_msg='Node {} did not change state to '
                 'error'.format(slave.name))
            logger.info('Node {} changed state to error'.format(slave.name))

        self.show_step(4)
        logger.info('Rebooting nodes from custom nodegroup..')
        self.fuel_web.cold_restart_nodes(custom_nodes, wait_online=False)

        self.show_step(5)
        logger.info('Wait custom nodes are not online for 600 seconds..')
        try:
            wait(lambda: any(
                self.fuel_web.get_nailgun_node_by_devops_node(slave)['online']
                for slave in custom_nodes),
                 timeout=10 * 60)
            raise AssertionError('Some nodes online')
        except TimeoutError:
            logger.info('Nodes are offline')

        self.env.make_snapshot("delete_cluster_with_custom_nodegroup")
Example #42
    def separate_horizon_service_add_delete_node(self):
        """Add and delete horizon node

        Scenario:
            1. Revert snapshot separate_horizon_service
            2. Add one horizon node and re-deploy cluster
            3. Run network verification
            4. Run OSTF
            5. Delete one horizon node
            6. Run network verification
            7. Run ostf

        Duration 30m
        """
        self.env.revert_snapshot("separate_horizon_service")
        cluster_id = self.fuel_web.get_last_created_cluster()

        node = {'slave-09': ['horizon']}
        self.fuel_web.update_nodes(cluster_id, node, True, False)

        self.fuel_web.deploy_cluster_wait(cluster_id)
        self.fuel_web.verify_network(cluster_id)
        self.fuel_web.run_ostf(cluster_id=cluster_id,
                               test_sets=['sanity', 'smoke', 'ha'])

        nailgun_node = self.fuel_web.update_nodes(cluster_id, node, False,
                                                  True)
        nodes = [
            _node for _node in nailgun_node
            if _node["pending_deletion"] is True
        ]
        self.fuel_web.deploy_cluster_wait(cluster_id, check_services=False)
        wait(lambda: self.fuel_web.is_node_discovered(nodes[0]),
             timeout=6 * 60)
        self.fuel_web.verify_network(cluster_id)
        self.fuel_web.run_ostf(cluster_id=cluster_id,
                               test_sets=['sanity', 'smoke', 'ha'])
Example #43
    def setup_environment(self,
                          custom=settings.CUSTOM_ENV,
                          build_images=settings.BUILD_IMAGES,
                          iso_connect_as=settings.ADMIN_BOOT_DEVICE,
                          security=settings.SECURITY_TEST):
        # Create environment and start the Fuel master node
        admin = self.d_env.nodes().admin
        self.d_env.start([admin])

        logger.info("Waiting for admin node to start up")
        wait(lambda: admin.driver.node_active(admin), 60)
        logger.info("Proceed with installation")
        # update network parameters at boot screen
        admin.send_keys(
            self.get_keys(admin,
                          custom=custom,
                          build_images=build_images,
                          iso_connect_as=iso_connect_as))
        if settings.SHOW_FUELMENU:
            self.wait_for_fuelmenu()
        else:
            self.wait_for_provisioning()

        self.set_admin_ssh_password()

        self.wait_for_external_config()
        if custom:
            self.setup_customisation()
        if security:
            nessus_node = NessusActions(self.d_env)
            nessus_node.add_nessus_node()
        # wait while installation complete

        self.admin_actions.modify_configs(self.d_env.router())
        self.kill_wait_for_external_config()
        self.wait_bootstrap()
        self.admin_actions.wait_for_fuel_ready()
Example #44
    def delete_on_ready_ubuntu_bootstrap(self):
        """Delete cluster cluster in HA mode\
        with 1 controller on Ubuntu Bootstrap

        Scenario:
            1. Delete cluster
            2. Verify bootstrap on slaves

        Duration 30m
        Snapshot: delete_on_ready_ubuntu_bootstrap
        """
        if not self.env.revert_snapshot(
                'deploy_stop_on_deploying_ubuntu_bootstrap'):
            raise SkipTest()

        cluster_id = self.fuel_web.get_last_created_cluster()

        # Delete cluster, then verify bootstrap on slaves
        self.fuel_web.client.delete_cluster(cluster_id)

        # wait nodes go to reboot
        wait(lambda: not self.fuel_web.client.list_nodes(), timeout=10 * 60)

        # wait for nodes to appear after bootstrap
        wait(lambda: len(self.fuel_web.client.list_nodes()) == 3,
             timeout=10 * 60)

        nodes = self.env.d_env.get_nodes(
            name__in=["slave-01", "slave-02", "slave-03"])
        for node in nodes:
            with self.fuel_web.get_ssh_for_node(node.name) as slave_remote:
                checkers.verify_bootstrap_on_node(slave_remote,
                                                  os_type="ubuntu")

        self.env.make_snapshot(
            "delete_on_ready_ubuntu_bootstrap",
            is_make=True)
Example #45
def verify_instance_state(os_conn,
                          instances=None,
                          expected_state='ACTIVE',
                          boot_timeout=BOOT_TIMEOUT):
    """Verify that current state of each instance/s is expected.

    :param os_conn: type object, openstack
    :param instances: type list, list of created instances
    :param expected_state: type string, expected state of instance
    :param boot_timeout: type int, time in seconds to build instance
    """
    if not instances:
        instances = os_conn.nova.servers.list()
    for instance in instances:
        try:
            wait(lambda: os_conn.get_instance_detail(instance).status ==
                 expected_state,
                 timeout=boot_timeout)
        except TimeoutError:
            current_state = os_conn.get_instance_detail(instance).status
            assert_true(
                current_state == expected_state,
                "Timeout is reached. Current state of Vm {0} is {1}".format(
                    instance.name, current_state))
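A hedged usage sketch: when no instance list is passed, the helper above falls back to every server visible through os_conn, so a bare call after booting VMs is enough. The timeout value below is illustrative.

# Hypothetical check after instances were booted earlier in the test.
verify_instance_state(os_conn, expected_state='ACTIVE', boot_timeout=300)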
Example #46
    def assign_floating_ip(self, srv, use_neutron=False):
        if use_neutron:
            #   Find external net id for tenant
            nets = self.neutron.list_networks()['networks']
            err_msg = "Active external network not found in nets:{}"
            ext_net_ids = [
                net['id'] for net in nets
                if net['router:external'] and net['status'] == "ACTIVE"
            ]
            asserts.assert_true(ext_net_ids, err_msg.format(nets))
            net_id = ext_net_ids[0]
            #   Find instance port
            ports = self.neutron.list_ports(device_id=srv.id)['ports']
            err_msg = "Not found active ports for instance:{}"
            asserts.assert_true(ports, err_msg.format(srv.id))
            port = ports[0]
            #   Create floating IP
            body = {
                'floatingip': {
                    'floating_network_id': net_id,
                    'port_id': port['id']
                }
            }
            flip = self.neutron.create_floatingip(body)
            #   Wait active state for port
            port_id = flip['floatingip']['port_id']
            state = lambda: self.neutron.show_port(port_id)['port']['status']
            helpers.wait(lambda: state() == "ACTIVE")
            return flip['floatingip']

        fl_ips_pool = self.nova.floating_ip_pools.list()
        if fl_ips_pool:
            floating_ip = self.nova.floating_ips.create(
                pool=fl_ips_pool[0].name)
            self.nova.servers.add_floating_ip(srv, floating_ip)
            return floating_ip
Example #47
def update_ostf():
    logger.info("Uploading new package from {0}".format(
        settings.UPDATE_FUEL_PATH))
    ssh = SSHManager()
    pack_path = '/var/www/nailgun/fuel-ostf/'
    full_pack_path = os.path.join(pack_path, 'fuel-ostf*.noarch.rpm')
    ssh.upload_to_remote(ssh.admin_ip,
                         source=settings.UPDATE_FUEL_PATH.rstrip('/'),
                         target=pack_path)

    # Check old fuel-ostf package
    cmd = "rpm -q fuel-ostf"

    old_package = ssh.execute_on_remote(ssh.admin_ip, cmd=cmd)['stdout_str']
    logger.info('Current package version of '
                'fuel-ostf: {0}'.format(old_package))

    cmd = "rpm -qp {0}".format(full_pack_path)
    new_package = ssh.execute_on_remote(ssh.admin_ip, cmd=cmd)['stdout_str']
    logger.info('Package from review {0}'.format(new_package))

    if old_package == new_package:
        logger.info('Package {0} is installed'.format(new_package))
        return

    cmd = "service ostf stop"
    ssh.execute_on_remote(ssh.admin_ip, cmd=cmd)
    cmd = "service ostf status"
    helpers.wait(lambda: "dead" in ssh.execute_on_remote(
        ssh.admin_ip, cmd=cmd, raise_on_assert=False, assert_ec_equal=[3])[
            'stdout_str'],
                 timeout=60)
    logger.info("OSTF status: inactive")
    cmd = "rpm -e fuel-ostf"
    ssh.execute_on_remote(ssh.admin_ip, cmd=cmd)
    cmd = "rpm -Uvh --oldpackage {0}".format(full_pack_path)
    ssh.execute_on_remote(ssh.admin_ip, cmd=cmd)
    cmd = "rpm -q fuel-ostf"
    installed_package = ssh.execute_on_remote(ssh.admin_ip,
                                              cmd=cmd)['stdout_str']

    assert_equal(
        installed_package, new_package,
        "The new package {0} was not installed. Actual {1}".format(
            new_package, installed_package))
    cmd = "service ostf start"
    ssh.execute_on_remote(ssh.admin_ip, cmd=cmd)
    cmd = "service ostf status"
    helpers.wait(lambda: "running" in ssh.execute_on_remote(
        ssh.admin_ip, cmd=cmd)['stdout_str'],
                 timeout=60)
    cmd = "curl -s -o /dev/null -w '%{http_code}' http://127.0.0.1:8777"
    helpers.wait(lambda: "401" in ssh.execute_on_remote(
        ssh.admin_ip, cmd=cmd, raise_on_assert=False)['stdout_str'],
                 timeout=60)
    logger.info("OSTF status: RUNNING")
Example #48
    def separate_rabbit_service_shutdown(self):
        """Shutdown one rabbit node

        Scenario:
            1. Revert snapshot separate_rabbit_service
            2. Destroy rabbit node that is master
            3. Wait HA is working
            4. Run OSTF

        Duration 30m
        """
        self.env.revert_snapshot("separate_rabbit_service")
        cluster_id = self.fuel_web.get_last_created_cluster()
        # destroy master rabbit node
        rabbit_node = self.fuel_web.get_rabbit_master_node(
            self.env.d_env.nodes().slaves[3].name)
        rabbit_node.destroy()
        wait(lambda: not self.fuel_web.get_nailgun_node_by_devops_node(
            rabbit_node)['online'], timeout=60 * 5)
        self.fuel_web.assert_ha_services_ready(cluster_id)
        self.fuel_web.assert_os_services_ready(cluster_id, timeout=15 * 60)

        self.fuel_web.run_ostf(
            cluster_id=cluster_id)
Example #49
0
    def create_instance_with_vmxnet3_adapter(self):
        """Create instance with vmxnet3 adapter."""
        public_ip = self.fuel_web.get_public_vip(self.cluster_id)
        os_conn = OpenStackActions(public_ip)

        image = os_conn.get_image(self.image_name)
        os_conn.update_image(image,
                             properties={"hw_vif_model": "VirtualVmxnet3"})
        flavor = os_conn.get_flavor_by_name('m1.small')
        sg = os_conn.get_security_group(self.sg_name)
        net = os_conn.get_network(self.net_name)
        vm = os_conn.create_server(image=image,
                                   availability_zone=self.vcenter_az,
                                   net_id=net['id'],
                                   security_groups=[sg],
                                   flavor_id=flavor.id,
                                   timeout=666)
        floating_ip = os_conn.assign_floating_ip(vm)
        helpers.wait(lambda: helpers.tcp_ping(floating_ip.ip, 22),
                     timeout=180,
                     timeout_msg="Node {ip} is not accessible by SSH.".format(
                         ip=floating_ip.ip))

        controller = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
            self.cluster_id, ["controller"])[0]
        with self.fuel_web.get_ssh_for_nailgun_node(controller) as remote:
            cmd = '/usr/bin/lshw -class network | grep vmxnet3'
            res = os_conn.execute_through_host(remote,
                                               floating_ip.ip,
                                               cmd,
                                               creds=self.image_creds)
            logger.debug('OUTPUT: {}'.format(res))
            assert_equal(res['exit_code'], 0, "VMxnet3 driver is not found")

        os_conn.delete_instance(vm)
        os_conn.verify_srv_deleted(vm)
Example #50
0
    def create_bootable_volume_and_run_instance(self):
        """Create bootable volume and launch instance from it."""
        public_ip = self.fuel_web.get_public_vip(self.cluster_id)
        os_conn = OpenStackActions(public_ip)

        image = os_conn.get_image(self.vmware_image)
        vol = os_conn.create_volume(image_id=image.id,
                                    availability_zone=self.cinder_az)
        block_device_mapping = {'vda': vol.id}

        net = os_conn.get_network(self.net_name)
        # image=False: boot the instance from the mapped volume, not an image
        vm = os_conn.create_server(availability_zone=self.vcenter_az,
                                   image=False,
                                   net_id=net['id'],
                                   block_device_mapping=block_device_mapping)
        floating_ip = os_conn.assign_floating_ip(vm)
        helpers.wait(lambda: helpers.tcp_ping(floating_ip.ip, 22),
                     timeout=180,
                     timeout_msg="Node {ip} is not accessible by SSH.".format(
                         ip=floating_ip.ip))

        os_conn.delete_instance(vm)
        os_conn.verify_srv_deleted(vm)
        os_conn.delete_volume_and_wait(vol)
Example #51
0
def check_connection_vms(ip_pair,
                         command='pingv4',
                         result_of_command=0,
                         timeout=30,
                         interval=5):
    """Check network connectivity between instances.

    :param ip_pair: type dict, {ip_from: [ip_to1, ip_to2, etc.]}
    :param command: type string, key from the 'commands' dictionary,
                    defaults to 'pingv4'
    :param result_of_command: type integer, expected exit code of the
                              command execution, defaults to 0
    :param timeout: time to wait for the expected result, in seconds
    :param interval: interval between command executions, in seconds

    A usage sketch follows this example.
    """
    commands = {
        "pingv4": "ping -c 5 {}",
        "pingv6": "ping6 -c 5 {}",
        "arping": "sudo arping -I eth0 {}",
        "ssh": " "
    }

    msg = 'Command "{0}", Actual exit code is NOT {1}'
    for ip_from in ip_pair:
        with get_ssh_connection(ip_from, instance_creds[0],
                                instance_creds[1]) as ssh:
            for ip_to in ip_pair[ip_from]:
                logger.info("Check connection from {0} to {1}.".format(
                    ip_from, ip_to))
                cmd = commands[command].format(ip_to)

                wait(lambda: execute(ssh, cmd)['exit_code'] ==
                     result_of_command,
                     interval=interval,
                     timeout=timeout,
                     timeout_msg=msg.format(cmd, result_of_command))
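
A minimal usage sketch for the helper above; the instance addresses and the IPv6 peer below are purely illustrative, not taken from any real deployment:

# Hypothetical addresses: 10.0.0.3 must reach two IPv4 peers over ICMP,
# and 10.0.0.4 must reach one IPv6 peer.
ip_pair = {
    '10.0.0.3': ['10.0.0.4', '10.0.0.5'],
    '10.0.0.4': ['fd00::5'],
}
check_connection_vms(ip_pair)  # default ICMPv4 ping, exit code 0 expected
check_connection_vms({'10.0.0.4': ['fd00::5']},
                     command='pingv6',
                     timeout=60,
                     interval=10)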
Example #52
0
    def check_stopping_resources(self):
        """Check stopping pacemaker resources"""

        logger.info(
            "Waiting {} seconds for the pacemaker status of {} "
            "to change".format(
                self.pacemaker_restart_timeout, self.primary_controller_fqdn))
        time.sleep(self.pacemaker_restart_timeout)

        with self.fuel_web.get_ssh_for_node(
                self.primary_controller.name) as remote:

            def checking_health_disk_attribute():
                logger.info("Checking for '#health_disk' attribute")
                cibadmin_status_xml = run_on_remote_get_results(
                    remote, 'cibadmin --query --scope status')['stdout_str']
                pcs_attribs = get_pacemaker_nodes_attributes(
                    cibadmin_status_xml)
                return '#health_disk' in pcs_attribs[
                    self.primary_controller_fqdn]

            def checking_for_red_in_health_disk_attribute():
                logger.info(
                    "Checking that '#health_disk' attribute has 'red' value")
                cibadmin_status_xml = run_on_remote_get_results(
                    remote, 'cibadmin --query --scope status')['stdout_str']
                pcs_attribs = get_pacemaker_nodes_attributes(
                    cibadmin_status_xml)
                return pcs_attribs[
                    self.primary_controller_fqdn]['#health_disk'] == 'red'

            def check_stopping_resources():
                logger.info("Checking that 'running_resources' "
                            "attribute has '0' value")
                pcs_status = parse_pcs_status_xml(remote)
                pcs_attribs = get_pcs_nodes(pcs_status)
                return pcs_attribs[
                    self.primary_controller_fqdn]['resources_running'] == '0'

            wait(checking_health_disk_attribute,
                 "Attribute #health_disk did not appear "
                 "in attributes on node {} within {} seconds".format(
                     self.primary_controller_fqdn, self.pcs_check_timeout),
                 timeout=self.pcs_check_timeout)

            wait(checking_for_red_in_health_disk_attribute,
                 "Attribute #health_disk doesn't have the 'red' value "
                 "on node {} within {} seconds".format(
                     self.primary_controller_fqdn, self.pcs_check_timeout),
                 timeout=self.pcs_check_timeout)

            wait(check_stopping_resources,
                 "Attribute 'running_resources' doesn't have the '0' value "
                 "on node {} within {} seconds".format(
                     self.primary_controller_fqdn, self.pcs_check_timeout),
                 timeout=self.pcs_check_timeout)
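
The helpers get_pacemaker_nodes_attributes and get_pcs_nodes used above come from the test framework itself. The sketch below is only a rough, self-contained illustration of the kind of parsing the first helper performs on a `cibadmin --query --scope status` dump, assuming the usual pacemaker XML layout; it is not the framework's actual implementation:

import xml.etree.ElementTree as ET


def parse_node_attributes(cibadmin_status_xml):
    """Map node uname -> {attribute name: value} from a CIB status dump.

    Assumed layout: <status><node_state uname="...">
        ...<instance_attributes><nvpair name="#health_disk" value="red"/>...
    """
    attributes = {}
    root = ET.fromstring(cibadmin_status_xml)
    for node_state in root.iter('node_state'):
        # Collect every nvpair reported for this node, e.g. '#health_disk'
        attributes[node_state.get('uname')] = {
            nvpair.get('name'): nvpair.get('value')
            for nvpair in node_state.iter('nvpair')
        }
    return attributes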
Example #53
0
    def create_and_attach_empty_volume(self):
        """Create and attach to instance empty volume."""
        mount_point = '/dev/sdb'

        public_ip = self.fuel_web.get_public_vip(self.cluster_id)
        os_conn = OpenStackActions(public_ip)

        vol = os_conn.create_volume(availability_zone=self.cinder_az)
        image = os_conn.get_image(self.vmware_image)
        net = os_conn.get_network(self.net_name)
        sg = os_conn.get_security_group(self.sg_name)
        vm = os_conn.create_server(image=image,
                                   availability_zone=self.vcenter_az,
                                   security_groups=[sg],
                                   net_id=net['id'],
                                   timeout=210)
        floating_ip = os_conn.assign_floating_ip(vm)
        helpers.wait(lambda: helpers.tcp_ping(floating_ip.ip, 22),
                     timeout=180,
                     timeout_msg="Node {ip} is not accessible by SSH.".format(
                         ip=floating_ip.ip))

        logger.info("Attaching volume via cli")
        ctrl_nodes = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
            self.cluster_id, ["controller"])
        cmd = '. openrc; nova volume-attach {srv_id} {volume_id} {mount}' \
              ''.format(srv_id=vm.id, volume_id=vol.id, mount=mount_point)
        logger.debug('CMD: {}'.format(cmd))
        SSHManager().execute_on_remote(ctrl_nodes[0]['ip'], cmd)

        helpers.wait(lambda: os_conn.get_volume_status(vol) == "in-use",
                     timeout=30,
                     timeout_msg="Volume doesn't reach 'in-use' state")

        vm.reboot()
        sleep(10)
        helpers.wait(lambda: helpers.tcp_ping(floating_ip.ip, 22),
                     timeout=180,
                     timeout_msg="Node {ip} is not accessible by SSH.".format(
                         ip=floating_ip.ip))

        controller = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
            self.cluster_id, ["controller"])[0]
        with self.fuel_web.get_ssh_for_nailgun_node(controller) as remote:
            cmd = 'sudo /sbin/fdisk -l | grep {}'.format(mount_point)
            res = remote.execute_through_host(hostname=floating_ip.ip,
                                              cmd=cmd,
                                              auth=cirros_auth)
            logger.debug('OUTPUT: {}'.format(res['stdout_str']))
            assert_equal(res['exit_code'], 0, "Attached volume is not found")

        os_conn.delete_instance(vm)
        os_conn.verify_srv_deleted(vm)
        os_conn.delete_volume(vol)
Example #54
0
def update_ostf(environment):
    logger.info("Uploading new package from {0}".format(
        settings.UPDATE_FUEL_PATH))
    pack_path = '/var/www/nailgun/fuel-ostf/'
    full_pack_path = os.path.join(pack_path, 'fuel-ostf*.noarch.rpm')

    with environment.d_env.get_admin_remote() as remote:
        remote.upload(settings.UPDATE_FUEL_PATH.rstrip('/'), pack_path)

    # Check old fuel-ostf package
    cmd = "rpm -q fuel-ostf"

    old_package = environment.base_actions.execute(cmd, exit_code=0)
    logger.info('Current package version of '
                'fuel-ostf: {0}'.format(old_package))

    cmd = "rpm -qp {0}".format(full_pack_path)
    new_package = environment.base_actions.execute(cmd)
    logger.info('Package from review {0}'.format(new_package))

    if old_package == new_package:
        logger.info('Package {0} is already installed'.format(new_package))
        return

    cmd = "service ostf stop"
    environment.base_actions.execute(cmd)
    cmd = "service ostf status"
    helpers.wait(lambda: "dead" in environment.base_actions.execute(cmd),
                 timeout=60)
    logger.info("OSTF status: inactive")
    cmd = "rpm -e fuel-ostf"
    environment.base_actions.execute(cmd, exit_code=0)
    cmd = "rpm -Uvh --oldpackage {0}".format(full_pack_path)
    environment.base_actions.execute(cmd, exit_code=0)
    cmd = "rpm -q fuel-ostf"
    installed_package = environment.base_actions.execute(cmd)

    assert_equal(
        installed_package, new_package,
        "The new package {0} was not installed. Actual package: {1}".format(
            new_package, installed_package))
    cmd = "service ostf start"
    environment.base_actions.execute(cmd)
    cmd = "service ostf status"
    helpers.wait(lambda: "running" in environment.base_actions.execute(
        cmd, exit_code=0),
                 timeout=60)
    cmd = "curl -s -o /dev/null -w '%{http_code}' http://127.0.0.1:8777"
    helpers.wait(lambda: "401" in environment.base_actions.execute(cmd),
                 timeout=60)
    logger.info("OSTF status: RUNNING")
Example #55
0
    def migrate_vm_backed_with_ceph(self):
        """Check VM backed with ceph migration in ha mode with 1 controller

        Scenario:
            1. Create cluster
            2. Add 1 node with controller and ceph OSD roles
            3. Add 2 nodes with compute and ceph OSD roles
            4. Deploy the cluster
            5. Check ceph status
            6. Run OSTF
            7. Create a new VM, assign floating ip
            8. Migrate VM
            9. Check cluster and server state after migration
            10. Terminate VM
            11. Check that DHCP lease is not offered for MAC of deleted VM
            12. Create a new VM for migration, assign floating ip
            13. Create a volume and attach it to the VM
            14. Create filesystem on the new volume and mount it to the VM
            15. Migrate VM
            16. Mount the volume after migration
            17. Check cluster and server state after migration
            18. Terminate VM

        Duration 35m
        Snapshot vm_backed_with_ceph_live_migration
        """
        self.env.revert_snapshot("ready_with_3_slaves")

        self.show_step(1, initialize=True)

        cluster_id = self.fuel_web.create_cluster(
            name=self.__class__.__name__,
            mode=settings.DEPLOYMENT_MODE,
            settings={
                'volumes_ceph': True,
                'images_ceph': True,
                'ephemeral_ceph': True,
                'volumes_lvm': False,
            }
        )

        self.show_step(2)
        self.show_step(3)

        self.fuel_web.update_nodes(
            cluster_id,
            {
                'slave-01': ['controller', 'ceph-osd'],
                'slave-02': ['compute', 'ceph-osd'],
                'slave-03': ['compute', 'ceph-osd']
            }
        )
        creds = ("cirros", "test")

        self.show_step(4)

        # Cluster deploy
        self.fuel_web.deploy_cluster_wait(cluster_id)

        def _check():
            # Run the volume smoke test; it is retried once below if it fails
            test_path = ostf_test_mapping.OSTF_TEST_MAPPING.get(
                'Create volume and attach it to instance')
            logger.debug('Start to run test {0}'.format(test_path))
            self.fuel_web.run_single_ostf_test(
                cluster_id, test_sets=['smoke'],
                test_name=test_path)

        self.show_step(5)
        try:
            _check()
        except AssertionError as err:
            logger.debug(err)
            logger.debug("Test failed on the first probe; sleeping 60 seconds "
                         "and trying once more. If it fails again, "
                         "the test fails.")
            time.sleep(60)
            _check()

        self.show_step(6)

        # Run ostf
        self.fuel_web.run_ostf(cluster_id)

        self.show_step(7)

        # Create new server
        os = os_actions.OpenStackActions(
            self.fuel_web.get_public_vip(cluster_id))
        net_name = self.fuel_web.get_cluster_predefined_networks_name(
            cluster_id)['private_net']

        logger.info("Create new server")
        srv = os.create_server_for_migration(
            neutron=True,
            scenario='./fuelweb_test/helpers/instance_initial_scenario',
            label=net_name)
        logger.info("Srv is currently in status: {:s}".format(srv.status))

        # Prepare for DHCP lease checks (net_name was already resolved above)
        srv_instance_ip = os.get_nova_instance_ip(srv, net_name=net_name)
        srv_host_name = self.fuel_web.find_devops_node_by_nailgun_fqdn(
            os.get_srv_hypervisor_name(srv),
            self.env.d_env.nodes().slaves[:3]).name
        net_id = os.get_network(net_name)['id']
        ports = os.get_neutron_dhcp_ports(net_id)
        dhcp_server_ip = ports[0]['fixed_ips'][0]['ip_address']
        with self.fuel_web.get_ssh_for_node(srv_host_name) as srv_remote_node:
            srv_instance_mac = os.get_instance_mac(srv_remote_node, srv)

        logger.info("Assigning floating ip to server")
        floating_ip = os.assign_floating_ip(srv)
        srv_host = os.get_srv_host_name(srv)
        logger.info("Server is on host {:s}".format(srv_host))

        wait(lambda: tcp_ping(floating_ip.ip, 22), timeout=120)

        def ssh_ready(remote, ip, creds):
            try:
                os.execute_through_host(remote, ip, '/bin/true', creds)
                return True
            except paramiko.AuthenticationException:
                logger.info("Authentication failed. Trying again in a minute.")
                time.sleep(60)
                return False

        with self.fuel_web.get_ssh_for_node("slave-01") as remote:
            wait(lambda: ssh_ready(remote, floating_ip.ip, creds), timeout=300)
            md5before = os.get_md5sum(
                "/home/test_file", remote, floating_ip.ip, creds)

        self.show_step(8)

        logger.info("Get available computes")
        avail_hosts = os.get_hosts_for_migr(srv_host)

        logger.info("Migrating server")
        new_srv = os.migrate_server(srv, avail_hosts[0], timeout=200)
        logger.info("Check cluster and server state after migration")

        wait(lambda: tcp_ping(floating_ip.ip, 22), timeout=120)

        with self.fuel_web.get_ssh_for_node("slave-01") as remote:
            md5after = os.get_md5sum(
                "/home/test_file", remote, floating_ip.ip, creds)

        assert_true(
            md5after in md5before,
            "MD5 checksums don't match. "
            "Before migration md5 was: {bef}. "
            "After migration it is: {aft}".format(bef=md5before, aft=md5after))

        self.show_step(9)

        with self.fuel_web.get_ssh_for_node("slave-01") as remote:
            res = os.execute_through_host(
                remote, floating_ip.ip,
                "ping -q -c3 -w10 {0} | grep 'received' |"
                " grep -v '0 packets received'"
                .format(settings.PUBLIC_TEST_IP), creds)
        logger.info("Ping {0} result on vm is: {1}"
                    .format(settings.PUBLIC_TEST_IP, res['stdout']))

        logger.info("Check Ceph health is ok after migration")
        self.fuel_web.check_ceph_status(cluster_id)

        logger.info(
            "Server is now on host {:s}".format(os.get_srv_host_name(new_srv)))

        self.show_step(10)

        logger.info("Terminate migrated server")
        os.delete_instance(new_srv)
        os.verify_srv_deleted(new_srv)

        self.show_step(11)
        # Check if the dhcp lease for instance still remains
        # on the previous compute node. Related Bug: #1391010
        _ip = self.fuel_web.get_nailgun_node_by_name('slave-01')['ip']
        with self.fuel_web.get_ssh_for_node('slave-01') as remote:
            dhcp_port_tag = ovs_get_tag_by_port(remote, ports[0]['id'])
            assert_false(checkers.check_neutron_dhcp_lease(_ip,
                                                           srv_instance_ip,
                                                           srv_instance_mac,
                                                           dhcp_server_ip,
                                                           dhcp_port_tag),
                         "Instance has been deleted, but it's DHCP lease "
                         "for IP:{0} with MAC:{1} still offers by Neutron DHCP"
                         " agent.".format(srv_instance_ip,
                                          srv_instance_mac))
        self.show_step(12)
        # Create a new server
        logger.info("Create a new server for migration with volume")
        srv = os.create_server_for_migration(
            neutron=True,
            scenario='./fuelweb_test/helpers/instance_initial_scenario',
            label=net_name)
        logger.info("Srv is currently in status: {:s}".format(srv.status))

        logger.info("Assigning floating ip to server")
        floating_ip = os.assign_floating_ip(srv)
        srv_host = os.get_srv_host_name(srv)
        logger.info("Server is on host {:s}".format(srv_host))

        self.show_step(13)
        logger.info("Create volume")
        vol = os.create_volume()
        logger.info("Attach volume to server")
        os.attach_volume(vol, srv)

        self.show_step(14)
        wait(lambda: tcp_ping(floating_ip.ip, 22), timeout=120)
        logger.info("Create filesystem and mount volume")

        with self.fuel_web.get_ssh_for_node("slave-01") as remote:
            wait(lambda: ssh_ready(remote, floating_ip.ip, creds), timeout=300)

            os.execute_through_host(
                remote,
                floating_ip.ip, 'sudo sh /home/mount_volume.sh', creds)

            os.execute_through_host(
                remote,
                floating_ip.ip, 'sudo touch /mnt/file-on-volume', creds)

        self.show_step(15)
        logger.info("Get available computes")
        avail_hosts = os.get_hosts_for_migr(srv_host)

        logger.info("Migrating server")
        new_srv = os.migrate_server(srv, avail_hosts[0], timeout=120)

        logger.info("Check cluster and server state after migration")
        wait(lambda: tcp_ping(floating_ip.ip, 22), timeout=120)

        self.show_step(16)
        logger.info("Mount volume after migration")
        with self.fuel_web.get_ssh_for_node("slave-01") as remote:
            out = os.execute_through_host(
                remote,
                floating_ip.ip, 'sudo mount /dev/vdb /mnt', creds)

        logger.info("out of mounting volume is: {:s}".format(out['stdout']))

        with self.fuel_web.get_ssh_for_node("slave-01") as remote:
            out = os.execute_through_host(
                remote,
                floating_ip.ip, "sudo ls /mnt", creds)
        assert_true("file-on-volume" in out['stdout'],
                    "File is absent in /mnt")

        self.show_step(17)
        logger.info("Check Ceph health is ok after migration")
        self.fuel_web.check_ceph_status(cluster_id)

        logger.info(
            "Server is now on host {:s}".format(os.get_srv_host_name(new_srv)))

        self.show_step(18)
        logger.info("Terminate migrated server")
        os.delete_instance(new_srv)
        os.verify_srv_deleted(new_srv)

        self.env.make_snapshot(
            "vm_backed_with_ceph_live_migration")
Example #56
0
    def recovery_neutron_agents_after_restart(self):
        """Recovery of neutron agents after restart

        Scenario:
        1. Pre-condition - do steps from 'deploy_ha_cinder' test
        2. Kill all neutron agents on one of the controllers.

           Pacemaker should restart them

           2.1 Verify that the output of
               crm status | grep -A1 "clone_p_neutron-l3-agent"
               has failed status for the controller
               (a standalone sketch of this check follows this example)

           2.2 Verify that the neutron-l3-agent process was restarted,
               using ps aux | grep neutron-l3-agent

           2.3 Verify that the output of
               crm status | grep -A1 "clone_p_neutron-l3-agent"
               has started status for the controller

        3. Verify networks
        4. Run OSTF tests

        Duration 20m
        Snapshot recovery_neutron_agents_after_restart
        """

        self.show_step(1, initialize=True)
        self.env.revert_snapshot('deploy_ha_cinder')

        self.show_step(2)
        neutron_agents = [
            {'name': 'neutron-openvswitch-agent',
             'resource': 'neutron-openvswitch-agent'},
            {'name': 'neutron-l3-agent',
             'resource': 'neutron-l3-agent'},
            {'name': 'neutron-dhcp-agent',
             'resource': 'neutron-dhcp-agent'},
            {'name': 'neutron-metadata-agent',
             'resource': 'neutron-metadata-agent'}
        ]

        cluster_id = self.fuel_web.get_last_created_cluster()
        controllers = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
            cluster_id, roles=('controller',))
        assert_equal(len(controllers), 3,
                     'Environment does not have 3 controller nodes, '
                     'found {} nodes!'.format(len(controllers)))

        for agent in neutron_agents:
            target_controllers = self.fuel_web.get_pacemaker_resource_location(
                controllers[0]['fqdn'], agent['resource'])
            assert_true(len(target_controllers) >= 1,
                        "Didn't find a controller with "
                        "{0} running on it".format(agent['name']))
            target_controller = self.fuel_web.get_nailgun_node_by_devops_node(
                target_controllers[0])
            old_pids = self.ssh_manager.execute(
                target_controller['ip'],
                cmd='pgrep -f {}'.format(agent['name']))['stdout']
            assert_true(len(old_pids) > 0,
                        'PIDs of {0} not found on {1}'.format(
                            agent['name'], target_controller['name']))
            logger.debug('Old PIDs of {0} on {1}: {2}'.format(
                agent['name'], target_controller['name'], old_pids))
            result = self.ssh_manager.execute(
                target_controller['ip'],
                cmd='pkill -9 -f {}'.format(agent['name']))
            assert_equal(result['exit_code'], 0,
                         'Processes of {0} were not killed on {1}: {2}'.format(
                             agent['name'], target_controller['name'], result))
            wait(lambda: len(self.ssh_manager.execute(
                target_controller['ip'],
                cmd='pgrep -f {}'.format(agent['name']))['stdout']) > 0,
                timeout=60,
                timeout_msg='Neutron agent {0} was not recovered on node {1} '
                            'within 60 seconds!'.format(
                                agent['name'], target_controller['name']))
            new_pids = self.ssh_manager.execute(
                target_controller['ip'],
                cmd='pgrep -f {}'.format(agent['name']))['stdout']
            bad_pids = set(old_pids) & set(new_pids)
            assert_equal(len(bad_pids), 0,
                         '{0} processes with PIDs {1} were not '
                         'killed on {2}!'.format(agent['name'],
                                                 bad_pids,
                                                 target_controller['name']))

        self.show_step(3)
        self.fuel_web.verify_network(cluster_id)

        self.show_step(4)
        self.fuel_web.run_ostf(cluster_id, test_sets=['ha', 'smoke', 'sanity'])

        self.env.make_snapshot('recovery_neutron_agents_after_restart')
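
The test above verifies recovery by comparing agent PIDs before and after the kill; the crm checks listed in scenario steps 2.1 and 2.3 are not executed explicitly. A standalone sketch of that check is shown below. It assumes an SSHManager instance is available (as in Example #47), that a controller IP is known, and the helper name itself is hypothetical; the clone name is the one quoted in the docstring:

def l3_agent_clone_status(controller_ip):
    """Return the 'crm status' lines describing the neutron-l3-agent clone.

    Illustrative sketch only; raise_on_assert=False keeps a non-zero grep
    exit code (no match yet) from failing the call.
    """
    cmd = 'crm status | grep -A1 "clone_p_neutron-l3-agent"'
    return SSHManager().execute_on_remote(
        controller_ip, cmd=cmd, raise_on_assert=False)['stdout_str']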
Example #57
0
 def wait_for_fuel_ready(self, timeout=300):
     wait(lambda: self.is_fuel_ready, timeout=timeout,
          timeout_msg="Fuel services are not ready, please check the "
                      "output of 'fuel-utils check_all")
Example #58
0
    def compute_stop_reinstallation(self):
        """Verify stop reinstallation of compute.

        Scenario:
            1. Revert the snapshot
            2. Create an OS volume and OS instance
            3. Mark 'cinder' and 'vm' partitions to be preserved
            4. Stop reinstallation process of compute
            5. Start the reinstallation process again
            6. Run network verification
            7. Run OSTF
            8. Verify that the volume is present and has 'available' status
               after the node reinstallation
            9. Verify that the VM is available and pingable
               after the node reinstallation

        Duration: 115m

        """
        self.env.revert_snapshot("node_reinstallation_env")

        cluster_id = self.fuel_web.get_last_created_cluster()

        # Create an OS volume
        os_conn = os_actions.OpenStackActions(
            self.fuel_web.get_public_vip(cluster_id))

        volume = os_conn.create_volume()

        # Create an OS instance
        cmp_host = os_conn.get_hypervisors()[0]

        net_label = self.fuel_web.get_cluster_predefined_networks_name(
            cluster_id)['private_net']

        vm = os_conn.create_server_for_migration(
            neutron=True,
            availability_zone="nova:{0}".format(cmp_host.hypervisor_hostname),
            label=net_label)
        vm_floating_ip = os_conn.assign_floating_ip(vm)
        devops_helpers.wait(
            lambda: devops_helpers.tcp_ping(vm_floating_ip.ip, 22),
            timeout=120)

        cmp_nailgun = self.fuel_web.get_nailgun_node_by_fqdn(
            cmp_host.hypervisor_hostname)

        # Mark 'cinder' and 'vm' partitions to be preserved
        with self.env.d_env.get_admin_remote() as remote:
            PartitionPreservation._preserve_partition(remote,
                                                      cmp_nailgun['id'],
                                                      "cinder")
            PartitionPreservation._preserve_partition(remote,
                                                      cmp_nailgun['id'], "vm")

        slave_nodes = self.fuel_web.client.list_cluster_nodes(cluster_id)
        devops_nodes = self.fuel_web.get_devops_nodes_by_nailgun_nodes(
            slave_nodes)

        logger.info('Stop reinstallation process')
        self._stop_reinstallation(self.fuel_web, cluster_id,
                                  [str(cmp_nailgun['id'])], devops_nodes)

        self.fuel_web.verify_network(cluster_id)
        logger.info('Start the reinstallation process again')
        NodeReinstallationEnv._reinstall_nodes(self.fuel_web, cluster_id,
                                               [str(cmp_nailgun['id'])])

        self.fuel_web.verify_network(cluster_id)
        self.fuel_web.run_ostf(cluster_id, test_sets=['ha', 'smoke', 'sanity'])

        # Verify that the created volume is still available
        try:
            volume = os_conn.cinder.volumes.get(volume.id)
        except NotFound:
            raise AssertionError(
                "{0} volume is not available after its {1} hosting node "
                "reinstallation".format(volume.id, cmp_nailgun['fqdn']))
        expected_status = "available"
        assert_equal(
            expected_status, volume.status,
            "{0} volume status is {1} after its {2} hosting node "
            "reinstallation. Expected status is {3}.".format(
                volume.id, volume.status, cmp_nailgun['fqdn'],
                expected_status))

        # Verify that the VM is still available
        try:
            os_conn.verify_instance_status(vm, 'ACTIVE')
        except AssertionError:
            raise AssertionError(
                "{0} VM is not available after its {1} hosting node "
                "reinstallation".format(vm.name, cmp_host.hypervisor_hostname))
        assert_true(
            devops_helpers.tcp_ping(vm_floating_ip.ip, 22),
            "{0} VM is not accessible via its floating "
            "ip {1}".format(vm.name, vm_floating_ip.ip))
Example #59
0
 def wait_for_ready_container(self, timeout=300):
     wait(lambda: self.is_container_ready, timeout=timeout)
Example #60
0
    def separate_db_service_add_delete_node(self):
        """Add and delete database node

        Scenario:
            1. Revert snapshot separate_db_service
            2. Add one database node and re-deploy cluster
            3. Run network verification
            4. Run OSTF
            5. Check that hiera hosts are the same for
               different groups of roles
            6. Delete one database node
            7. Run network verification
            8. Run ostf
            9. Check that hiera hosts are the same for
               different groups of roles

        Duration 30m
        """
        self.env.revert_snapshot("separate_db_service")
        cluster_id = self.fuel_web.get_last_created_cluster()

        node = {'slave-09': ['standalone-database']}
        self.fuel_web.update_nodes(cluster_id, node, True, False)

        self.fuel_web.deploy_cluster_wait(cluster_id)
        self.fuel_web.verify_network(cluster_id)
        self.fuel_web.run_ostf(cluster_id=cluster_id,
                               test_sets=['sanity', 'smoke', 'ha'])

        checkers.check_hiera_hosts(
            self,
            self.fuel_web.client.list_cluster_nodes(cluster_id),
            cmd='hiera memcache_roles')

        database_nodes = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
            cluster_id, ['standalone-database'])
        logger.debug("database nodes are {0}".format(database_nodes))
        checkers.check_hiera_hosts(self,
                                   database_nodes,
                                   cmd='hiera corosync_roles')

        nailgun_node = self.fuel_web.update_nodes(cluster_id, node, False,
                                                  True)
        nodes = filter(lambda x: x["pending_deletion"] is True, nailgun_node)
        self.fuel_web.deploy_cluster_wait(cluster_id, check_services=False)
        wait(lambda: self.fuel_web.is_node_discovered(nodes[0]),
             timeout=6 * 60)
        self.fuel_web.verify_network(cluster_id)
        self.fuel_web.run_ostf(cluster_id=cluster_id,
                               test_sets=['sanity', 'smoke', 'ha'])

        checkers.check_hiera_hosts(
            self,
            self.fuel_web.client.list_cluster_nodes(cluster_id),
            cmd='hiera memcache_roles')

        database_nodes = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
            cluster_id, ['standalone-database'])
        logger.debug("database nodes are {0}".format(database_nodes))
        checkers.check_hiera_hosts(self,
                                   database_nodes,
                                   cmd='hiera corosync_roles')