def assert_cli_task_success(self, task, remote,
                            timeout=70 * 60, interval=20):
    """Poll a CLI task until it leaves the active states, then check it.

    :param task: task description; its 'id' and 'name' keys are read
    :param remote: connection object handed through to self.get_task()
    :param timeout: seconds to wait for the task to finish
    :param interval: seconds between two status polls
    :raises TimeoutError: the task is still 'pending' or 'running' when
        the timeout expires
    """
    logger.info('Wait {timeout} seconds for task: {task}'.format(
        timeout=timeout, task=task))

    def task_left_active_states():
        current_status = self.get_task(remote, task['id'])['status']
        return current_status not in ('pending', 'running')

    started_at = time.time()
    try:
        wait(task_left_active_states, interval=interval, timeout=timeout)
    except TimeoutError:
        timeout_message = (
            "Waiting timeout {timeout} sec was reached for task: {task}"
        ).format(task=task["name"], timeout=timeout)
        raise TimeoutError(timeout_message)
    elapsed = time.time() - started_at

    # Re-read the task so the log and the assertion see its final state.
    finished_task = self.get_task(remote, task['id'])
    logger.info(
        'Task finished in {took} seconds with the result: {task}'.format(
            took=elapsed, task=finished_task))

    failure_message = "Task '{name}' has incorrect status. {} != {}".format(
        finished_task['status'], 'ready', name=finished_task["name"])
    assert_equal(finished_task['status'], 'ready', failure_message)
def delete_cluster_with_custom_nodegroup(self):
    """Delete env, check nodes from custom nodegroup can't bootstrap

    Scenario:
        1. Revert snapshot with cluster with nodes in custom nodegroup
        2. Delete cluster
        3. Check nodes from custom nodegroup can't bootstrap
        4. Reset nodes from custom nodegroup
        5. Check nodes from custom nodegroup can't bootstrap

    Duration 15m
    """
    self.show_step(1, initialize=True)
    self.env.revert_snapshot('deploy_controllers_from_custom_nodegroup')
    cluster_id = self.fuel_web.get_last_created_cluster()
    self.fuel_web.assert_nodes_in_ready_state(cluster_id)

    self.show_step(2)
    # Slaves 4..6 are the ones this test treats as the custom nodegroup.
    custom_nodes = self.env.d_env.nodes().slaves[3:6]
    self.fuel_web.delete_env_wait(cluster_id)

    self.show_step(3)
    logger.info('Wait five nodes online for 900 seconds..')
    wait(lambda: len(self.fuel_web.client.list_nodes()) == 5,
         timeout=15 * 60)

    logger.info('Wait all nodes from custom nodegroup become '
                'in error state..')
    # check all custom in error state
    for slave in custom_nodes:
        # The lambda is consumed by wait() inside this iteration, so
        # closing over the loop variable is safe here.
        try:
            wait(lambda: self.fuel_web.get_nailgun_node_by_devops_node(
                slave)['status'] == 'error', timeout=15 * 60)
        except TimeoutError:
            raise TimeoutError('Node {} not become '
                               'error state'.format(slave.name))
        else:
            logger.info('Node {} become error state'.format(slave.name))

    self.show_step(4)
    logger.info('Rebooting nodes from custom nodegroup..')
    self.fuel_web.cold_restart_nodes(custom_nodes, wait_online=False)

    self.show_step(5)
    logger.info('Wait custom nodes are not online for 600 seconds..')
    try:
        wait(
            lambda: any(self.fuel_web.
                        get_nailgun_node_by_devops_node(slave)['online']
                        for slave in custom_nodes),
            timeout=10 * 60)
    except TimeoutError:
        # Expected outcome: no custom node came online within the window.
        logger.info('Nodes are offline')
    else:
        # wait() returned, i.e. at least one node reported online. The
        # previous `assert 'Some nodes online'` asserted a non-empty
        # string, which is always true, so this failure went unnoticed.
        raise AssertionError('Some nodes online')

    self.env.make_snapshot("delete_cluster_with_custom_nodegroup")
def neutron_l3_migration_after_reset(self):
    """Check l3-agent rescheduling after reset non-primary controller

    Scenario:
        1. Revert snapshot with neutron cluster
        2. Manually reschedule router from primary controller
           to another one
        3. Reset controller with l3-agent
        4. Check l3-agent was rescheduled
        5. Check network connectivity from instance via
           dhcp namespace
        6. Run OSTF

    Snapshot deploy_ha_neutron

    """
    self.env.revert_snapshot("deploy_ha_neutron")
    cluster_id = self.fuel_web.get_last_created_cluster()
    public_vip = self.fuel_web.get_public_vip(cluster_id)
    os_conn = os_actions.OpenStackActions(public_vip)

    # Open a remote to the node that hosts the DHCP agent of net04.
    net_id = os_conn.get_network('net04')['id']
    dhcp_node = self.get_node_with_dhcp(self, os_conn, net_id)
    remote = self.env.get_ssh_to_remote_by_name(dhcp_node.name)

    netns_output = remote.execute(
        'ip netns | grep {0}'.format(net_id))['stdout']
    dhcp_namespace = ''.join(netns_output).rstrip()
    logger.debug('dhcp namespace is {0}'.format(dhcp_namespace))

    instance = self.create_instance_with_keypair(os_conn, remote)
    instance_ip = instance.addresses['net04'][0]['addr']
    logger.debug('instance internal ip is {0}'.format(instance_ip))

    router_id = os_conn.get_routers_ids()[0]
    self.reshedule_router_manually(os_conn, router_id)
    self.check_instance_connectivity(remote, dhcp_namespace, instance_ip)

    node_with_l3 = os_conn.get_l3_agent_hosts(router_id)[0]
    l3_devops_node = self.get_node_with_l3(self, node_with_l3)
    self.fuel_web.warm_restart_nodes([l3_devops_node])

    def router_left_original_host():
        return not node_with_l3 == os_conn.get_l3_agent_hosts(router_id)[0]

    try:
        wait(router_left_original_host, timeout=60 * 3)
    except TimeoutError:
        still_on = os_conn.get_l3_agent_hosts(router_id)[0]
        raise TimeoutError(
            "l3 agent wasn't rescheduled, it is still {0}".format(still_on))
    wait(lambda: os_conn.get_l3_agent_ids(router_id), timeout=60)

    self.check_instance_connectivity(remote, dhcp_namespace, instance_ip)

    self.fuel_web.run_ostf(
        cluster_id=cluster_id,
        test_sets=['ha', 'smoke', 'sanity'])
def do_sync_time(self, ntps):
    """Run the stop / ntpdate / start / wait-peer cycle on NTP handlers.

    :param ntps: handler objects; stop(), set_actual_time(), start() and
        wait_peer() are called on each of them in that order
    :raises ValueError: ntps is empty or None
    :raises TimeoutError: self.is_synchronized() or self.is_connected()
        reports failure after the corresponding phase
    """
    # 0. 'ntps' can be filled by __init__() or outside the class
    if not ntps:
        raise ValueError("No servers were provided to synchronize "
                         "the time in self.ntps")

    def announce(template):
        # Node names are re-read for every phase, like the log calls
        # they replace.
        logger.debug(template.format(self.report_node_names(ntps)))

    # 1. Stop NTPD service on nodes
    announce("Stop NTPD service on nodes {0}")
    for server in ntps:
        server.stop()

    # 2. Set actual time on all nodes via 'ntpdate'
    announce("Set actual time on all nodes via 'ntpdate' on nodes {0}")
    for server in ntps:
        server.set_actual_time()

    if not self.is_synchronized(ntps):
        not_synced = self.report_not_synchronized(ntps)
        raise TimeoutError(
            "Time on nodes was not set with 'ntpdate':\n{0}".format(
                not_synced))

    # 3. Start NTPD service on nodes
    announce("Start NTPD service on nodes {0}")
    for server in ntps:
        server.start()

    # 4. Wait for established peers
    announce("Wait for established peers on nodes {0}")
    for server in ntps:
        server.wait_peer()

    if not self.is_connected(ntps):
        not_connected = self.report_not_connected(ntps)
        raise TimeoutError("NTPD on nodes was not synchronized:\n"
                           "{0}".format(not_connected))
def task_wait(self, task, timeout, interval=5):
    """Block until the task is no longer 'running', then return it.

    :param task: task description; its 'id' and 'name' keys are read
    :param timeout: seconds to wait before giving up
    :param interval: seconds between two status polls
    :returns: the task as re-fetched through self.client.get_task()
    :raises TimeoutError: the task is still 'running' after the timeout
    """
    logger.info('Wait for task %s %s seconds', task, timeout)

    def no_longer_running():
        return self.client.get_task(task['id'])['status'] != 'running'

    try:
        wait(no_longer_running, interval=interval, timeout=timeout)
    except TimeoutError:
        message = ("Waiting task \"{task}\" timeout {timeout} sec "
                   "was exceeded: ").format(task=task["name"],
                                            timeout=timeout)
        raise TimeoutError(message)

    return self.client.get_task(task['id'])
def task_wait_progress(self, task, timeout, interval=5, progress=None):
    """Block until the task's 'progress' reaches the requested value.

    :param task: task description; its 'id' and 'name' keys are read
    :param timeout: seconds to wait before giving up
    :param interval: seconds between two polls
    :param progress: threshold compared with ``>=`` against the task's
        'progress' field; None means "any progress value is enough"
    :returns: the task as re-fetched through self.client.get_task()
    :raises TimeoutError: the threshold was not reached within timeout
    """
    def progress_reached():
        current = self.client.get_task(task['id'])['progress']
        # Comparing a number with None raises TypeError on Python 3,
        # while Python 2 evaluates `number >= None` as True. Keep the
        # Python 2 meaning of the default explicitly so the default
        # argument works on both interpreters.
        return progress is None or current >= progress

    try:
        logger.info('start to wait with timeout {0} '
                    'interval {1}'.format(timeout, interval))
        wait(progress_reached, interval=interval, timeout=timeout)
    except TimeoutError:
        raise TimeoutError("Waiting task \"{task}\" timeout {timeout} sec "
                           "was exceeded: ".format(task=task["name"],
                                                   timeout=timeout))

    return self.client.get_task(task['id'])
def check_slaves_are_ready(self):
    """Wait until every active slave node is reported online.

    :returns: True once all active slaves are online
    :raises TimeoutError: a node is not online after six minutes
    """
    active_nodes = []
    for candidate in self.d_env.nodes().slaves:
        if candidate.driver.node_active(candidate):
            active_nodes.append(candidate)

    # Bug: 1455753
    time.sleep(30)

    for devops_node in active_nodes:

        def node_is_online():
            nailgun_node = self.fuel_web.get_nailgun_node_by_devops_node(
                devops_node)
            return nailgun_node['online']

        try:
            wait(node_is_online, timeout=60 * 6)
        except TimeoutError:
            raise TimeoutError(
                "Node {0} does not become online".format(devops_node.name))
    return True
def hiera_deploy(self):
    """Deploy cluster with controller node only

    Scenario:
        1. Start installation of master
        2. Enter "fuelmenu"
        3. Upload custom manifests
        4. Kill "fuelmenu" pid
        5. Deploy hiera manifest

    Duration 20m
    """
    self.env.revert_snapshot("empty_custom_manifests")

    first_slave_only = self.env.d_env.nodes().slaves[:1]
    self.env.bootstrap_nodes(first_slave_only)

    cluster_id = self.fuel_web.create_cluster(
        name=self.__class__.__name__,
        mode=DEPLOYMENT_MODE
    )
    self.fuel_web.update_nodes(cluster_id, {'slave-01': ['controller']})

    admin_ip = self.ssh_manager.admin_ip
    nailgun_node = self.fuel_web.get_nailgun_node_by_devops_node(
        self.env.d_env.nodes().slaves[0])
    node_id = nailgun_node['id']

    def run_on_admin(command):
        return self.ssh_manager.execute_on_remote(admin_ip, command)

    # Provision the node first, then deploy it up to the 'hiera' task.
    run_on_admin('fuel node --node {0} --provision --env {1}'.format(
        node_id, cluster_id))
    self.fuel_web.provisioning_cluster_wait(cluster_id)
    run_on_admin('fuel node --node {0} --end hiera --env {1}'.format(
        node_id, cluster_id))

    progress_cmd = "fuel task | grep deployment | awk '{print $9}'"

    def deployment_is_complete():
        first_line = run_on_admin(progress_cmd)['stdout'][0]
        return int(first_line.rstrip()) == 100

    try:
        wait(deployment_is_complete, timeout=120)
    except TimeoutError:
        raise TimeoutError("hiera manifest was not applied")

    role_output = run_on_admin(
        'ssh -q node-{0} "hiera role"'.format(node_id))
    role = role_output['stdout'][0].rstrip()
    assert_equal(role, 'primary-controller', "node with deployed hiera "
                                             "was not found")
def wait_for_ready_containers(self, timeout=300):
    """Wait until every listed container reports itself as ready.

    :param timeout: seconds to wait for all containers
    :raises TimeoutError: at least one container is still not ready when
        the timeout expires; the message lists those containers
    """
    def action_for(container_name):
        action = BaseActions(self.admin_remote)
        action.container = container_name
        return action

    actions = [action_for(name) for name in self.list_containers()]

    def every_container_ready():
        # A full list is built on purpose, so every container is probed
        # on each poll rather than stopping at the first not-ready one.
        readiness = [action.is_container_ready for action in actions]
        return all(readiness)

    try:
        wait(every_container_ready, timeout=timeout)
    except TimeoutError:
        not_ready = [action.container for action in actions
                     if not action.is_container_ready]
        raise TimeoutError(
            "Container(s) {0} failed to start in {1} seconds.".format(
                not_ready, timeout))
def cli_cluster_deletion(self):
    """Delete a cluster using Fuel CLI

    Scenario:
        1. Revert snapshot 'cli_selected_nodes_deploy'
        2. Delete cluster via cli
        3. Check cluster absence in the list

    Duration 25m
    """
    self.env.revert_snapshot("cli_selected_nodes_deploy")

    cluster_id = self.fuel_web.get_last_created_cluster()
    nodes = self.fuel_web.client.list_cluster_nodes(cluster_id)
    online_nodes = [item for item in nodes if item['online']]
    if nodes != online_nodes:
        # Only reported; the scenario goes on with the deletion anyway.
        logger.error(
            'Some slaves do not become online after revert!!'
            ' Expected {0} Actual {1}'.format(nodes, online_nodes))

    delete_cmd = 'fuel --env {0} env delete --force'.format(cluster_id)
    self.ssh_manager.execute_on_remote(
        ip=self.ssh_manager.admin_ip,
        cmd=delete_cmd
    )

    list_cmd = "fuel env | awk '{print $1}' | tail -n 1 | grep '^.$'"

    def env_list_has_no_match():
        outcome = self.ssh_manager.execute_on_remote(
            ip=self.ssh_manager.admin_ip,
            cmd=list_cmd,
            raise_on_assert=False)
        # grep exits with 1 when nothing matched.
        return outcome['exit_code'] == 1

    try:
        wait(env_list_has_no_match, timeout=60 * 10)
    except TimeoutError:
        raise TimeoutError(
            "cluster {0} was not deleted".format(cluster_id))

    still_present = check_cluster_presence(
        cluster_id, self.env.postgres_actions)
    assert_false(still_present, "cluster {0} is found".format(cluster_id))
def wait(predicate, interval=5, timeout=None):
    """Poll ``predicate`` until it returns a true value.

    Options:

    interval - seconds to sleep between two checks; with a timeout the
    sleep is shortened so it never runs past the deadline.

    timeout - raise TimeoutError when the predicate is still false after
    this many seconds. A false value ('None', 0) disables the timeout.

    Returns the seconds that were left until the deadline, or 0 when no
    timeout was given.
    """
    started = time.time()

    while True:
        if predicate():
            break

        if timeout:
            if started + timeout < time.time():
                raise TimeoutError("Waiting timed out")
            remaining = started + timeout - time.time()
            pause = max(0, min(interval, remaining))
        else:
            pause = interval

        time.sleep(pause)

    if not timeout:
        return 0
    return timeout + started - time.time()
def shutdown_ceph_for_all(self):
    """Shutdown of Neutron Vxlan, ceph for all cluster

    Scenario:
        1. Create cluster with Neutron Vxlan, ceph for all,
           ceph replication factor - 3
        2. Add 3 controller, 2 compute, 3 ceph nodes
        3. Verify Network
        4. Deploy cluster
        5. Verify networks
        6. Run OSTF
        7. Create 2 volumes and 2 instances with attached volumes
        8. Fill ceph storages up to 30%(15% for each instance)
        9. Shutdown of all nodes
        10. Wait 5 minutes
        11. Start cluster
        12. Wait until OSTF 'HA' suite passes
        13. Verify networks
        14. Run OSTF tests

    Duration 230m

    """
    # NOTE(review): show_step() presumably prints the numbered scenario
    # lines from the docstring above - confirm before renumbering them.

    self.env.revert_snapshot('ready_with_9_slaves')

    self.show_step(1, initialize=True)
    # Cluster settings: all four ceph backends on, LVM volumes off.
    data = {
        'tenant': 'failover',
        'user': '******',
        'password': '******',
        "net_provider": 'neutron',
        "net_segment_type": settings.NEUTRON_SEGMENT['tun'],
        'volumes_ceph': True,
        'images_ceph': True,
        'ephemeral_ceph': True,
        'objects_ceph': True,
        'osd_pool_size': '3',
        'volumes_lvm': False,
    }
    cluster_id = self.fuel_web.create_cluster(name=self.__class__.__name__,
                                              settings=data)

    self.show_step(2)
    self.fuel_web.update_nodes(
        cluster_id,
        {
            'slave-01': ['controller'],
            'slave-02': ['controller'],
            'slave-03': ['controller'],
            'slave-04': ['compute'],
            'slave-05': ['compute'],
            'slave-06': ['ceph-osd'],
            'slave-07': ['ceph-osd'],
            'slave-08': ['ceph-osd']
        })

    self.show_step(3)
    self.fuel_web.verify_network(cluster_id)

    self.show_step(4)
    self.fuel_web.deploy_cluster_wait(cluster_id)

    self.show_step(5)
    self.fuel_web.verify_network(cluster_id)

    self.show_step(6)
    self.fuel_web.run_ostf(cluster_id)

    self.show_step(7)
    # NOTE: the local name `os` hides the stdlib `os` module inside this
    # method.
    os = os_actions.OpenStackActions(
        controller_ip=self.fuel_web.get_public_vip(cluster_id),
        user='******', passwd='failover', tenant='failover')
    net_name = self.fuel_web.get_cluster_predefined_networks_name(
        cluster_id)['private_net']

    # One instance per hypervisor: the first two listed hypervisors are
    # used.
    hypervisors = os.get_hypervisors()
    hypervisor_name = hypervisors[0].hypervisor_hostname
    instance_1 = os.create_server_for_migration(
        neutron=True,
        availability_zone="nova:{0}".format(hypervisor_name),
        label=net_name)
    logger.info("New instance {0} created on {1}".format(
        instance_1.id, hypervisor_name))

    floating_ip_1 = os.assign_floating_ip(instance_1)
    logger.info("Floating address {0} associated with instance {1}".format(
        floating_ip_1.ip, instance_1.id))

    hypervisor_name = hypervisors[1].hypervisor_hostname
    instance_2 = os.create_server_for_migration(
        neutron=True,
        availability_zone="nova:{0}".format(hypervisor_name),
        label=net_name)
    logger.info("New instance {0} created on {1}".format(
        instance_2.id, hypervisor_name))

    floating_ip_2 = os.assign_floating_ip(instance_2)
    logger.info("Floating address {0} associated with instance {1}".format(
        floating_ip_2.ip, instance_2.id))

    self.show_step(8)
    ceph_nodes = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
        cluster_id, ['ceph-osd'])
    total_ceph_size = 0
    for node in ceph_nodes:
        total_ceph_size += \
            self.fuel_web.get_node_partition_size(node['id'], 'ceph')
    # presumably get_node_partition_size() returns megabytes (the names
    # below assume so) - TODO confirm
    percent_15_mb = 0.15 * total_ceph_size
    percent_15_gb = percent_15_mb // 1024
    # "+ 1" rounds up so the volume is larger than the file written to it.
    volume_size = int(percent_15_gb + 1)

    volume_1 = os.create_volume(size=volume_size)
    volume_2 = os.create_volume(size=volume_size)

    logger.info('Created volumes: {0}, {1}'.format(volume_1.id,
                                                   volume_2.id))

    ip = self.fuel_web.get_nailgun_node_by_name("slave-01")['ip']

    logger.info("Attach volumes")
    cmd = 'nova volume-attach {srv_id} {volume_id} /dev/vdb'

    self.ssh_manager.execute_on_remote(
        ip=ip,
        cmd='. openrc; ' + cmd.format(srv_id=instance_1.id,
                                      volume_id=volume_1.id))
    self.ssh_manager.execute_on_remote(
        ip=ip,
        cmd='. openrc; ' + cmd.format(srv_id=instance_2.id,
                                      volume_id=volume_2.id))

    # Run inside each instance: make a filesystem, mount it, then start
    # a background dd writing `percent_15_mb` blocks of 1M.
    cmds = [
        'sudo sh -c "/usr/sbin/mkfs.ext4 /dev/vdb"',
        'sudo sh -c "/bin/mount /dev/vdb /mnt"',
        'sudo sh -c "/usr/bin/nohup'
        ' /bin/dd if=/dev/zero of=/mnt/bigfile '
        'bs=1M count={} &"'.format(int(percent_15_mb))
    ]

    # md5sum output per floating IP, compared again at the end of the
    # test.
    md5s = {floating_ip_1.ip: '', floating_ip_2.ip: ''}
    with self.fuel_web.get_ssh_for_node("slave-01") as remote:
        for ip in [floating_ip_1.ip, floating_ip_2.ip]:
            for cmd in cmds:
                res = remote.execute_through_host(hostname=ip,
                                                  cmd=cmd,
                                                  auth=cirros_auth)
                logger.info('RESULT for {}: {}'.format(
                    cmd,
                    utils.pretty_log(res)))
            logger.info('Wait 7200 untill "dd" ends')
            # Poll up to 720 times, 10 seconds apart; a non-zero grep
            # exit code means no "dd if" process is left.
            for _ in range(720):
                cmd = 'ps -ef |grep -v grep| grep "dd if" '
                res = remote.execute_through_host(hostname=ip,
                                                  cmd=cmd,
                                                  auth=cirros_auth)
                if res['exit_code'] != 0:
                    break
                time.sleep(10)
                logger.debug('Wait another 10 sec -'
                             ' totally waited {} sec'.format(10 * _))
            else:
                # for/else: reached only when the loop never hit `break`,
                # i.e. dd was still running on every poll.
                raise TimeoutError('BigFile has not been'
                                   ' created yet, after 7200 sec')
            cmd = 'md5sum /mnt/bigfile'
            md5s[ip] = remote.execute_through_host(
                hostname=ip, cmd=cmd, auth=cirros_auth)['stdout']

    self.show_step(9)
    # Map each role to the devops nodes that carry it.
    nodes = {'compute': [], 'controller': [], 'ceph-osd': []}

    for role in nodes:
        nailgun_nodes = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
            cluster_id, [role])
        nodes[role] = self.fuel_web.get_devops_nodes_by_nailgun_nodes(
            nailgun_nodes)

    # Shutdown order: computes, controllers, ceph OSDs.
    self.fuel_web.warm_shutdown_nodes(nodes['compute'])
    self.fuel_web.warm_shutdown_nodes(nodes['controller'])
    self.fuel_web.warm_shutdown_nodes(nodes['ceph-osd'])

    self.show_step(10)
    time.sleep(300)

    self.show_step(11)
    # Start order is the reverse of shutdown; computes are started only
    # after the HA services check in step 12.
    self.fuel_web.warm_start_nodes(nodes['ceph-osd'])
    self.fuel_web.warm_start_nodes(nodes['controller'])
    self.show_step(12)
    self.fuel_web.assert_ha_services_ready(cluster_id)
    self.fuel_web.warm_start_nodes(nodes['compute'])
    self.fuel_web.assert_os_services_ready(cluster_id)

    self.show_step(13)
    self.fuel_web.verify_network(cluster_id)
    self.show_step(14)
    self.fuel_web.run_ostf(cluster_id)

    # The file written before the shutdown must have the same checksum.
    with self.fuel_web.get_ssh_for_node("slave-01") as remote:
        for ip in [floating_ip_1.ip, floating_ip_2.ip]:
            cmd = 'md5sum /mnt/bigfile'
            md5 = remote.execute_through_host(hostname=ip,
                                              cmd=cmd,
                                              auth=cirros_auth)['stdout']
            assert_equal(
                md5, md5s[ip],
                "Actual md5sum {0} doesnt match"
                " with old one {1} on {2}".format(md5, md5s[ip], ip))
def cli_node_deletion_check(self):
    """Destroy node and remove it from Nailgun using Fuel CLI

    Scenario:
        1. Revert snapshot 'cli_selected_nodes_deploy'
        2. Check 'slave-03' is present
        3. Destroy 'slave-03'
        4. Wait until 'slave-03' become offline
        5. Delete offline 'slave-03' from db
        6. Check presence of 'slave-03'

    Duration 30m

    """
    self.env.revert_snapshot("cli_selected_nodes_deploy")

    node_id = self.fuel_web.get_nailgun_node_by_devops_node(
        self.env.d_env.nodes().slaves[2])['id']

    assert_true(check_cobbler_node_exists(self.ssh_manager.admin_ip,
                                          node_id),
                "node-{0} is not found".format(node_id))
    self.env.d_env.nodes().slaves[2].destroy()
    try:
        wait(
            lambda: not self.fuel_web.get_nailgun_node_by_devops_node(
                self.env.d_env.nodes().
                slaves[2])['online'], timeout=60 * 6)
    except TimeoutError:
        # The handler used to be a bare `raise`, which added nothing.
        # Report which node failed, as the other waits in this test do.
        raise TimeoutError(
            "node-{0} does not become offline".format(node_id))

    admin_ip = self.ssh_manager.admin_ip
    cmd = 'fuel node --node-id {0} --delete-from-db'.format(node_id)
    res = self.ssh_manager.execute_on_remote(admin_ip, cmd)
    # A space was missing between the two literals, so the message read
    # "was notdeleted".
    assert_true(
        res['exit_code'] == 0,
        "Offline node-{0} was not "
        "deleted from database".format(node_id))

    # `grep -w` exits non-zero once the id is gone from `fuel node`.
    cmd = "fuel node | awk '{{print $1}}' | grep -w '{0}'".format(node_id)
    try:
        wait(
            lambda: not self.ssh_manager.execute_on_remote(
                admin_ip,
                cmd,
                raise_on_assert=False)['exit_code'] == 0, timeout=60 * 4)
    except TimeoutError:
        raise TimeoutError(
            "After deletion node-{0} is found in fuel list".format(
                node_id))

    is_cobbler_node_exists = check_cobbler_node_exists(
        self.ssh_manager.admin_ip, node_id)

    assert_false(is_cobbler_node_exists,
                 "After deletion node-{0} is found in cobbler list".
                 format(node_id))

    # The cluster id is the first column of the last `fuel env` row.
    cmd = "fuel env | tail -n 1 | awk {'print $1'}"
    cluster_id = self.ssh_manager.execute_on_remote(
        admin_ip, cmd)['stdout_str']

    self.fuel_web.verify_network(cluster_id)

    self.fuel_web.run_ostf(
        cluster_id=cluster_id, test_sets=['ha', 'smoke', 'sanity'],
        should_fail=1)
def neutron_l3_migration_after_destroy(self):
    """Check l3-agent rescheduling after destroy non-primary controller

    Scenario:
        1. Revert snapshot with neutron cluster
        2. Manually reschedule router from primary controller
           to another one
        3. Destroy controller with l3-agent
        4. Check l3-agent was rescheduled
        5. Check network connectivity from instance via
           dhcp namespace
        6. Run OSTF

    Snapshot deploy_ha_neutron

    """
    self.env.revert_snapshot("deploy_ha_neutron")
    cluster_id = self.fuel_web.get_last_created_cluster()
    public_vip = self.fuel_web.get_public_vip(cluster_id)
    os_conn = os_actions.OpenStackActions(public_vip)

    # Open a remote to the node that hosts the DHCP agent of net04.
    net_id = os_conn.get_network('net04')['id']
    dhcp_node = self.get_node_with_dhcp(self, os_conn, net_id)
    remote = self.env.get_ssh_to_remote_by_name(dhcp_node.name)

    netns_output = remote.execute(
        'ip netns | grep {0}'.format(net_id))['stdout']
    dhcp_namespace = ''.join(netns_output).rstrip()
    logger.debug('dhcp namespace is {0}'.format(dhcp_namespace))

    instance = self.create_instance_with_keypair(os_conn, remote)
    instance_ip = instance.addresses['net04'][0]['addr']
    logger.debug('instance internal ip is {0}'.format(instance_ip))

    router_id = os_conn.get_routers_ids()[0]
    self.reshedule_router_manually(os_conn, router_id)
    self.check_instance_connectivity(remote, dhcp_namespace, instance_ip)

    node_with_l3 = os_conn.get_l3_agent_hosts(router_id)[0]
    new_devops = self.get_node_with_l3(self, node_with_l3)
    new_devops.destroy()

    def destroyed_node_is_offline():
        nailgun_node = self.fuel_web.get_nailgun_node_by_devops_node(
            new_devops)
        return not nailgun_node['online']

    wait(destroyed_node_is_offline, timeout=60 * 10)

    # Galera is checked only on the controllers that are still alive.
    surviving = set(self.env.nodes().slaves[:3]) - {new_devops}
    self.fuel_web.wait_mysql_galera_is_up(
        [survivor.name for survivor in surviving])

    def router_left_original_host():
        return not node_with_l3 == os_conn.get_l3_agent_hosts(router_id)[0]

    try:
        wait(router_left_original_host, timeout=60 * 3)
    except TimeoutError:
        still_on = os_conn.get_l3_agent_hosts(router_id)[0]
        raise TimeoutError(
            "l3 agent wasn't rescheduled, it is still {0}".format(still_on))
    wait(lambda: os_conn.get_l3_agent_ids(router_id), timeout=60)

    self.check_instance_connectivity(remote, dhcp_namespace, instance_ip)

    neutron_objects_test = ('fuel_health.tests.smoke.'
                            'test_neutron_actions.TestNeutron.'
                            'test_check_neutron_objects_creation')

    @retry(count=3, delay=120)
    def run_single_test(cluster_id):
        self.fuel_web.run_single_ostf_test(
            cluster_id, test_sets=['smoke'],
            test_name=neutron_objects_test)

    run_single_test(cluster_id)

    self.fuel_web.run_ostf(
        cluster_id=cluster_id,
        test_sets=['ha', 'smoke', 'sanity'],
        should_fail=1,
        failed_test_name=['Check that required services are running'])
def shutdown_primary_controller_ceph(self):
    """Shutdown primary controller for Neutron on ceph cluster

    Scenario:
        1. Pre-condition - do steps from 'deploy_ha_ceph' test
        2. Create 1 instance
        3. Set floating IP associated with created instance
        4. Shut down primary controller
        5. Wait for HA services to be ready
        6. Verify networks
        7. Ensure connectivity to external resources from VM
        8. Run OSTF tests

    Duration: XXX min
    Snapshot: shutdown_primary_controller_ceph
    """
    self.show_step(1, initialize=True)
    self.env.revert_snapshot('deploy_ha_ceph')
    cluster_id = self.fuel_web.get_last_created_cluster()
    controllers = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
        cluster_id, roles=('controller', ))
    controller_count = len(controllers)
    assert_equal(
        controller_count, 3,
        'Environment does not have 3 controller nodes, '
        'found {} nodes!'.format(controller_count))

    self.show_step(2)
    public_vip = self.fuel_web.get_public_vip(cluster_id)
    os_conn = os_actions.OpenStackActions(
        controller_ip=public_vip,
        user='******', passwd='failover', tenant='failover')
    predefined_nets = self.fuel_web.get_cluster_predefined_networks_name(
        cluster_id)
    net_name = predefined_nets['private_net']

    # The instance is pinned to the first listed hypervisor.
    hypervisor_name = os_conn.get_hypervisors()[0].hypervisor_hostname
    instance_1 = os_conn.create_server_for_migration(
        neutron=True,
        availability_zone="nova:{0}".format(hypervisor_name),
        label=net_name)
    logger.info("New instance {0} created on {1}".format(
        instance_1.id, hypervisor_name))

    self.show_step(3)
    floating_ip_1 = os_conn.assign_floating_ip(instance_1)
    logger.info("Floating address {0} associated with instance {1}".format(
        floating_ip_1.ip, instance_1.id))

    self.show_step(4)
    first_controller = self.fuel_web.get_devops_node_by_nailgun_node(
        controllers[0])
    target_controller = self.fuel_web.get_nailgun_primary_node(
        first_controller)
    self.fuel_web.warm_shutdown_nodes([target_controller])

    self.show_step(5)
    self.fuel_web.assert_ha_services_ready(cluster_id, should_fail=1)

    self.show_step(6)
    self.fuel_web.verify_network(cluster_id)

    self.show_step(7)

    def instance_answers_on_ssh_port():
        return tcp_ping(floating_ip_1.ip, 22)

    try:
        wait(instance_answers_on_ssh_port, timeout=120)
    except TimeoutError:
        raise TimeoutError('Can not ping instance'
                           ' by floating ip {0}'.format(floating_ip_1.ip))

    self.show_step(8)
    self.fuel_web.run_ostf(cluster_id)

    self.env.make_snapshot('shutdown_primary_controller_ceph')
def neutron_l3_migration_after_reset(self):
    """Check l3-agent rescheduling after reset non-primary controller

    Scenario:
        1. Revert snapshot with neutron cluster
        2. Create an instance with a key pair
        3. Manually reschedule router from primary controller
           to another one
        4. Reset controller with l3-agent
        5. Check l3-agent was rescheduled
        6. Check network connectivity from instance via
           dhcp namespace
        7. Run OSTF

    Duration 30m
    """
    self.env.revert_snapshot("deploy_ha_neutron")
    cluster_id = self.fuel_web.get_last_created_cluster()
    public_vip = self.fuel_web.get_public_vip(cluster_id)
    os_conn = os_actions.OpenStackActions(public_vip)

    # Get remote to the controller with running DHCP agent for net04
    net_id = os_conn.get_network('net04')['id']
    dhcp_node = self.get_node_with_dhcp(self, os_conn, net_id)
    dhcp_node_ip = self.fuel_web.get_nailgun_node_by_name(
        dhcp_node.name)['ip']
    remote = self.env.d_env.get_ssh_to_remote(dhcp_node_ip)

    netns_output = remote.execute(
        'ip netns | grep {0}'.format(net_id))['stdout']
    dhcp_namespace = ''.join(netns_output).rstrip()
    logger.debug('dhcp namespace is {0}'.format(dhcp_namespace))

    instance_keypair = os_conn.create_key(key_name='instancekey')
    instance = self.create_instance_with_keypair(
        os_conn, instance_keypair.name)
    instance_ip = instance.addresses['net04'][0]['addr']
    logger.debug('instance internal ip is {0}'.format(instance_ip))

    router_id = os_conn.get_routers_ids()[0]
    self.reshedule_router_manually(os_conn, router_id)
    self.check_instance_connectivity(remote, dhcp_namespace,
                                     instance_ip, instance_keypair)
    remote.clear()

    node_with_l3 = os_conn.get_l3_agent_hosts(router_id)[0]
    new_devops = self.get_node_with_l3(self, node_with_l3)
    self.fuel_web.warm_restart_nodes([new_devops])

    def restarted_node_is_online():
        nailgun_node = self.fuel_web.get_nailgun_node_by_devops_node(
            new_devops)
        return nailgun_node['online']

    wait(restarted_node_is_online, timeout=60 * 5)

    # Wait for HA services ready
    self.fuel_web.assert_ha_services_ready(cluster_id)

    self.fuel_web.wait_mysql_galera_is_up(
        ['slave-01', 'slave-02', 'slave-03'])

    def router_left_original_host():
        return not node_with_l3 == os_conn.get_l3_agent_hosts(router_id)[0]

    try:
        wait(router_left_original_host, timeout=60 * 3)
    except TimeoutError:
        still_on = os_conn.get_l3_agent_hosts(router_id)[0]
        raise TimeoutError(
            "l3 agent wasn't rescheduled, it is still {0}".format(still_on))
    wait(lambda: os_conn.get_l3_agent_ids(router_id), timeout=60)

    # Look the DHCP node up again and open a fresh remote to it; the
    # first connection was cleared before the restart.
    dhcp_node = self.get_node_with_dhcp(self, os_conn, net_id)
    dhcp_node_ip = self.fuel_web.get_nailgun_node_by_devops_node(
        dhcp_node)['ip']
    remote = self.env.d_env.get_ssh_to_remote(dhcp_node_ip)

    self.check_instance_connectivity(remote, dhcp_namespace,
                                     instance_ip, instance_keypair)
    remote.clear()

    self.fuel_web.run_ostf(
        cluster_id=cluster_id,
        test_sets=['ha', 'smoke', 'sanity'])
def delete_custom_nodegroup(self):
    """Delete nodegroup, check its nodes are marked as 'error'

    Scenario:
        1. Revert snapshot with cluster with nodes in custom nodegroup
        2. Save cluster network configuration
        3. Reset cluster
        4. Remove custom nodegroup
        5. Check nodes from custom nodegroup have 'error' status
        6. Re-create custom nodegroup and upload saved network configuration
        7. Assign 'error' nodes to new nodegroup
        8. Check nodes from custom nodegroup are in 'discover' state

    Duration 30m
    """
    # NOTE(review): show_step() presumably prints the numbered scenario
    # lines from the docstring above - confirm before renumbering them.

    self.show_step(1, initialize=True)
    self.env.revert_snapshot('deploy_controllers_from_custom_nodegroup')
    cluster_id = self.fuel_web.get_last_created_cluster()
    self.fuel_web.assert_nodes_in_ready_state(cluster_id)

    self.show_step(2)
    # Saved now, patched and uploaded again in step 6.
    network_config = self.fuel_web.client.get_networks(cluster_id)

    self.show_step(3)
    # Slaves 4..6 are the ones this test treats as the custom nodegroup.
    custom_nodes = self.env.d_env.nodes().slaves[3:6]
    self.fuel_web.stop_reset_env_wait(cluster_id)
    logger.info('Waiting for all nodes online for 900 seconds...')
    wait(lambda: all(n['online'] for n in
                     self.fuel_web.client.list_cluster_nodes(cluster_id)),
         timeout=15 * 60)

    self.show_step(4)
    # Find the nodegroup by the name configured in NODEGROUPS[1]; the
    # trailing [0] raises IndexError when no nodegroup has that name.
    custom_nodegroup = [ng for ng in self.fuel_web.client.get_nodegroups()
                        if ng['name'] == NODEGROUPS[1]['name']][0]
    self.fuel_web.client.delete_nodegroup(custom_nodegroup['id'])

    self.show_step(5)
    logger.info('Wait all nodes from custom nodegroup become '
                'in error state..')
    for slave in custom_nodes:
        # The lambda is consumed by wait() inside this iteration, so
        # closing over the loop variable is safe here.
        try:
            wait(lambda: self.fuel_web.get_nailgun_node_by_devops_node(
                slave)['status'] == 'error', timeout=60 * 5)
            logger.info('Node {} is in "error" state'.format(slave.name))
        except TimeoutError:
            raise TimeoutError('Node {} status wasn\'t changed '
                               'to "error"!'.format(slave.name))

    self.show_step(6)
    new_nodegroup = self.fuel_web.client.create_nodegroup(
        cluster_id, NODEGROUPS[1]['name'])
    logger.debug('Updating custom nodegroup ID in network configuration..')
    network_config_new = self.fuel_web.client.get_networks(cluster_id)
    # Re-point each saved network of the deleted group at the new group,
    # then copy the id of the matching (same name, same group) network
    # from the freshly read configuration.
    for network in network_config['networks']:
        if network['group_id'] == custom_nodegroup['id']:
            network['group_id'] = new_nodegroup['id']
            for new_network in network_config_new['networks']:
                if new_network['name'] == network['name'] and \
                   new_network['group_id'] == network['group_id']:
                    network['id'] = new_network['id']

    self.fuel_web.client.update_network(
        cluster_id,
        network_config['networking_parameters'],
        network_config['networks'])

    self.show_step(7)
    self.fuel_web.client.assign_nodegroup(
        new_nodegroup['id'],
        [self.fuel_web.get_nailgun_node_by_devops_node(node)
         for node in custom_nodes])

    self.show_step(8)
    logger.info('Wait all nodes from custom nodegroup become '
                'in discover state..')
    for slave in custom_nodes:
        try:
            wait(lambda: self.fuel_web.get_nailgun_node_by_devops_node(
                slave)['status'] == 'discover', timeout=60 * 5)
            logger.info('Node {} is in "discover" state'.format(
                slave.name))
        except TimeoutError:
            raise TimeoutError('Node {} status wasn\'t changed '
                               'to "discover"!'.format(slave.name))

    self.env.make_snapshot("delete_custom_nodegroup")
def block_net_traffic_cinder(self):
    """Block network traffic of whole environment

    Scenario:
        1. Revert environment deploy_ha_cinder
        2. Create 2 volumes and 2 instances with attached volumes
        3. Fill cinder storages up to 30%
        4. Start Rally
        5. Block traffic of all networks
        6. Sleep 5 minutes
        7. Unblock traffic of all networks
        8. Wait until cluster nodes become online
        9. Verify networks
        10. Run OSTF tests

    Duration: 40 min
    Snapshot: block_net_traffic
    """

    self.show_step(1)
    self.env.revert_snapshot('deploy_ha_cinder')
    cluster_id = self.fuel_web.get_last_created_cluster()

    self.show_step(2)
    # Named os_conn so the stdlib `os` module name is not shadowed.
    os_conn = os_actions.OpenStackActions(
        controller_ip=self.fuel_web.get_public_vip(cluster_id),
        user='******', passwd='failover', tenant='failover')
    net_name = self.fuel_web.get_cluster_predefined_networks_name(
        cluster_id)['private_net']

    # One instance per hypervisor: the first two listed hypervisors are
    # used.
    hypervisors = os_conn.get_hypervisors()
    hypervisor_name = hypervisors[0].hypervisor_hostname
    instance_1 = os_conn.create_server_for_migration(
        neutron=True,
        availability_zone="nova:{0}".format(hypervisor_name),
        label=net_name)
    logger.info("New instance {0} created on {1}".format(
        instance_1.id, hypervisor_name))

    floating_ip_1 = os_conn.assign_floating_ip(instance_1)
    logger.info("Floating address {0} associated with instance {1}".format(
        floating_ip_1.ip, instance_1.id))

    hypervisor_name = hypervisors[1].hypervisor_hostname
    instance_2 = os_conn.create_server_for_migration(
        neutron=True,
        availability_zone="nova:{0}".format(hypervisor_name),
        label=net_name)
    logger.info("New instance {0} created on {1}".format(
        instance_2.id, hypervisor_name))

    floating_ip_2 = os_conn.assign_floating_ip(instance_2)
    logger.info("Floating address {0} associated with instance {1}".format(
        floating_ip_2.ip, instance_2.id))

    self.show_step(3)
    cinder_nodes = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
        cluster_id, ['cinder'])
    total_cinder_size = 0
    for node in cinder_nodes:
        total_cinder_size += \
            self.fuel_web.get_node_partition_size(node['id'], 'cinder')
    # presumably get_node_partition_size() returns megabytes (the names
    # below assume so) - TODO confirm
    percent_15_mb = 0.15 * total_cinder_size
    percent_15_gb = percent_15_mb // 1024
    # "+ 1" rounds up so the volume is larger than the file written to it.
    volume_size = int(percent_15_gb + 1)

    volume_1 = os_conn.create_volume(size=volume_size)
    volume_2 = os_conn.create_volume(size=volume_size)

    logger.info('Created volumes: {0}, {1}'.format(volume_1.id,
                                                   volume_2.id))

    ip = self.fuel_web.get_nailgun_node_by_name("slave-01")['ip']

    logger.info("Attach volumes")
    cmd = 'nova volume-attach {srv_id} {volume_id} /dev/vdb'

    self.ssh_manager.execute_on_remote(
        ip=ip,
        cmd='. openrc; ' + cmd.format(srv_id=instance_1.id,
                                      volume_id=volume_1.id))
    self.ssh_manager.execute_on_remote(
        ip=ip,
        cmd='. openrc; ' + cmd.format(srv_id=instance_2.id,
                                      volume_id=volume_2.id))

    # Run inside each instance: make a filesystem, mount it, then start
    # a background dd writing `percent_15_mb` blocks of 1M.
    cmds = [
        'sudo sh -c "/usr/sbin/mkfs.ext4 /dev/vdb"',
        'sudo sh -c "/bin/mount /dev/vdb /mnt"',
        'sudo sh -c "/usr/bin/nohup'
        ' /bin/dd if=/dev/zero of=/mnt/bigfile '
        'bs=1M count={} &"'.format(int(percent_15_mb))
    ]

    # The poll really sleeps 15 s for up to 720 rounds. The messages used
    # to say 7200 s and the elapsed time was computed as `10 * _`, so
    # both are now derived from the real values. Timing is unchanged.
    poll_interval = 15
    poll_count = 720
    max_wait = poll_interval * poll_count

    md5s = {floating_ip_1.ip: '', floating_ip_2.ip: ''}
    with self.fuel_web.get_ssh_for_node("slave-01") as remote:
        for ip in [floating_ip_1.ip, floating_ip_2.ip]:
            for cmd in cmds:
                res = os_conn.execute_through_host(remote, ip, cmd)
                logger.info('RESULT for {}: {}'.format(
                    cmd,
                    utils.pretty_log(res)))
            logger.info('Wait {} untill "dd" ends'.format(max_wait))
            for attempt in range(poll_count):
                cmd = 'ps -ef |grep -v grep| grep "dd if" '
                res = os_conn.execute_through_host(remote, ip, cmd)
                # A non-zero grep exit code means no "dd if" process
                # is left.
                if res['exit_code'] != 0:
                    break
                time.sleep(poll_interval)
                logger.debug('Wait another {} sec -'
                             ' totally waited {} sec'.format(
                                 poll_interval,
                                 poll_interval * (attempt + 1)))
            else:
                # for/else: reached only when the loop never hit `break`,
                # i.e. dd was still running on every poll.
                raise TimeoutError('BigFile has not been'
                                   ' created yet, after {} sec'.format(
                                       max_wait))
            cmd = 'md5sum /mnt/bigfile'
            md5s[ip] = os_conn.execute_through_host(
                remote, ip, cmd)['stdout']

    self.show_step(4)
    assert_true(settings.PATCHING_RUN_RALLY,
                'PATCHING_RUN_RALLY was not set in true')
    rally_benchmarks = {}
    benchmark_results = {}
    for tag in set(settings.RALLY_TAGS):
        rally_benchmarks[tag] = RallyBenchmarkTest(
            container_repo=settings.RALLY_DOCKER_REPO,
            environment=self.env,
            cluster_id=cluster_id,
            test_type=tag
        )
        benchmark_results[tag] = rally_benchmarks[tag].run()
        logger.debug(benchmark_results[tag].show())

    self.show_step(5)
    nodes = [
        node for node in self.env.d_env.get_nodes()
        if node.driver.node_active(node)
    ]
    # The networks are reached through the interfaces of the second
    # active node.
    # NOTE(review): the pre-check reads interface.is_blocked while the
    # unblock step reads interface.network.is_blocked - confirm which
    # one is intended.
    for interface in nodes[1].interfaces:
        if interface.is_blocked:
            raise Exception('Interface {0} is blocked'.format(interface))
        else:
            interface.network.block()

    self.show_step(6)
    time.sleep(60 * 5)

    self.show_step(7)
    for interface in nodes[1].interfaces:
        if interface.network.is_blocked:
            interface.network.unblock()
        else:
            raise Exception(
                'Interface {0} was not blocked'.format(interface))

    self.show_step(8)
    self.fuel_web.wait_nodes_get_online_state(nodes[1:])

    self.show_step(9)
    self.fuel_web.verify_network(cluster_id)

    self.show_step(10)
    # One retry after a 10 minute pause if the first OSTF run fails.
    try:
        self.fuel_web.run_ostf(cluster_id=cluster_id,
                               test_sets=['ha', 'smoke', 'sanity'])
    except AssertionError:
        time.sleep(600)
        self.fuel_web.run_ostf(cluster_id=cluster_id,
                               test_sets=['ha', 'smoke', 'sanity'])
def ha_sequential_rabbit_master_failover(self):
    """Check the cluster survives two sequential RabbitMQ master failures

    Flow performed by this test:
        1. Revert the snapshot named by ``self.snapshot_name`` (the test
           is skipped when the snapshot does not exist)
        2. Wait for MySQL Galera, run OSTF 'ha' and 'sanity'
        3. Boot an instance, assign a floating ip, check TCP port 22
        4. Suspend the controller reported as RabbitMQ master, wait until
           nailgun marks it offline, run OSTF 'ha', check the instance
        5. Suspend the controller reported as RabbitMQ master by one of
           the remaining slaves, wait until nailgun marks it offline
        6. Resume the first controller, check ceph status with the second
           one listed as offline, run OSTF 'ha'
        7. Resume the second controller, check ceph status, run OSTF 'ha'
        8. Check the instance, delete it, run OSTF 'ha', 'smoke', 'sanity'

    :raises SkipTest: required snapshot is missing
    :raises TimeoutError: a node did not reach the expected online state
        or the instance is not reachable on port 22 in time
    """
    if not self.env.d_env.has_snapshot(self.snapshot_name):
        raise SkipTest()

    self.env.revert_snapshot(self.snapshot_name)

    cluster_id = self.fuel_web.client.get_cluster_id(
        self.__class__.__name__)
    net_provider = self.fuel_web.client.get_cluster(
        cluster_id)['net_provider']

    def run_ostf_with_retry(test_sets, delay=600, **retry_kwargs):
        """Run OSTF; on AssertionError sleep ``delay`` sec and run again.

        ``retry_kwargs`` are passed to the second attempt only.
        """
        try:
            self.fuel_web.run_ostf(cluster_id=cluster_id,
                                   test_sets=test_sets)
        except AssertionError:
            time.sleep(delay)
            self.fuel_web.run_ostf(cluster_id=cluster_id,
                                   test_sets=test_sets, **retry_kwargs)

    def wait_node_online_state(node, online):
        """Wait up to 5 minutes until nailgun reports ``online`` state."""
        state = 'online' if online else 'offline'
        try:
            wait(lambda: bool(
                self.fuel_web.get_nailgun_node_by_devops_node(
                    node)['online']) == online,
                timeout=60 * 5)
        except TimeoutError:
            raise TimeoutError('Node {0} does not become {1} '
                               'in nailgun'.format(node.name, state))

    def wait_instance_ping(ip):
        """Wait up to 2 minutes until TCP port 22 answers on ``ip``."""
        try:
            wait(lambda: tcp_ping(ip, 22), timeout=120)
        except TimeoutError:
            raise TimeoutError('Can not ping instance'
                               ' by floating ip {0}'.format(ip))

    # Wait until MySQL Galera is UP on some controller
    self.fuel_web.wait_mysql_galera_is_up(['slave-02'])

    # Check keystone is fine after revert
    run_ostf_with_retry(['ha', 'sanity'])

    public_vip = self.fuel_web.get_public_vip(cluster_id)
    os_conn = os_actions.OpenStackActions(public_vip)

    # Create instance
    if net_provider == 'neutron':
        instance = os_conn.create_server_for_migration(neutron=True)
    else:
        instance = os_conn.create_server_for_migration()

    # Check ping
    logger.info("Assigning floating ip to server")
    floating_ip = os_conn.assign_floating_ip(instance)

    # check instance
    wait_instance_ping(floating_ip.ip)

    # get master rabbit controller
    master_rabbit = self.fuel_web.get_rabbit_master_node(
        self.env.d_env.nodes().slaves[0].name)

    # suspend devops node with master rabbit
    master_rabbit.suspend(False)

    # Wait until Nailgun marked suspended controller as offline
    wait_node_online_state(master_rabbit, online=False)

    # check ha (the retry tolerates two failed tests)
    run_ostf_with_retry(['ha'], delay=300, should_fail=2)

    # check instance
    wait_instance_ping(floating_ip.ip)

    active_slaves = [slave for slave
                     in self.env.d_env.nodes().slaves[0:4]
                     if slave.name != master_rabbit.name]

    second_master_rabbit = self.fuel_web.get_rabbit_master_node(
        active_slaves[0].name)

    # suspend devops node with master rabbit
    second_master_rabbit.suspend(False)

    # Wait until Nailgun marked suspended controller as offline
    wait_node_online_state(second_master_rabbit, online=False)

    # turn on 1-st master
    master_rabbit.resume(False)

    # Wait until Nailgun marked suspended controller as online
    wait_node_online_state(master_rabbit, online=True)

    self.fuel_web.check_ceph_status(
        cluster_id,
        offline_nodes=[self.fuel_web.get_nailgun_node_by_devops_node(
            second_master_rabbit)['id']])

    # check ha (the retry tolerates two failed tests)
    run_ostf_with_retry(['ha'], should_fail=2)

    # turn on second master
    second_master_rabbit.resume(False)

    # Wait until Nailgun marked suspended controller as online
    wait_node_online_state(second_master_rabbit, online=True)

    self.fuel_web.check_ceph_status(cluster_id)

    # check ha
    run_ostf_with_retry(['ha'])

    # ping instance
    wait_instance_ping(floating_ip.ip)

    # delete instance (a fresh OpenStack connection is created first,
    # exactly as the test has always done)
    os_conn = os_actions.OpenStackActions(public_vip)
    os_conn.delete_instance(instance)

    # run ostf
    run_ostf_with_retry(['ha', 'smoke', 'sanity'])
def review_fuel_cli_one_node_deploy(self):
    """ Revert snapshot, apply changes from review and deploy
    cluster with controller node only over cli.

    Scenario:
        1. Revert snapshot 'ready_with_1_slaves'
        2. Apply changes from review
        3. Bootstrap 1 node
        4. Show releases list
        5. Create cluster over cli
        6. Update networks
        7. Update SSL settings
        8. List environments
        9. Add and provision 1 node with controller role
        10. Deploy node
        11. Delete cluster

    Duration 20m
    """
    if not UPDATE_FUEL:
        raise exceptions.FuelQAVariableNotSet(UPDATE_FUEL, 'true')

    self.show_step(1, initialize=True)
    self.env.revert_snapshot('ready_with_1_slaves')

    target_path = '/var/www/nailgun/python-fuelclient/'
    package_name = 'python-fuelclient'
    with self.env.d_env.get_admin_remote() as remote:
        self.show_step(2)
        self.upload_package(remote, target_path, package_name)
        self.replace_package(remote,
                             package_name=package_name,
                             package_path=target_path)

    # Step 3 is announced exactly once, right before the bootstrap
    self.show_step(3)
    self.env.bootstrap_nodes(self.env.d_env.nodes().slaves[:1])
    node_id = self.fuel_web.get_nailgun_node_by_devops_node(
        self.env.d_env.nodes().slaves[0])['id']

    with self.env.d_env.get_admin_remote() as remote:
        # get releases list
        self.show_step(4)
        list_release_cmd = 'fuel release --json'
        list_release_res = run_on_remote(
            remote, list_release_cmd, jsonify=True)
        active_release_id = [
            release['id'] for release in list_release_res
            if release['is_deployable']]
        asserts.assert_true(
            active_release_id, 'Can not find deployable release. '
            'Current release data {0}'.format(list_release_res))

        # Create an environment
        self.show_step(5)
        cmd = ('fuel env create --name={0} --release={1} '
               '--nst=tun --json'.format(self.__class__.__name__,
                                         active_release_id[0]))
        env_result = run_on_remote(remote, cmd, jsonify=True)
        cluster_id = env_result['id']
        cluster_name = env_result['name']

        # Update network parameters
        self.show_step(6)
        self.update_cli_network_configuration(cluster_id, remote)

        # Update SSL configuration
        self.show_step(7)
        self.update_ssl_configuration(cluster_id, remote)

        self.show_step(8)
        cmd = 'fuel env --json'
        env_list_res = run_on_remote(remote, cmd, jsonify=True)
        asserts.assert_true(
            cluster_id in [cluster['id'] for cluster in env_list_res],
            'Can not find created before environment'
            ' id in fuel environment list.')
        asserts.assert_true(
            cluster_name in [cluster['name'] for cluster in env_list_res],
            'Can not find cluster name in fuel env command output')

        # Add and provision a controller node
        self.show_step(9)
        logger.info("Add to the cluster and start provisioning "
                    "a controller node [{0}]".format(node_id))
        cmd = ('fuel --env-id={0} node set --node {1} --role=controller'.
               format(cluster_id, node_id))
        remote.execute(cmd)
        cmd = ('fuel --env-id={0} node --provision --node={1} --json'.
               format(cluster_id, node_id))
        task = run_on_remote(remote, cmd, jsonify=True)
        self.assert_cli_task_success(task, remote, timeout=30 * 60)

        # Deploy the controller node
        self.show_step(10)
        cmd = ('fuel --env-id={0} node --deploy --node {1} --json'.
               format(cluster_id, node_id))
        task = run_on_remote(remote, cmd, jsonify=True)
        self.assert_cli_task_success(task, remote, timeout=60 * 60)

    self.fuel_web.run_ostf(cluster_id=cluster_id, test_sets=['sanity'])

    self.show_step(11)
    with self.env.d_env.get_admin_remote() as remote:
        res = remote.execute(
            'fuel --env {0} env delete'.format(cluster_id))
    asserts.assert_true(
        res['exit_code'] == 0,
        'Deletion of cluster {0} failed with exit code {1}'.format(
            cluster_id, res['exit_code']))

    with self.env.d_env.get_admin_remote() as remote:
        try:
            # grep exits with 1 once the last line of 'fuel env' no
            # longer starts with a single-character first column
            wait(lambda:
                 remote.execute("fuel env |  awk '{print $1}'"
                                " |  tail -n 1 | grep '^.$'")
                 ['exit_code'] == 1, timeout=60 * 10)
        except TimeoutError:
            raise TimeoutError(
                "cluster {0} was not deleted".format(cluster_id))

    self.env.make_snapshot("review_fuel_cli_one_node_deploy")
def test_3_1_rabbit_failover(self):
    """Check RabbitMQ master election when a slave or the master fails

    Flow performed by this test:
        1. Revert the snapshot named by ``self.snapshot_name`` (the test
           is skipped when the snapshot does not exist)
        2. Wait for MySQL Galera, check HA and OpenStack services
        3. Suspend a controller running a rabbit slave, wait until
           nailgun marks it offline, check HA services, run OSTF
        4. Check the rabbit master did not change
        5. Resume the rabbit slave, check services, run OSTF 'smoke'
        6. Check the rabbit master is still the same
        7. Suspend the rabbit master, check HA services, run OSTF
        8. Check another node became rabbit master
        9. Resume the former master, check HA services, run OSTF
        10. Check the master elected in step 8 kept its role

    :raises SkipTest: required snapshot is missing
    :raises TimeoutError: a node did not reach the expected online state
    """
    if not self.env.d_env.has_snapshot(self.snapshot_name):
        raise SkipTest()

    def wait_node_online_state(node, online):
        """Wait up to 5 minutes until nailgun reports ``online`` state."""
        state = 'online' if online else 'offline'
        try:
            wait(lambda: bool(
                self.fuel_web.get_nailgun_node_by_devops_node(
                    node)['online']) == online,
                timeout=60 * 5)
        except TimeoutError:
            raise TimeoutError('Node {0} does not become {1} '
                               'in nailgun'.format(node.name, state))

    logger.info('Revert environment started...')
    self.env.revert_snapshot(self.snapshot_name)

    cluster_id = self.fuel_web.client.get_cluster_id(
        self.__class__.__name__)

    logger.info('Waiting for galera is up')

    # Wait until MySQL Galera is UP on some controller
    self.fuel_web.wait_mysql_galera_is_up(['slave-02'])

    # Check ha and services are fine after revert
    self.fuel_web.assert_ha_services_ready(cluster_id, timeout=300)
    self.fuel_web.assert_os_services_ready(cluster_id)

    # get master rabbit controller
    master_rabbit = self.fuel_web.get_rabbit_master_node(
        self.env.d_env.nodes().slaves[0].name)
    logger.info('Try to find slave where rabbit slaves are running')

    # get rabbit slaves
    rabbit_slaves = self.fuel_web.get_rabbit_slaves_node(
        self.env.d_env.nodes().slaves[0].name)
    assert_true(rabbit_slaves,
                'Can not find rabbit slaves. '
                'current result is {0}'.format(rabbit_slaves))
    rabbit_slave = rabbit_slaves[0]

    logger.info('Suspend node {0}'.format(rabbit_slave.name))

    # suspend devops node with rabbit slave
    rabbit_slave.suspend(False)

    # Wait until Nailgun marked suspended controller as offline
    wait_node_online_state(rabbit_slave, online=False)

    # check ha
    self.fuel_web.assert_ha_services_ready(cluster_id, timeout=300)

    # Run sanity and smoke tests to see if cluster operable
    self.fuel_web.run_ostf(cluster_id=cluster_id, should_fail=1)

    active_slaves = [slave for slave
                     in self.env.d_env.nodes().slaves[0:4]
                     if slave.name != rabbit_slave.name]

    master_rabbit_after_slave_fail = self.fuel_web.get_rabbit_master_node(
        active_slaves[0].name)
    assert_equal(master_rabbit.name, master_rabbit_after_slave_fail.name)

    # turn on rabbit slave
    rabbit_slave.resume(False)

    # Wait until Nailgun marked suspended controller as online
    wait_node_online_state(rabbit_slave, online=True)

    # check ha
    self.fuel_web.assert_ha_services_ready(cluster_id, timeout=300)
    # check os
    self.fuel_web.assert_os_services_ready(cluster_id)

    # run ostf smoke
    self.fuel_web.run_ostf(cluster_id=cluster_id, test_sets=['smoke'])

    # check that master rabbit is the same
    master_rabbit_after_slave_back = self.fuel_web.get_rabbit_master_node(
        active_slaves[0].name)
    assert_equal(master_rabbit.name, master_rabbit_after_slave_back.name)

    # turn off rabbit master
    master_rabbit.suspend(False)

    # Wait until Nailgun marked suspended controller as offline
    wait_node_online_state(master_rabbit, online=False)

    # check ha
    self.fuel_web.assert_ha_services_ready(cluster_id, timeout=300)
    self.fuel_web.run_ostf(cluster_id=cluster_id, should_fail=1)

    active_slaves = [slave for slave
                     in self.env.d_env.nodes().slaves[0:4]
                     if slave.name != master_rabbit.name]
    master_rabbit_after_fail = self.fuel_web.get_rabbit_master_node(
        active_slaves[0].name)
    assert_not_equal(master_rabbit.name, master_rabbit_after_fail.name)

    # turn on rabbit master
    master_rabbit.resume(False)

    # Wait until Nailgun marked suspended controller as online
    wait_node_online_state(master_rabbit, online=True)

    # check ha
    self.fuel_web.assert_ha_services_ready(cluster_id, timeout=300)
    self.fuel_web.run_ostf(cluster_id=cluster_id)

    # check that master rabbit is the same
    master_rabbit_after_node_back = self.fuel_web.get_rabbit_master_node(
        active_slaves[0].name)
    assert_equal(master_rabbit_after_fail.name,
                 master_rabbit_after_node_back.name)
def public_api_check_security_rules(self):
    """Verify an instance can reach horizon through the DMZ network

    Scenario:
        1. Revert snapshot from previous test
        2. Run instance
        3. Try to access horizon from instance
        4. Remove instance
    """
    self.show_step(1)
    self.env.revert_snapshot('deploy_env_with_public_api')

    self.show_step(2)
    env_id = self.fuel_web.get_last_created_cluster()
    public_vip = self.fuel_web.get_public_vip(env_id)
    openstack = os_actions.OpenStackActions(
        public_vip, user='******', passwd='admin', tenant='admin')

    # Boot the server in the predefined private network
    predefined_nets = self.fuel_web.get_cluster_predefined_networks_name(
        env_id)
    private_net = predefined_nets['private_net']
    server = openstack.create_server_for_migration(neutron=True,
                                                   label=private_net)

    # The server has to be ACTIVE before a floating ip is attached
    openstack.verify_instance_status(server, 'ACTIVE')
    server_fip = openstack.assign_floating_ip(server)

    logger.info('Trying to get vm via tcp.')
    try:
        wait(lambda: tcp_ping(server_fip.ip, 22), timeout=120)
    except TimeoutError:
        message = 'Can not ping instance' \
                  ' by floating ip {0}'.format(server_fip.ip)
        raise TimeoutError(message)
    logger.info('VM is accessible via ip: {0}'.format(server_fip.ip))

    self.show_step(3)
    cluster_attrs = self.fuel_web.client.get_cluster_attributes(env_id)
    horizon_ssl = cluster_attrs['editable']['public_ssl']['horizon']
    if horizon_ssl['value'] is True:
        scheme = 'https'
    else:
        scheme = 'http'
    curl_cmd = 'curl -I {proto}://{ip}/horizon --insecure'.format(
        proto=scheme, ip=public_vip)
    logger.info(
        'Trying to access horizon from instance: {}'.format(curl_cmd))

    # The command reaches the instance through the first controller
    controllers = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
        cluster_id=env_id, roles=['controller'])
    first_controller = controllers[0]
    controller_ssh = self.fuel_web.get_ssh_for_nailgun_node(
        first_controller)
    curl_result = controller_ssh.execute_through_host(
        hostname=server_fip.ip, cmd=curl_cmd, auth=cirros_auth)
    logger.info(curl_result.stdout)
    asserts.assert_equal(
        curl_result.exit_code, 0,
        "Instance can't access horizon via DMZ network")

    self.show_step(4)
    # Clean up: remove the server and make sure it is gone
    openstack.delete_instance(server)
    openstack.verify_srv_deleted(server)
def check_rh_hard_reboot(self):
    """Verify a VM keeps working after a hard reboot of the
    RH-based compute node hosting the environment's compute role

    Scenario:
        1. Revert environment with RH-compute.
        2. Check that services are ready.
        3. Boot VM on compute and check its connectivity via floating ip.
        4. Hard reboot RH-based compute.
        5. Verify VM connectivity via floating ip after successful reboot
           and VM resume action.

    Duration 20m
    Snapshot check_rh_hard_reboot
    """

    def ensure_vm_reachable(address):
        # TCP port 22 has to answer within two minutes
        try:
            wait(lambda: tcp_ping(address, 22), timeout=120)
        except TimeoutError:
            raise TimeoutError('Can not ping instance'
                               ' by floating ip {0}'.format(address))

    self.show_step(1, initialize=True)
    self.env.revert_snapshot('ready_ha_one_controller_with_rh_compute',
                             skip_timesync=True,
                             skip_slaves_check=True)
    self.check_slaves_are_ready()
    logger.debug('All slaves online.')

    self.show_step(2)
    env_id = self.fuel_web.get_last_created_cluster()
    openstack = os_actions.OpenStackActions(
        self.fuel_web.get_public_vip(env_id))
    self.fuel_web.assert_cluster_ready(openstack, smiles_count=5)
    logger.debug('Cluster up and ready.')

    self.show_step(3)
    # Cluster id and OpenStack connection are obtained again here
    env_id = self.fuel_web.get_last_created_cluster()
    controller_nodes = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
        env_id, roles=('controller',))
    openstack = os_actions.OpenStackActions(
        self.fuel_web.get_public_vip(env_id))
    controllers_found = len(controller_nodes)
    asserts.assert_equal(
        controllers_found, 1,
        'Environment does not have 1 controller node, '
        'found {} nodes!'.format(len(controller_nodes)))

    compute_nodes = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
        env_id, ['compute'])
    compute_node = compute_nodes[0]
    rh_compute = self.fuel_web.get_devops_node_by_nailgun_node(
        compute_node)
    rh_compute_ip = self.fuel_web.get_node_ip_by_devops_name(
        rh_compute.name)
    predefined_nets = self.fuel_web.get_cluster_predefined_networks_name(
        env_id)
    private_net = predefined_nets['private_net']
    server = openstack.create_server_for_migration(neutron=True,
                                                   label=private_net)
    server_fip = openstack.assign_floating_ip(server)

    logger.info('Trying to get vm via tcp.')
    ensure_vm_reachable(server_fip.ip)
    logger.info('VM is accessible via ip: {0}'.format(server_fip.ip))

    self.show_step(4)
    # Hard reboot: destroy the node, confirm it is down, start it again
    rh_compute.destroy()
    asserts.assert_false(
        rh_compute.driver.node_active(node=rh_compute),
        'Target node still active')
    rh_compute.start()
    asserts.assert_true(
        rh_compute.driver.node_active(node=rh_compute),
        'Target node did not start')
    self.wait_for_slave_provision(rh_compute_ip)
    self.fuel_web.assert_cluster_ready(openstack, smiles_count=5)
    logger.info('All cluster services up and '
                'running after compute hard reboot.')

    self.show_step(5)
    observed_status = openstack.get_instance_detail(server).status
    # The status is queried a second time for the failure message
    status_message = (
        "Instance did not reach active state after compute back online, "
        "current state is {0}".format(
            openstack.get_instance_detail(server).status))
    asserts.assert_equal(observed_status, "ACTIVE", status_message)
    logger.info('Spawned VM is ACTIVE. Trying to '
                'access it via ip: {0}'.format(server_fip.ip))
    ensure_vm_reachable(server_fip.ip)

    logger.info('VM is accessible. Deleting it.')
    openstack.delete_instance(server)
    openstack.verify_srv_deleted(server)
def __exec_command(cls, command, cwd=None, env=None, timeout=None,
                   verbose=True):
    """Command executor helper

    Runs ``command`` through the shell while a background task collects
    its stdout and stderr into the result. Only one command runs at a
    time: the whole call is serialized by the class-level lock.

    :type command: str
    :type cwd: str
    :type env: dict
    :type timeout: int
    :type verbose: bool
    :rtype: ExecResult
    :raises TimeoutError: the command did not finish within ``timeout``
        seconds; the process is killed before raising
    """

    def readlines(stream, verbose, lines_count=100):
        """Nonblocking read and log up to ``lines_count`` lines"""
        if lines_count < 1:
            lines_count = 1
        result = []
        try:
            # range(lines_count), not range(1, lines_count): the latter
            # reads one line less and nothing at all for lines_count == 1
            for _ in range(lines_count):
                line = stream.readline()
                if not line:
                    # Nothing more is available right now (or EOF)
                    break
                result.append(line)
                if verbose:
                    print(line.rstrip())
        except IOError:
            # No data available on the non-blocking stream
            pass
        return result

    @threaded(started=True)
    def poll_pipes(proc, result, stop):
        """Polling task for FIFO buffers

        :type proc: Popen
        :type result: ExecResult
        :type stop: Event
        """
        # Get file descriptors for stdout and stderr streams
        fd_stdout = proc.stdout.fileno()
        fd_stderr = proc.stderr.fileno()
        # Get flags of stdout and stderr streams
        fl_stdout = fcntl.fcntl(fd_stdout, fcntl.F_GETFL)
        fl_stderr = fcntl.fcntl(fd_stderr, fcntl.F_GETFL)
        # Set nonblock mode for stdout and stderr streams
        fcntl.fcntl(fd_stdout, fcntl.F_SETFL, fl_stdout | os.O_NONBLOCK)
        fcntl.fcntl(fd_stderr, fcntl.F_SETFL, fl_stderr | os.O_NONBLOCK)

        while not stop.is_set():
            sleep(0.1)

            stdout_diff = readlines(proc.stdout, verbose)
            stderr_diff = readlines(proc.stderr, verbose)
            result.stdout += stdout_diff
            result.stderr += stderr_diff

            proc.poll()

            if proc.returncode is not None:
                result.exit_code = proc.returncode
                # The process is gone: keep reading until both pipes
                # are empty so that output longer than a single batch
                # is not silently truncated
                while True:
                    stdout_diff = readlines(proc.stdout, verbose)
                    stderr_diff = readlines(proc.stderr, verbose)
                    result.stdout += stdout_diff
                    result.stderr += stderr_diff
                    if not stdout_diff and not stderr_diff:
                        break
                stop.set()

    # 1 Command per run
    with cls.__lock:
        result = ExecResult(cmd=command)
        stop_event = Event()

        logger.debug("Run command on the host: '{0}'".format(command))

        # Run
        process = Popen(
            args=[command],
            stdin=PIPE, stdout=PIPE, stderr=PIPE,
            shell=True, cwd=cwd, env=env,
            universal_newlines=False)

        # Poll output
        poll_pipes(process, result, stop_event)
        # wait for process close
        stop_event.wait(timeout)

        output_tmpl = (
            '\tSTDOUT:\n'
            '{0}\n'
            '\tSTDERR:\n'
            '{1}\n')
        logger.debug(output_tmpl.format(result.stdout, result.stderr))

        # Process closed?
        if stop_event.is_set():
            stop_event.clear()
            return result

        # Kill not ended process and wait for close
        try:
            process.kill()  # kill -9
            stop_event.wait(5)
        except OSError:
            # Nothing to kill
            logger.warning(
                "{} has been completed just after timeout: "
                "please validate timeout.".format(command))

        no_ec_msg = (
            "No return code received while waiting for the command "
            "'{0}' during {1}s !\n".format(command, timeout))
        logger.debug(no_ec_msg)

        raise TimeoutError(
            no_ec_msg + output_tmpl.format(
                result.stdout_brief,
                result.stderr_brief))
def deploy_ha_neutron(self):
    """Verify the l3-agent is rescheduled once it dies on its node

    Scenario:
        1. Create cluster. HA, Neutron with GRE segmentation
        2. Add 3 nodes with controller roles
        3. Add 2 nodes with compute roles
        4. Add 1 node with cinder role
        5. Deploy the cluster
        6. Manually reschedule router from primary controller
           to another one
        7. Stop l3-agent on new node with pcs
        8. Check l3-agent was rescheduled
        9. Check network connectivity from instance via
           dhcp namespace
        10. Run OSTF

    Snapshot deploy_ha_neutron
    """
    self.env.revert_snapshot("ready")
    self.env.bootstrap_nodes(self.env.nodes().slaves[:6])

    neutron_gre = {
        "net_provider": 'neutron',
        "net_segment_type": 'gre'
    }
    cluster_id = self.fuel_web.create_cluster(
        name=self.__class__.__name__,
        mode=settings.DEPLOYMENT_MODE_HA,
        settings=neutron_gre)

    node_roles = {
        'slave-01': ['controller'],
        'slave-02': ['controller'],
        'slave-03': ['controller'],
        'slave-04': ['compute'],
        'slave-05': ['compute'],
        'slave-06': ['cinder']
    }
    self.fuel_web.update_nodes(cluster_id, node_roles)
    self.fuel_web.deploy_cluster_wait(cluster_id)

    public_vip = self.fuel_web.get_public_vip(cluster_id)
    openstack = os_actions.OpenStackActions(public_vip)

    # Locate the node serving dhcp for net04 and open ssh to it
    net04_id = openstack.get_network('net04')['id']
    dhcp_host = openstack.get_node_with_dhcp_for_network(net04_id)[0]
    dhcp_host_short = dhcp_host.split('.')[0]
    logger.debug('node name with dhcp is {0}'.format(dhcp_host_short))

    dhcp_devops_node = self.fuel_web.find_devops_node_by_nailgun_fqdn(
        dhcp_host_short, self.env.nodes().slaves[0:6])
    dhcp_ssh = self.env.get_ssh_to_remote_by_name(dhcp_devops_node.name)

    netns_output = dhcp_ssh.execute(
        'ip netns | grep {0}'.format(net04_id))['stdout']
    netns_name = ''.join(netns_output).rstrip()
    logger.debug('dhcp namespace is {0}'.format(netns_name))

    # Key pair used below to ssh into the instance
    dhcp_ssh.execute(
        '. openrc; nova keypair-add instancekey > '
        '/root/.ssh/webserver_rsa')
    dhcp_ssh.execute('chmod 400 /root/.ssh/webserver_rsa')

    server = openstack.create_server_for_migration(
        neutron=True, key_name='instancekey')
    server_ip = server.addresses['net04'][0]['addr']
    logger.debug('instance internal ip is {0}'.format(server_ip))

    # Move the router from its current l3 agent to another one
    router_id = openstack.get_routers_ids()[0]
    current_agent = openstack.get_l3_agent_ids(router_id)[0]
    logger.debug("l3 agent id is {0}".format(current_agent))

    spare_agent = openstack.get_available_l3_agents_ids(current_agent)[0]
    logger.debug("another l3 agent is {0}".format(spare_agent))

    openstack.remove_l3_from_router(current_agent, router_id)
    openstack.add_l3_to_router(spare_agent, router_id)
    wait(lambda: openstack.get_l3_agent_ids(router_id), timeout=60 * 5)

    ping_template = (
        ". openrc; ip netns exec {0} ssh -i /root/.ssh/webserver_rsa"
        " -o 'StrictHostKeyChecking no'"
        " cirros@{1} \"ping -c 1 8.8.8.8\"")
    ping_cmd = ping_template.format(netns_name, server_ip)
    wait(lambda: dhcp_ssh.execute(ping_cmd)['exit_code'] == 0, timeout=60)
    ping_result = dhcp_ssh.execute(ping_cmd)

    assert_equal(0, ping_result['exit_code'],
                 'instance has no connectivity, exit code {0}'.format(
                     ping_result['exit_code']))

    # Ban the l3 agent on the node that currently hosts it
    l3_host = openstack.get_l3_agent_hosts(router_id)[0]
    l3_host_short = l3_host.split('.')[0]
    logger.debug("new node with l3 is {0}".format(l3_host_short))
    l3_devops_node = self.fuel_web.find_devops_node_by_nailgun_fqdn(
        l3_host_short, self.env.nodes().slaves[0:6])
    l3_ssh = self.env.get_ssh_to_remote_by_name(l3_devops_node.name)

    l3_ssh.execute(
        "pcs resource ban p_neutron-l3-agent {0}".format(l3_host))

    try:
        wait(lambda: not l3_host == openstack.get_l3_agent_hosts(
            router_id)[0], timeout=60 * 3)
    except TimeoutError:
        still_hosting = openstack.get_l3_agent_hosts(router_id)[0]
        raise TimeoutError(
            "l3 agent wasn't banned, it is still {0}".format(
                still_hosting))
    wait(lambda: openstack.get_l3_agent_ids(router_id), timeout=60)

    ping_result = dhcp_ssh.execute(ping_cmd)
    assert_equal(0, ping_result['exit_code'],
                 'instance has no connectivity, exit code is {0}'.format(
                     ping_result['exit_code']))

    self.fuel_web.run_ostf(
        cluster_id=cluster_id,
        test_sets=['ha', 'smoke', 'sanity'])

    # Lift the ban set above
    l3_ssh.execute(
        "pcs resource clear p_neutron-l3-agent {0}".format(l3_host))
def deploy_neutron_ip_v6(self):
    """Check IPv6 only functionality for Neutron VLAN

    Scenario:
        1. Revert deploy_neutron_vlan snapshot
        2. Create two dualstack network IPv6 subnets
            (should be in SLAAC mode,
            address space should not intersect).
        3. Create virtual router and set gateway.
        4. Attach this subnets to the router.
        5. Create a Security Group,
            that allows SSH and ICMP for both IPv4 and IPv6.
        6. Launch two instances, one for each network.
        7. Lease a floating IP.
        8. Attach Floating IP for main instance.
        9. SSH to the main instance and ping6 another instance.

    Duration 10m
    Snapshot deploy_neutron_ip_v6

    :raises TimeoutError: ssh on an instance is not ready within 120s
    """
    self.show_step(1, initialize=True)
    self.env.revert_snapshot("deploy_neutron_vlan")

    cluster_id = self.fuel_web.get_last_created_cluster()
    public_vip = self.fuel_web.get_public_vip(cluster_id)
    logger.info('Public vip is %s', public_vip)

    os_conn = os_actions.OpenStackActions(
        controller_ip=public_vip,
        user='******',
        passwd='simpleVlan',
        tenant='simpleVlan')

    tenant = os_conn.get_tenant('simpleVlan')

    self.show_step(2)
    net1 = os_conn.create_network(
        network_name='net1',
        tenant_id=tenant.id)['network']
    net2 = os_conn.create_network(
        network_name='net2',
        tenant_id=tenant.id)['network']

    subnet_1_v4 = os_conn.create_subnet(
        subnet_name='subnet_1_v4',
        network_id=net1['id'],
        cidr='192.168.100.0/24',
        ip_version=4)

    subnet_1_v6 = os_conn.create_subnet(
        subnet_name='subnet_1_v6',
        network_id=net1['id'],
        ip_version=6,
        cidr="2001:db8:100::/64",
        gateway_ip="2001:db8:100::1",
        ipv6_ra_mode="slaac",
        ipv6_address_mode="slaac")

    subnet_2_v4 = os_conn.create_subnet(
        subnet_name='subnet_2_v4',
        network_id=net2['id'],
        cidr='192.168.200.0/24',
        ip_version=4)

    subnet_2_v6 = os_conn.create_subnet(
        subnet_name='subnet_2_v6',
        network_id=net2['id'],
        ip_version=6,
        cidr="2001:db8:200::/64",
        gateway_ip="2001:db8:200::1",
        ipv6_ra_mode="slaac",
        ipv6_address_mode="slaac")

    self.show_step(3)
    router = os_conn.create_router('test_router', tenant=tenant)

    self.show_step(4)
    os_conn.add_router_interface(
        router_id=router["id"],
        subnet_id=subnet_1_v4["id"])
    os_conn.add_router_interface(
        router_id=router["id"],
        subnet_id=subnet_1_v6["id"])
    os_conn.add_router_interface(
        router_id=router["id"],
        subnet_id=subnet_2_v4["id"])
    os_conn.add_router_interface(
        router_id=router["id"],
        subnet_id=subnet_2_v6["id"])

    self.show_step(5)
    security_group = os_conn.create_sec_group_for_ssh()

    self.show_step(6)
    instance1 = os_conn.create_server(
        name='instance1',
        security_groups=[security_group],
        net_id=net1['id'],
    )
    instance2 = os_conn.create_server(
        name='instance2',
        security_groups=[security_group],
        net_id=net2['id'],
    )

    self.show_step(7)
    self.show_step(8)
    floating_ip = os_conn.assign_floating_ip(instance1)
    floating_ip2 = os_conn.assign_floating_ip(instance2)

    self.show_step(9)
    instance1_ipv6 = [
        addr['addr'] for addr in instance1.addresses[net1['name']]
        if addr['version'] == 6].pop()
    instance2_ipv6 = [
        addr['addr'] for addr in instance2.addresses[net2['name']]
        if addr['version'] == 6].pop()

    logger.info(
        '\ninstance1:\n'
        '\tFloatingIP: {ip!s}\n'
        '\tIPv6 address: {ipv6!s}'.format(
            ip=floating_ip.ip,
            ipv6=instance1_ipv6))
    logger.info(
        '\ninstance2:\n'
        '\tFloatingIP: {ip!s}\n'
        '\tIPv6 address: {ipv6!s}'.format(
            ip=floating_ip2.ip,
            ipv6=instance2_ipv6))

    with self.fuel_web.get_ssh_for_node("slave-01") as remote:
        def ssh_ready(vm_host):
            """Return True once a command can be run on ``vm_host``."""
            try:
                os_conn.execute_through_host(
                    ssh=remote,
                    vm_host=vm_host,
                    cmd="ls -la",
                    creds=("cirros", "cubswin:)")
                )
                return True
            except ChannelException:
                return False

        for vm_host, hostname in (
                (floating_ip.ip, instance1),
                (floating_ip2.ip, instance2)
        ):
            try:
                wait(lambda: ssh_ready(vm_host), timeout=120)
            except TimeoutError:
                # ``hostname`` is the server object, not a string: a
                # ':s' format spec on it raises TypeError on Python 3
                # and would hide this timeout, so '!s' is used instead
                raise TimeoutError(
                    'ssh is not ready on host '
                    '{hostname!s} ({ip!s}) '
                    'at timeout 120s'.format(
                        hostname=hostname, ip=vm_host))

        res = os_conn.execute_through_host(
            ssh=remote,
            vm_host=floating_ip.ip,
            cmd="{ping:s} -q "
                "-c{count:d} "
                "-w{deadline:d} "
                "-s{packetsize:d} "
                "{dst_address!s}".format(
                    ping='ping6',
                    count=10,
                    deadline=20,
                    packetsize=1452,
                    dst_address=instance2_ipv6),
            creds=("cirros", "cubswin:)")
        )

    # NOTE(review): the type of res['stdout'] / res['stderr'] is not
    # visible here (str or list of lines) - '!s' is safe for either,
    # while ':s' raises TypeError on Python 3 for anything but str
    logger.info('Ping results: \n\t{res!s}'.format(res=res['stdout']))

    assert_equal(
        res['exit_code'],
        0,
        'Ping failed with error code: {code:d}\n'
        '\tSTDOUT: {stdout!s}\n'
        '\tSTDERR: {stderr!s}'.format(
            code=res['exit_code'],
            stdout=res['stdout'],
            stderr=res['stderr'],
        ))

    self.env.make_snapshot('deploy_neutron_ip_v6')