def revert_snapshot(self, name, skip_timesync=False,
                    skip_slaves_check=False):
    if not self.d_env.has_snapshot(name):
        return False

    logger.info("We have snapshot with such name: {:s}".format(name))

    logger.info("Reverting the snapshot '{0}' ....".format(name))
    self.d_env.revert(name)

    logger.info("Resuming the snapshot '{0}' ....".format(name))
    self.resume_environment()

    if not skip_timesync:
        self.sync_time()
    else:
        self.sync_time(["admin"])

    try:
        with QuietLogger(upper_log_level=logging.CRITICAL):
            # TODO(astudenov): add timeout_msg
            wait_pass(
                self.fuel_web.client.get_releases,
                expected=(exceptions.RetriableConnectionFailure,
                          exceptions.UnknownConnectionError),
                timeout=300,
            )
    except exceptions.Unauthorized:
        self.set_admin_keystone_password()
        self.fuel_web.get_nailgun_version()

    if not skip_slaves_check:
        # TODO(astudenov): add timeout_msg
        wait_pass(lambda: self.check_slaves_are_ready(), timeout=60 * 6)
    return True

def wait_for_provisioning(self,
                          timeout=settings.WAIT_FOR_PROVISIONING_TIMEOUT):
    # TODO(astudenov): add timeout_msg
    wait_pass(lambda: tcp_ping_(
        self.d_env.nodes().admin.get_ip_address_by_network_name('admin'),
        22),
        timeout=timeout)

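For context, tcp_ping_ used above is just a bare TCP connect check that raises on failure, so wait_pass treats the exception as "not ready yet" and retries. A rough sketch of such a helper (an illustrative assumption, not the exact devops implementation) could look like this:

import socket


def tcp_ping_sketch(host, port, timeout=None):
    # Attempt a TCP connection to (host, port); any socket error simply
    # propagates, which wait_pass() interprets as "service not up yet".
    sock = socket.create_connection((str(host), int(port)), timeout)
    sock.close()
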
def bootstrap_nodes(self, devops_nodes, timeout=settings.BOOTSTRAP_TIMEOUT,
                    skip_timesync=False):
    """Lists registered nailgun nodes
    Start vms and wait until they are registered on nailgun.
    :rtype : List of registered nailgun nodes
    """
    # self.dhcrelay_check()

    for node in devops_nodes:
        logger.info("Bootstrapping node: {}".format(node.name))
        node.start()
        # TODO(aglarendil): LP#1317213 temporary sleep
        # remove after better fix is applied
        time.sleep(5)

    with TimeStat("wait_for_nodes_to_start_and_register_in_nailgun"):
        wait(lambda: all(self.nailgun_nodes(devops_nodes)), 15, timeout,
             timeout_msg='Bootstrap timeout for nodes: {}'
                         ''.format([node.name for node in devops_nodes]))

    wait_pass(
        lambda: checkers.validate_minimal_amount_nodes(
            nodes=self.nailgun_nodes(devops_nodes),
            expected_amount=len(devops_nodes)
        ),
        timeout=30)

    if not skip_timesync:
        self.sync_time()

    return self.nailgun_nodes(devops_nodes)

def wait_nailgun_available(self):
    """Check status for Nailgun"""
    fuel_web = self.manager.fuel_web
    wait_pass(fuel_web.get_nailgun_version, timeout=60 * 20)

def wait_check_network(self, kube_host_ip, works=True, timeout=120,
                       interval=5):
    helpers.wait_pass(
        lambda: self.check_network(kube_host_ip, works=works),
        timeout=timeout, interval=interval)

def wait_check_network(k8sclient, namespace='default', works=True,
                       timeout=120, interval=5,
                       netchecker_pod_port=NETCHECKER_NODE_PORT):
    helpers.wait_pass(
        lambda: check_network(k8sclient,
                              netchecker_pod_port=netchecker_pod_port,
                              namespace=namespace,
                              works=works),
        timeout=timeout, interval=interval)

def reboot_hw_node(self, ssh, salt, node):
    """Reboot the given node and wait for it to start back

    :param ssh: UnderlaySSHManager, tcp-qa SSH manager instance
    :param salt: SaltManager, tcp-qa Salt manager instance
    :param node: str, name of the node to reboot
    """
    LOG.info("Sending reboot command to '{}' node.".format(node))
    remote = ssh.remote(node_name=node)
    remote.execute_async("/sbin/shutdown -r now")

    # Wait for restarted node to boot and become accessible
    helpers.wait_pass(lambda: salt.local(node, "test.ping", timeout=5),
                      timeout=60 * 10, interval=5)

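A hypothetical call of the helper above from a tcp-qa test might look like the sketch below; the fixture names and the target-selection line are assumptions modelled on other snippets in this collection, not part of the original helper:

def test_reboot_one_osd(self, underlay_actions, salt_actions):
    salt = salt_actions
    # Pick any minion matching the compound target, as the OSD test does
    node = salt.local("I@ceph:osd", "test.ping")['return'][0].keys()[0]
    self.reboot_hw_node(ssh=underlay_actions, salt=salt_actions, node=node)
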
def test_daemonset_rollout_rollingupdate(self, underlay, k8scluster,
                                         config, show_step):
    """Rollback a daemonset using updateStrategy type: RollingUpdate

    Scenario:
        1. Deploy k8s using fuel-ccp-installer
        2. Create a DaemonSet for nginx with image version 1_10 and
           update strategy RollingUpdate
        3. Wait until nginx pods are created and become 'ready'
        4. Check that the image version in the nginx pods is 1_10
           Check that the image version in the nginx daemonset is 1_10
        5. Change nginx image version to 1_11 using YAML
        6. Wait for 10 seconds (needs to check that there were no auto
           updates of the nginx pods)
        7. Check that the image version in the nginx daemonset
           is updated to 1_11
           Wait for ~120 sec that the image version in the nginx pods
           is changed to 1_11
        8. Rollback the nginx daemonset:
           kubectl rollout undo daemonset/nginx
        9. Check that the image version in the nginx daemonset is
           downgraded to 1_10
           Wait for ~120 sec that the image version in the nginx pods
           is downgraded to 1_10

    Duration: 3000 seconds
    """
    self.test_daemonset_rollingupdate(k8scluster, show_step)

    k8sclient = k8scluster.api

    show_step(8)
    cmd = "kubectl rollout undo daemonset/nginx"
    underlay.check_call(cmd, host=config.k8s.kube_host)

    # STEP #9
    show_step(9)
    self.check_nginx_ds_image(k8sclient, self.from_nginx_image)
    # Pods should be rolled back to the original image version
    helpers.wait_pass(
        lambda: self.check_nginx_pods_image(
            k8sclient, self.from_nginx_image),
        timeout=2 * 60)

def rabbit_client(underlay, config, os_deployed):
    """Deploy openstack
    """
    host = config.k8s.kube_host
    remote = underlay.remote(host=host)
    rabbit_port = ''.join(remote.execute(
        "kubectl get service --namespace ccp rabbitmq -o yaml |"
        " awk '/nodePort: / {print $NF}'")['stdout'])
    client = helpers.wait_pass(
        lambda: rabbit.RabbitClient(host, rabbit_port),
        interval=60, timeout=360)
    return client

def test_wait_pass(self, sleep):
    predicate = mock.Mock(return_value=True)

    result = helpers.wait_pass(predicate)
    self.assertTrue(result)

    predicate.reset_mock()
    predicate.side_effect = ValueError

    self.assertRaises(error.TimeoutError,
                      helpers.wait_pass,
                      predicate, timeout=1)

def test_wait_pass(self, sleep, time):
    predicate = mock.Mock(return_value=True)

    result = helpers.wait_pass(predicate)
    self.assertTrue(result)
    time.assert_called_once()
    sleep.assert_not_called()

    time.reset_mock()
    time.return_value = 1
    sleep.reset_mock()
    predicate.reset_mock()
    predicate.side_effect = ValueError

    self.assertRaises(ValueError, helpers.wait_pass, predicate, timeout=-1)
    sleep.assert_not_called()
    time.assert_has_calls([mock.call(), mock.call()])

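For reference, the behaviour these unit tests exercise (retry a callable until it stops raising, then return its result, or give up once a timeout elapses) can be sketched roughly as follows. This is a simplified illustration, not the actual devops helpers implementation, which raises error.TimeoutError with a timeout_msg rather than re-raising and also accepts an expected exception tuple:

import time


def wait_pass_sketch(predicate, timeout=60, interval=5, expected=Exception):
    # Call predicate repeatedly until it stops raising `expected`
    # exceptions; return its result on the first successful call.
    # Once `timeout` seconds have elapsed, the last exception is
    # re-raised (simplification versus the real helper, see above).
    deadline = time.time() + timeout
    while True:
        try:
            return predicate()
        except expected:
            if time.time() >= deadline:
                raise
            time.sleep(interval)
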
def create_component_and_env_configdb(self):
    """ Install and check ConfigDB

    Scenario:
        1. Revert snapshot empty
        2. Install configDB extension
        3. Create components
        4. Create environment with component
        5. Get and check created data
        6. Make snapshot

    Duration: 5 min
    Snapshot: create_component_and_env_configdb
    """
    self.check_run("create_component_and_env_configdb")
    self.show_step(1)
    self.env.revert_snapshot("empty")
    self.show_step(2)
    install_configdb()

    logger.debug("Waiting for ConfigDB")
    wait_pass(lambda: self.fuel_web.client.get_components(),
              timeout=45)

    logger.debug("Get env and component data")
    components = self.fuel_web.client.get_components()
    envs = self.fuel_web.client.get_environments()

    assert_false(components,
                 "Components is not empty after tuningbox installation")
    assert_false(envs,
                 "Environments is not empty after tuningbox installation")

    # Uploaded data
    component = {
        "name": "comp1",
        "resource_definitions": [
            {"name": self.RESOURCE_NAME_1, "content": {}},
            {"name": self.SLASHED_RESOURCE, "content": {}},
        ],
    }

    environment = {"name": "env1",
                   "components": ["comp1"],
                   "hierarchy_levels": ["nodes"]}

    self.show_step(3)
    self.fuel_web.client.create_component(component)
    self.show_step(4)
    self.fuel_web.client.create_environment(environment)
    self.show_step(5)
    comp = self.fuel_web.client.get_components(comp_id=1)
    env = self.fuel_web.client.get_environments(env_id=1)

    expected_comp = {
        "resource_definitions": [
            {"content": {}, "component_id": 1, "id": 1,
             "name": self.RESOURCE_NAME_1},
            {"content": {}, "component_id": 1, "id": 2,
             "name": self.SLASHED_RESOURCE},
        ],
        "id": 1,
        "name": "comp1",
    }
    expected_env = {"hierarchy_levels": ["nodes"],
                    "id": 1,
                    "components": [1]}

    logger.debug("Compare original component with "
                 "received component from API")
    assert_equal(comp, expected_comp)
    logger.debug("Compare original env with received env from API")
    assert_equal(env, expected_env)
    self.show_step(6)
    self.env.make_snapshot("create_component_and_env_configdb",
                           is_make=True)

def test_calico_route_recovery(self, show_step, config, underlay,
                               k8s_deployed):
    """Test for deploying k8s environment with Calico plugin and check
       that local routes are recovered by felix after removal

    Scenario:
        1. Install k8s with Calico network plugin.
        2. Run netchecker-server service.
        3. Run netchecker-agent daemon set.
        4. Get network verification status. Check status is 'OK'.
        5. Remove local route to netchecker-agent pod on the first node
        6. Check that the route is automatically recovered
        7. Get network verification status. Check status is 'OK'.

    Duration: 3000 seconds
    """
    # STEP #1
    show_step(1)
    k8sclient = k8s_deployed.api
    assert k8sclient.nodes.list() is not None, "Can not get nodes list"

    # STEP #2
    show_step(2)
    netchecker.start_server(k8s=k8s_deployed, config=config)
    LOG.info("Waiting for netchecker server is running")
    netchecker.wait_check_network(k8sclient, works=True, timeout=300)

    # STEP #3
    show_step(3)
    netchecker.start_agent(k8s=k8s_deployed, config=config)

    # STEP #4
    show_step(4)
    netchecker.wait_check_network(k8sclient, works=True, timeout=300)

    # STEP #5
    show_step(5)
    first_node = k8sclient.nodes.list()[0]
    first_node_ips = [
        addr.address for addr in first_node.status.addresses
        if 'IP' in addr.type
    ]
    assert len(first_node_ips) > 0, "Couldn't find first k8s node IP!"
    first_node_names = [
        name for name in underlay.node_names()
        if name.startswith(first_node.name)
    ]
    assert len(first_node_names) == 1, "Couldn't find first k8s node " \
                                       "hostname in SSH config!"
    first_node_name = first_node_names.pop()

    target_pod_ip = None

    for pod in k8sclient.pods.list():
        if pod.status.host_ip not in first_node_ips:
            continue
        # TODO: get pods by daemonset with name 'netchecker-agent'
        if 'netchecker-agent-' in pod.name and 'hostnet' not in pod.name:
            target_pod_ip = pod.status.pod_ip

    assert target_pod_ip is not None, "Could not find netchecker pod IP!"

    route_del_cmd = 'ip route delete {0}'.format(target_pod_ip)
    underlay.sudo_check_call(cmd=route_del_cmd,
                             node_name=first_node_name)
    LOG.debug('Removed local route to pod IP {0} on node {1}'.format(
        target_pod_ip, first_node.name))

    # STEP #6
    show_step(6)
    route_chk_cmd = 'ip route list | grep -q "{0}"'.format(target_pod_ip)
    helpers.wait_pass(
        lambda: underlay.sudo_check_call(cmd=route_chk_cmd,
                                         node_name=first_node_name),
        timeout=120,
        interval=2)
    pod_ping_cmd = 'sleep 3 && ping -q -c 1 -w 3 {0}'.format(target_pod_ip)
    underlay.sudo_check_call(cmd=pod_ping_cmd, node_name=first_node_name)
    LOG.debug('Local route to pod IP {0} on node {1} is '
              'recovered'.format(target_pod_ip, first_node.name))

    # STEP #7
    show_step(7)
    netchecker.wait_check_network(k8sclient, works=True)

def test_maas_scheduled_backup_restore(
        self,
        underlay_actions,
        salt_actions,
        reclass_actions,
        show_step,
        cleanup_actions):
    """Test scheduled backup restore of MAAS data

    Scenario:
        1. Update MAAS backup schedule to run every 5 minutes
        2. Apply 'backupninja' state on the backupninja client node
        3. Wait until backup creation is triggered by schedule
        4. Wait until backup creation is finished
        5. Verify that MAAS backup is created on backupninja server node
        6. Delete/change some MAAS data
        7. Restore the backup
        8. Verify that MAAS data backup is restored
        9. Verify MAAS services after restore

    Duration: ~ 3 min
    """
    salt = salt_actions
    ssh = underlay_actions
    reclass = reclass_actions

    sm = salt.local("I@salt:master", "test.ping")['return'][0].keys()[0]
    server = salt.local(
        "I@backupninja:server", "test.ping")['return'][0].keys()[0]

    # Re-configure backup schedule
    show_step(1)
    self.update_backup_schedule(reclass, self.MAAS_YAML)

    # Apply 'backupninja' state on backupninja client node
    show_step(2)
    salt.enforce_state("I@backupninja:client", "backupninja")

    # Wait until backup is triggered by schedule
    show_step(3)
    helpers.wait_pass(
        lambda: ssh.check_call(
            cmd="pgrep backupninja && echo OK", node_name=sm),
        timeout=60 * 11,
        interval=5)

    # Wait until backup is finished
    show_step(4)
    ssh.check_call(
        cmd="while pgrep backupninja > /dev/null; do sleep 2; done",
        node_name=sm,
        timeout=60 * 5)

    # Verify that backup is created and all pieces of data are rsynced
    # to backupninja server
    show_step(5)
    self.check_backup(
        ssh, server, self.BCKP_SERVER_DIR, sm, self.MAAS_BACKUP_DIRS)

    # Simulate loss/change of some MAAS data
    show_step(6)
    self.delete_files(ssh, sm, self.MAAS_DIRS, self.MAAS_FILE_TO_DELETE)
    hashes = self.update_files(
        ssh, sm, self.MAAS_DIRS, self.MAAS_FILE_TO_UPDATE)

    # Restore the backup
    show_step(7)
    salt.enforce_state("I@maas:region", "maas.region")

    # Verify that all pieces of lost/changed data are restored
    show_step(8)
    self.verify_restored_data(
        ssh,
        sm,
        self.MAAS_DIRS,
        self.MAAS_FILE_TO_DELETE,
        self.MAAS_FILE_TO_UPDATE,
        hashes)

    # Verify that MAAS services are up and running after restore
    show_step(9)
    statuses = self.get_maas_svc_status(salt, sm)
    assert all(statuses.values()), (
        "Not all MAAS services are active after restore. Please check the "
        "affected services (marked as 'False' below):\n{}".format(statuses)
    )

def negative_auto_cic_maintenance_mode(self):
    """Check negative scenario for auto maintenance mode

    Scenario:
        1. Revert snapshot
        2. Disable UMM
        3. Change UMM.CONF
        4. Unexpected reboot
        5. Check that the controller does not switch to maintenance mode
        6. Check that the controller becomes available

    Duration 85m
    """
    self.env.revert_snapshot('cic_maintenance_mode')

    cluster_id = self.fuel_web.get_last_created_cluster()

    # Select a non-primary controller
    regular_ctrl = self.fuel_web.get_nailgun_node_by_name("slave-02")
    dregular_ctrl = self.fuel_web.get_devops_node_by_nailgun_node(
        regular_ctrl)
    _ip = regular_ctrl['ip']
    _id = regular_ctrl['id']

    asserts.assert_true('True' in check_available_mode(_ip),
                        "Maintenance mode is not available")
    logger.info('Disable UMM on node-{0}'.format(_id))

    change_config(_ip, umm=False, reboot_count=0)

    asserts.assert_false('True' in check_available_mode(_ip),
                         "Maintenance mode should not be available")

    logger.info('Unexpected reboot on node-{0}'.format(_id))

    self.ssh_manager.check_call(ip=_ip, command='reboot >/dev/null & ')

    wait(lambda: not checkers.check_ping(self.env.get_admin_node_ip(),
                                         _ip),
         timeout=60 * 10,
         timeout_msg='Node {} still responds to ping'.format(
             dregular_ctrl.name))

    # The node doesn't have enough time to set the offline status
    # after reboot, so just wait
    asserts.assert_true(
        checkers.check_ping(self.env.get_admin_node_ip(),
                            _ip,
                            deadline=600),
        "Host {0} is not reachable by ping during 600 sec".format(_ip))

    self.fuel_web.wait_node_is_online(dregular_ctrl)

    logger.info('Check that node-{0} is not in maintenance mode after'
                ' unexpected reboot'.format(_id))

    wait(lambda: tcp_ping(_ip, 22),
         timeout=60 * 10,
         timeout_msg='Node {} still is not available by SSH'.format(
             dregular_ctrl.name))

    asserts.assert_false('True' in check_auto_mode(_ip),
                         "Maintenance mode should not be switched on")

    # Wait until MySQL Galera is UP on some controller
    self.fuel_web.wait_mysql_galera_is_up([dregular_ctrl.name])

    # Wait until Cinder services are UP on a controller
    self.fuel_web.wait_cinder_is_up([dregular_ctrl.name])

    # Wait until RabbitMQ cluster is UP
    wait_pass(lambda: self.fuel_web.run_single_ostf_test(
        cluster_id, test_sets=['ha'],
        test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
            'RabbitMQ availability')),
        timeout=1500)
    logger.info('RabbitMQ cluster is available')

    # TODO(astudenov): add timeout_msg
    wait_pass(lambda: self.fuel_web.run_single_ostf_test(
        cluster_id, test_sets=['sanity'],
        test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
            'Check that required services are running')),
        timeout=1500)
    logger.info("Required services are running")

    try:
        self.fuel_web.run_ostf(cluster_id,
                               test_sets=['smoke', 'sanity', 'ha'])
    except AssertionError:
        logger.debug("Test failed on the first probe; sleep 600 seconds "
                     "and try one more time. If it fails again, "
                     "the test fails.")
        time.sleep(600)
        self.fuel_web.run_ostf(cluster_id,
                               test_sets=['smoke', 'sanity', 'ha'])

def test_daemonset_rollingupdate(self, k8scluster, show_step):
    """Update a daemonset using updateStrategy type: RollingUpdate

    Scenario:
        1. Deploy k8s using fuel-ccp-installer
        2. Create a DaemonSet for nginx with image version 1_10 and
           update strategy RollingUpdate
        3. Wait until nginx pods are created and become 'ready'
        4. Check that the image version in the nginx pods is 1_10
           Check that the image version in the nginx daemonset is 1_10
        5. Change nginx image version to 1_11 using YAML
        6. Wait for 10 seconds (needs to check that there were no auto
           updates of the nginx pods)
        7. Check that the image version in the nginx daemonset
           is updated to 1_11
           Wait for ~120 sec that the image version in the nginx pods
           is changed to 1_11

    Duration: 3000 seconds
    """
    # STEP #1
    show_step(1)
    k8sclient = k8scluster.api
    assert k8sclient.nodes.list() is not None, "Can not get nodes list"

    # STEP #2
    show_step(2)
    nginx_spec = self.get_nginx_spec()
    nginx_spec['spec']['template']['spec']['containers'][0][
        'image'] = self.from_nginx_image
    k8sclient.daemonsets.create(body=nginx_spec)

    # STEP #3
    show_step(3)
    time.sleep(3)
    self.wait_nginx_pods_ready(k8sclient)

    # STEP #4
    show_step(4)
    self.check_nginx_pods_image(k8sclient, self.from_nginx_image)
    self.check_nginx_ds_image(k8sclient, self.from_nginx_image)

    # STEP #5
    show_step(5)
    nginx_spec['spec']['template']['spec']['containers'][0][
        'image'] = self.to_nginx_image
    k8sclient.daemonsets.update(body=nginx_spec,
                                name=nginx_spec['metadata']['name'])

    # STEP #6
    show_step(6)
    time.sleep(10)

    # STEP #7
    show_step(7)
    # DaemonSet should have new image version
    self.check_nginx_ds_image(k8sclient, self.to_nginx_image)
    # Pods should have new image version
    helpers.wait_pass(
        lambda: self.check_nginx_pods_image(
            k8sclient, self.to_nginx_image),
        timeout=2 * 60)

def wait_running(kube_host_ip, timeout=120, interval=5):
    helpers.wait_pass(
        lambda: get_status(kube_host_ip),
        timeout=timeout, interval=interval)

def wait_netchecker_running(kube_host_ip, timeout=120, interval=5):
    helpers.wait_pass(
        lambda: TestFuelCCPNetChecker.get_netchecker_status(kube_host_ip),
        timeout=timeout, interval=interval)

    interface = self.get_interface_by_nailgun_network_name(name)
    return interface.address_set.first().ip_address

def remote(self, network_name, login, password=None, private_keys=None):
    """Create SSH-connection to the network

    :rtype : SSHClient
    """
    return SSHClient(
        self.get_ip_address_by_network_name(network_name),
        username=login,
        password=password,
        private_keys=private_keys)

def await(self, network_name, timeout=120, by_port=22):
    wait_pass(
        lambda: tcp_ping_(
            self.get_ip_address_by_network_name(network_name), by_port),
        timeout=timeout)

# NEW
def add_interfaces(self, interfaces):
    for interface in interfaces:
        label = interface['label']
        l2_network_device_name = interface.get('l2_network_device')
        interface_model = interface.get('interface_model')
        self.add_interface(
            label=label,
            l2_network_device_name=l2_network_device_name,
            interface_model=interface_model)

# NEW
def add_interface(self, label, l2_network_device_name, interface_model):

def manual_cic_maintenance_mode(self):
    """Check manual maintenance mode for controller

    Scenario:
        1. Revert snapshot
        2. Switch to maintenance mode
        3. Wait until the controller reboots
        4. Exit maintenance mode
        5. Check that the controller becomes available

    Duration 155m
    """
    self.env.revert_snapshot('cic_maintenance_mode')

    cluster_id = self.fuel_web.get_last_created_cluster()

    # Select a non-primary controller
    regular_ctrl = self.fuel_web.get_nailgun_node_by_name("slave-02")
    dregular_ctrl = self.fuel_web.get_devops_node_by_nailgun_node(
        regular_ctrl)
    _ip = regular_ctrl['ip']
    _id = regular_ctrl['id']
    logger.info('Maintenance mode for node-{0}'.format(_id))
    asserts.assert_true('True' in check_available_mode(_ip),
                        "Maintenance mode is not available")
    self.ssh_manager.execute_on_remote(
        ip=_ip,
        cmd="umm on")

    self.fuel_web.wait_node_is_offline(dregular_ctrl)

    asserts.assert_true(
        checkers.check_ping(self.env.get_admin_node_ip(),
                            _ip,
                            deadline=600),
        "Host {0} is not reachable by ping during 600 sec"
        .format(_ip))

    asserts.assert_true('True' in check_auto_mode(_ip),
                        "Maintenance mode is not switched on")

    self.ssh_manager.execute_on_remote(
        ip=_ip,
        cmd="umm off")

    self.fuel_web.wait_node_is_online(dregular_ctrl)

    # Wait until Cinder services are UP on a controller
    self.fuel_web.wait_cinder_is_up(
        [dregular_ctrl.name])

    # Wait until RabbitMQ cluster is UP
    wait_pass(lambda: self.fuel_web.run_single_ostf_test(
        cluster_id, test_sets=['ha'],
        test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
            'RabbitMQ availability')),
        timeout=1500)
    logger.info('RabbitMQ cluster is available')

    wait_pass(lambda: self.fuel_web.run_single_ostf_test(
        cluster_id, test_sets=['sanity'],
        test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
            'Check that required services are running')),
        timeout=1500)
    logger.info("Required services are running")

    # TODO(astudenov): add timeout_msg
    try:
        self.fuel_web.run_ostf(cluster_id,
                               test_sets=['smoke', 'sanity', 'ha'])
    except AssertionError:
        logger.debug("Test failed on the first probe; sleep 600 seconds "
                     "and try one more time. If it fails again, "
                     "the test fails.")
        time.sleep(600)
        self.fuel_web.run_ostf(cluster_id,
                               test_sets=['smoke', 'sanity', 'ha'])

def negative_auto_cic_maintenance_mode(self):
    """Check negative scenario for auto maintenance mode

    Scenario:
        1. Revert snapshot
        2. Disable UMM
        3. Change UMM.CONF
        4. Unexpected reboot
        5. Check that the controller does not switch to maintenance mode
        6. Check that the controller becomes available

    Duration 85m
    """
    self.env.revert_snapshot('cic_maintenance_mode')

    cluster_id = self.fuel_web.get_last_created_cluster()

    # Select a non-primary controller
    regular_ctrl = self.fuel_web.get_nailgun_node_by_name("slave-02")
    dregular_ctrl = self.fuel_web.get_devops_node_by_nailgun_node(
        regular_ctrl)
    _ip = regular_ctrl['ip']
    _id = regular_ctrl['id']

    asserts.assert_true('True' in check_available_mode(_ip),
                        "Maintenance mode is not available")
    logger.info('Disable UMM on node-{0}'.format(_id))

    change_config(_ip, umm=False, reboot_count=0)

    asserts.assert_false('True' in check_available_mode(_ip),
                         "Maintenance mode should not be available")

    command = 'reboot --force >/dev/null & '

    logger.info('Unexpected reboot on node-{0}'.format(_id))

    self.ssh_manager.execute_on_remote(
        ip=_ip,
        cmd=command)

    wait(lambda: not checkers.check_ping(self.env.get_admin_node_ip(),
                                         _ip),
         timeout=60 * 10,
         timeout_msg='Node {} still responds to ping'.format(
             dregular_ctrl.name))

    # The node doesn't have enough time to set the offline status
    # after reboot --force, so just wait
    asserts.assert_true(
        checkers.check_ping(self.env.get_admin_node_ip(),
                            _ip,
                            deadline=600),
        "Host {0} is not reachable by ping during 600 sec"
        .format(_ip))

    self.fuel_web.wait_node_is_online(dregular_ctrl)

    logger.info('Check that node-{0} is not in maintenance mode after'
                ' unexpected reboot'.format(_id))

    wait(lambda: tcp_ping(_ip, 22),
         timeout=60 * 10,
         timeout_msg='Node {} still is not available by SSH'.format(
             dregular_ctrl.name))

    asserts.assert_false('True' in check_auto_mode(_ip),
                         "Maintenance mode should not be switched on")

    # Wait until MySQL Galera is UP on some controller
    self.fuel_web.wait_mysql_galera_is_up(
        [dregular_ctrl.name])

    # Wait until Cinder services are UP on a controller
    self.fuel_web.wait_cinder_is_up(
        [dregular_ctrl.name])

    # Wait until RabbitMQ cluster is UP
    wait_pass(lambda: self.fuel_web.run_single_ostf_test(
        cluster_id, test_sets=['ha'],
        test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
            'RabbitMQ availability')),
        timeout=1500)
    logger.info('RabbitMQ cluster is available')

    # TODO(astudenov): add timeout_msg
    wait_pass(lambda: self.fuel_web.run_single_ostf_test(
        cluster_id, test_sets=['sanity'],
        test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
            'Check that required services are running')),
        timeout=1500)
    logger.info("Required services are running")

    try:
        self.fuel_web.run_ostf(cluster_id,
                               test_sets=['smoke', 'sanity', 'ha'])
    except AssertionError:
        logger.debug("Test failed on the first probe; sleep 600 seconds "
                     "and try one more time. If it fails again, "
                     "the test fails.")
        time.sleep(600)
        self.fuel_web.run_ostf(cluster_id,
                               test_sets=['smoke', 'sanity', 'ha'])

def test_restart_osd_node(self, salt_actions, drivetrain_actions,
                          underlay_actions, show_step):
    """Verify that Ceph OSD node is not affected by system restart

    Scenario:
        1. Find Ceph OSD nodes
        2. Check Ceph cluster health before node restart (skipped until
           PROD-31374 is fixed)
        3. Restart 1 Ceph OSD node
        4. Check Ceph cluster health after node restart (skipped until
           PROD-31374 is fixed)
        5. Run Tempest smoke test suite
        6. Run test_ceph_status.py::test_ceph_osd and
           test_services.py::test_check_services[osd] sanity tests

    Duration: ~9 min
    """
    salt = salt_actions
    ssh = underlay_actions
    dt = drivetrain_actions

    # Find Ceph OSD nodes
    show_step(1)
    tgt = "I@ceph:osd"
    osd_hosts = salt.local(tgt, "test.ping")['return'][0].keys()
    # Select a node for the test
    osd_host = osd_hosts[0]

    # Check Ceph cluster health before node restart
    show_step(2)
    ceph_health = self.get_ceph_health(ssh, osd_hosts)
    # FIXME: uncomment the check once PROD-31374 is fixed
    # status = all(
    #     ["OK" in status for node, status in ceph_health.items()])
    # assert status, "Ceph health is not OK: {0}".format(ceph_health)

    # Restart a Ceph OSD node
    show_step(3)
    LOG.info("Sending reboot command to '{}' node.".format(osd_host))
    remote = ssh.remote(node_name=osd_host)
    remote.execute_async("/sbin/shutdown -r now")

    # Wait for restarted node to boot and become accessible
    helpers.wait_pass(remote.reconnect, timeout=60 * 3, interval=5)
    echo_request = "echo"
    echo_response = salt.local(
        osd_host, "test.echo", echo_request)['return'][0]
    assert echo_request == echo_response[osd_host], (
        "Minion on the '{}' node is not responding after node "
        "reboot.".format(osd_host))
    LOG.info("'{}' node is back after reboot.".format(osd_host))

    # Check Ceph cluster health after node restart
    show_step(4)
    ceph_health = self.get_ceph_health(ssh, osd_hosts)  # noqa
    # FIXME: uncomment the check once PROD-31374 is fixed
    # status = all(
    #     ["OK" in status for node, status in ceph_health.items()])
    # assert status, "Ceph health is not OK: {0}".format(ceph_health)

    # Run Tempest smoke test suite
    show_step(5)
    status = dt.start_job_on_cid_jenkins(
        job_name=self.TEMPEST_JOB_NAME,
        job_parameters=self.TEMPEST_JOB_PARAMETERS,
        start_timeout=self.JENKINS_START_TIMEOUT,
        build_timeout=self.JENKINS_BUILD_TIMEOUT)

    assert status == 'SUCCESS', (
        "'{0}' job run status is {1} after executing Tempest smoke "
        "tests".format(self.TEMPEST_JOB_NAME, status))

    # Run Sanity test
    show_step(6)
    status = dt.start_job_on_cid_jenkins(
        job_name=self.SANITY_JOB_NAME,
        job_parameters=self.SANITY_JOB_PARAMETERS,
        start_timeout=self.JENKINS_START_TIMEOUT,
        build_timeout=self.JENKINS_BUILD_TIMEOUT)
    assert status == 'SUCCESS', (
        "'{0}' job run status is {1} after executing selected sanity "
        "tests".format(self.SANITY_JOB_NAME, status))

def test_daemonset_multirollout_rollingupdate_revision(self, config,
                                                       k8scluster,
                                                       show_step,
                                                       underlay):
    """Rollout a daemonset using updateStrategy type: RollingUpdate and
       --to-revision argument

    Scenario:
        1. Deploy k8s using fuel-ccp-installer
        2. Create a DaemonSet for nginx with image version 1_10 and
           update strategy RollingUpdate
        3. Wait until nginx pods are created and become 'ready'
        4. Check that the image version in the nginx pods is 1_10
           Check that the image version in the nginx daemonset is 1_10
        5. Change nginx image version to 1_11 using YAML
        6. Check that the image version in the nginx daemonset
           is updated to 1_11
           Wait for ~120 sec that the image version in the nginx pods
           is changed to 1_11
        7. Change nginx image version to 1_12 using YAML
        8. Check that the image version in the nginx daemonset
           is updated to 1_12
           Wait for ~120 sec that the image version in the nginx pods
           is changed to 1_12
        9. Get revision #1 and check that it has the image version 1_10
        10. Get revision #2 and check that it has the image version 1_11
        11. Get revision #3 and check that it has the image version 1_12
        12. Rollback the nginx daemonset to revision #1:
            kubectl rollout undo daemonset/nginx --to-revision=1
        13. Check that the image version in the nginx daemonset is
            updated to 1_10
            Wait for ~120 sec that the image version in the nginx pods
            is changed to 1_10
        14. Rollback the nginx daemonset:
            kubectl rollout undo daemonset/nginx
        15. Check that the image version in the nginx daemonset is
            updated to 1_12
            Wait for ~120 sec that the image version in the nginx pods
            is changed to 1_12

    Duration: 3000 seconds
    """
    # STEP #1
    show_step(1)
    k8sclient = k8scluster.api
    assert k8sclient.nodes.list() is not None, "Can not get nodes list"

    # STEP #2
    show_step(2)
    nginx_spec = self.get_nginx_spec()
    nginx_spec['spec']['template']['spec']['containers'][0][
        'image'] = self.from_nginx_image
    k8sclient.daemonsets.create(body=nginx_spec)

    # STEP #3
    show_step(3)
    time.sleep(3)
    self.wait_nginx_pods_ready(k8sclient)

    # STEP #4
    show_step(4)
    self.check_nginx_pods_image(k8sclient, self.from_nginx_image)
    self.check_nginx_ds_image(k8sclient, self.from_nginx_image)

    # STEP #5
    show_step(5)
    nginx_spec['spec']['template']['spec']['containers'][0][
        'image'] = self.to_nginx_image
    k8sclient.daemonsets.update(body=nginx_spec,
                                name=nginx_spec['metadata']['name'])

    # STEP #6
    show_step(6)
    # DaemonSet should have new image version
    self.check_nginx_ds_image(k8sclient, self.to_nginx_image)
    # Pods should have new image version
    helpers.wait_pass(
        lambda: self.check_nginx_pods_image(
            k8sclient, self.to_nginx_image),
        timeout=2 * 60)

    # STEP #7
    show_step(7)
    nginx_spec['spec']['template']['spec']['containers'][0][
        'image'] = self.to_nginx_image_1_12
    k8sclient.daemonsets.update(body=nginx_spec,
                                name=nginx_spec['metadata']['name'])

    # STEP #8
    show_step(8)
    # DaemonSet should have new image version
    self.check_nginx_ds_image(k8sclient, self.to_nginx_image_1_12)
    # Pods should have new image version
    helpers.wait_pass(
        lambda: self.check_nginx_pods_image(
            k8sclient, self.to_nginx_image_1_12),
        timeout=2 * 60)

    # STEP #9
    show_step(9)
    self.check_nginx_revision_image(config=config.k8s.kube_host,
                                    underlay=underlay,
                                    revision="1",
                                    nginx_image=self.from_nginx_image)

    # STEP #10
    show_step(10)
    self.check_nginx_revision_image(config=config.k8s.kube_host,
                                    underlay=underlay,
                                    revision="2",
                                    nginx_image=self.to_nginx_image)

    # STEP #11
    show_step(11)
    self.check_nginx_revision_image(config=config.k8s.kube_host,
                                    underlay=underlay,
                                    revision="3",
                                    nginx_image=self.to_nginx_image_1_12)

    # STEP #12
    show_step(12)
    cmd = "kubectl rollout undo daemonset/nginx --to-revision=1"
    underlay.check_call(cmd, host=config.k8s.kube_host)

    # STEP #13
    show_step(13)
    self.check_nginx_ds_image(k8sclient, self.to_nginx_image)
    # Pods should have old image version
    helpers.wait_pass(
        lambda: self.check_nginx_pods_image(
            k8sclient, self.from_nginx_image),
        timeout=2 * 60)

    # STEP #14
    show_step(14)
    cmd = "kubectl rollout undo daemonset/nginx"
    underlay.check_call(cmd, host=config.k8s.kube_host)

    # STEP #15
    show_step(15)
    self.check_nginx_ds_image(k8sclient, self.from_nginx_image)
    # Pods should have new image version
    helpers.wait_pass(
        lambda: self.check_nginx_pods_image(
            k8sclient, self.to_nginx_image_1_12),
        timeout=2 * 60)

def wait_check_network(kube_host_ip, works=True, timeout=120, interval=5):
    helpers.wait_pass(lambda: check_network(kube_host_ip, works=works),
                      timeout=timeout, interval=interval)

def test_calico_route_recovery(self, show_step, config, underlay,
                               k8s_deployed):
    """Test for deploying k8s environment with Calico plugin and check
       that local routes are recovered by felix after removal

    Scenario:
        1. Check k8s installation.
        2. Check netchecker-server service.
        3. Check netchecker-agent daemon set.
        4. Get network verification status. Expected status is 'OK'.
        5. Get metrics from netchecker.
        6. Remove local route to netchecker-agent pod on the first node.
        7. Check that the route is automatically recovered.
        8. Get network verification status. Expected status is 'OK'.

    Duration: 3000 seconds
    """
    show_step(1)
    nch = netchecker.Netchecker(k8s_deployed.api)

    show_step(2)
    nch.wait_netchecker_pods_running('netchecker-server')

    show_step(3)
    nch.wait_netchecker_pods_running('netchecker-agent')

    show_step(4)
    nch.wait_check_network(works=True)

    show_step(5)
    res = nch.get_metric()

    assert res.status_code == 200, \
        'Unexpected response code {}'.format(res)
    metrics = [
        'ncagent_error_count_total',
        'ncagent_http_probe_code',
        'ncagent_http_probe_connect_time_ms',
        'ncagent_http_probe_connection_result',
        'ncagent_http_probe_content_transfer_time_ms',
        'ncagent_http_probe_dns_lookup_time_ms',
        'ncagent_http_probe_server_processing_time_ms',
        'ncagent_http_probe_tcp_connection_time_ms',
        'ncagent_http_probe_total_time_ms',
        'ncagent_report_count_total'
    ]
    for metric in metrics:
        assert metric in res.text.strip(), \
            'Mandatory metric {0} is missing in {1}'.format(
                metric, res.text)

    show_step(6)
    first_node = k8s_deployed.api.nodes.list()[0]
    first_node_ips = [
        addr.address for addr in first_node.read().status.addresses
        if 'IP' in addr.type
    ]
    assert len(first_node_ips) > 0, "Couldn't find first k8s node IP!"
    first_node_names = [
        name for name in underlay.node_names()
        if name.startswith(first_node.name)
    ]
    first_node_name = first_node_names[0]

    target_pod_ip = None

    for pod in k8s_deployed.api.pods.list(namespace='netchecker'):
        LOG.debug('NC pod IP: {0}'.format(pod.read().status.pod_ip))
        if pod.read().status.host_ip not in first_node_ips:
            continue
        # TODO: get pods by daemonset with name 'netchecker-agent'
        if 'netchecker-agent-' in pod.name and 'hostnet' not in pod.name:
            target_pod_ip = pod.read().status.pod_ip

    assert target_pod_ip is not None, "Could not find netchecker pod IP!"

    route_del_cmd = 'ip route delete {0}'.format(target_pod_ip)
    underlay.sudo_check_call(cmd=route_del_cmd,
                             node_name=first_node_name)
    LOG.debug('Removed local route to pod IP {0} on node {1}'.format(
        target_pod_ip, first_node.name))

    show_step(7)
    route_chk_cmd = 'ip route list | grep -q "{0}"'.format(target_pod_ip)
    helpers.wait_pass(
        lambda: underlay.sudo_check_call(cmd=route_chk_cmd,
                                         node_name=first_node_name),
        timeout=120,
        interval=2)
    pod_ping_cmd = 'sleep 120 && ping -q -c 1 -w 3 {0}'.format(
        target_pod_ip)
    underlay.sudo_check_call(cmd=pod_ping_cmd, node_name=first_node_name)
    LOG.debug('Local route to pod IP {0} on node {1} is '
              'recovered'.format(target_pod_ip, first_node.name))

    show_step(8)
    nch.wait_check_network(works=True)

def deploy_env_with_public_api(self):
    """Deploy environment with enabled DMZ network for API.

    Scenario:
        1. Revert snapshot with ready master node
        2. Create new environment
        3. Run network verification
        4. Deploy the environment
        5. Run network verification
        6. Run OSTF
        7. Reboot cluster nodes
        8. Run OSTF
        9. Create environment snapshot deploy_env_with_public_api

    Duration 120m
    Snapshot deploy_env_with_public_api
    """

    asserts.assert_true(settings.ENABLE_DMZ,
                        "ENABLE_DMZ variable wasn't exported")
    self.check_run('deploy_env_with_public_api')

    self.show_step(1)
    self.env.revert_snapshot('ready_with_5_slaves')

    self.show_step(2)
    cluster_id = self.fuel_web.create_cluster(
        name=self.__class__.__name__,
    )

    self.fuel_web.update_nodes(
        cluster_id,
        {
            'slave-01': ['controller'],
            'slave-02': ['compute'],
            'slave-03': ['cinder'],
        },
        update_interfaces=False
    )

    network_template = utils.get_network_template('public_api')
    self.fuel_web.client.upload_network_template(
        cluster_id=cluster_id, network_template=network_template)

    net = self.fuel_web.get_network_pool('os-api')
    nodegroup = self.fuel_web.get_nodegroup(cluster_id)
    os_api_template = {
        "group_id": nodegroup['id'],
        "name": 'os-api',
        "cidr": net['network'],
        "gateway": net['gateway'],
        "meta": {
            'notation': 'cidr',
            'render_type': None,
            'map_priority': 2,
            'configurable': True,
            'use_gateway': True,
            'name': 'os-api',
            'cidr': net['network'],
            'vlan_start': None,
            'vips': ['haproxy']
        }
    }
    self.fuel_web.client.add_network_group(os_api_template)

    logger.debug('Networks: {0}'.format(
        self.fuel_web.client.get_network_groups()))

    self.show_step(3)
    self.fuel_web.verify_network(cluster_id)

    self.show_step(4)
    self.fuel_web.deploy_cluster_wait(cluster_id, timeout=180 * 60)

    self.show_step(5)
    self.fuel_web.verify_network(cluster_id)

    self.show_step(6)
    self.fuel_web.run_ostf(cluster_id=cluster_id)

    self.show_step(7)
    nodes = self.fuel_web.client.list_cluster_nodes(cluster_id)
    self.fuel_web.warm_restart_nodes(
        self.fuel_web.get_devops_nodes_by_nailgun_nodes(nodes))

    controller = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
        cluster_id=cluster_id,
        roles=['controller'])[0]
    controller_devops = \
        self.fuel_web.get_devops_node_by_nailgun_node(controller)

    # Wait until MySQL Galera is UP on some controller
    self.fuel_web.wait_mysql_galera_is_up([controller_devops.name])

    # Wait until Cinder services UP on a controller
    self.fuel_web.wait_cinder_is_up([controller_devops.name])

    wait_pass(
        lambda: self.fuel_web.run_ostf(cluster_id,
                                       test_sets=['sanity', 'smoke']),
        interval=10, timeout=12 * 60)

    self.show_step(8)
    self.fuel_web.run_ostf(cluster_id=cluster_id)

    self.show_step(9)
    self.env.make_snapshot('deploy_env_with_public_api', is_make=True)

def manual_cic_maintenance_mode(self):
    """Check manual maintenance mode for controller

    Scenario:
        1. Revert snapshot
        2. Switch to maintenance mode
        3. Wait until the controller reboots
        4. Exit maintenance mode
        5. Check that the controller becomes available

    Duration 155m
    """
    self.env.revert_snapshot('cic_maintenance_mode')

    cluster_id = self.fuel_web.get_last_created_cluster()

    # Select a non-primary controller
    regular_ctrl = self.fuel_web.get_nailgun_node_by_name("slave-02")
    dregular_ctrl = self.fuel_web.get_devops_node_by_nailgun_node(
        regular_ctrl)
    _ip = regular_ctrl['ip']
    _id = regular_ctrl['id']
    logger.info('Maintenance mode for node-{0}'.format(_id))
    asserts.assert_true('True' in check_available_mode(_ip),
                        "Maintenance mode is not available")
    self.ssh_manager.check_call(ip=_ip, command="umm on", expected=[-1])

    self.fuel_web.wait_node_is_offline(dregular_ctrl)

    asserts.assert_true(
        checkers.check_ping(self.env.get_admin_node_ip(),
                            _ip,
                            deadline=600),
        "Host {0} is not reachable by ping during 600 sec".format(_ip))

    asserts.assert_true('True' in check_auto_mode(_ip),
                        "Maintenance mode is not switched on")

    self.ssh_manager.check_call(ip=_ip, command="umm off")

    self.fuel_web.wait_node_is_online(dregular_ctrl)

    # Wait until Cinder services are UP on a controller
    self.fuel_web.wait_cinder_is_up([dregular_ctrl.name])

    # Wait until RabbitMQ cluster is UP
    wait_pass(lambda: self.fuel_web.run_single_ostf_test(
        cluster_id, test_sets=['ha'],
        test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
            'RabbitMQ availability')),
        timeout=1500)
    logger.info('RabbitMQ cluster is available')

    wait_pass(lambda: self.fuel_web.run_single_ostf_test(
        cluster_id, test_sets=['sanity'],
        test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
            'Check that required services are running')),
        timeout=1500)
    logger.info("Required services are running")

    # TODO(astudenov): add timeout_msg
    try:
        self.fuel_web.run_ostf(cluster_id,
                               test_sets=['smoke', 'sanity', 'ha'])
    except AssertionError:
        logger.debug("Test failed on the first probe; sleep 600 seconds "
                     "and try one more time. If it fails again, "
                     "the test fails.")
        time.sleep(600)
        self.fuel_web.run_ostf(cluster_id,
                               test_sets=['smoke', 'sanity', 'ha'])

def await(self, network_name, timeout=120, by_port=22):
    wait_pass(
        lambda: tcp_ping_(
            self.get_ip_address_by_network_name(network_name), by_port),
        timeout=timeout)

def test_salt_master_scheduled_backup_restore(
        self,
        underlay_actions,
        salt_actions,
        reclass_actions,
        show_step,
        precreate_sm_test_files,
        cleanup_actions):
    """Test scheduled backup restore of Salt master data

    Scenario:
        1. Update Salt master backup schedule to run every 5 minutes
        2. Apply 'backupninja' state on the backupninja client node
        3. Wait until backup creation is triggered by schedule
        4. Wait until backup creation is finished
        5. Verify that Salt master backup is created on backupninja
           server node
        6. Delete/change some reclass data
        7. Restore the backup
        8. Verify that Salt master data backup is restored
        9. Verify that minions are responding

    Duration: ~ 3 min
    """
    salt = salt_actions
    ssh = underlay_actions
    reclass = reclass_actions

    sm = salt.local("I@salt:master", "test.ping")['return'][0].keys()[0]
    server = salt.local(
        "I@backupninja:server", "test.ping")['return'][0].keys()[0]

    # Re-configure backup schedule
    show_step(1)
    self.update_backup_schedule(reclass, self.SM_YAML)

    # Apply 'backupninja' state on backupninja client node
    show_step(2)
    salt.enforce_state("I@backupninja:client", "backupninja")

    # Wait until backup is triggered by schedule
    show_step(3)
    helpers.wait_pass(
        lambda: ssh.check_call(
            cmd="pgrep backupninja && echo OK", node_name=sm),
        timeout=60 * 11,
        interval=5)

    # Wait until backup is finished
    show_step(4)
    ssh.check_call(
        cmd="while pgrep backupninja > /dev/null; do sleep 2; done",
        node_name=sm,
        timeout=60 * 5)

    # Verify that backup is created and all pieces of data are rsynced
    # to backupninja server
    show_step(5)
    self.check_backup(
        ssh, server, self.BCKP_SERVER_DIR, sm, self.SM_BACKUP_DIRS)

    # Simulate loss/change of some reclass data
    show_step(6)
    self.delete_files(ssh, sm, self.SM_DIRS, self.SM_FILE_TO_DELETE)
    hashes = self.update_files(
        ssh, sm, self.SM_DIRS, self.SM_FILE_TO_UPDATE)

    # Restore the backup
    show_step(7)
    ssh.check_call(
        "salt-call state.sls salt.master.restore,salt.minion.restore",
        node_name=sm,
        raise_on_err=False,
        timeout=60 * 4)

    # Verify that all pieces of lost/changed data are restored
    show_step(8)
    self.verify_restored_data(
        ssh,
        sm,
        self.SM_DIRS,
        self.SM_FILE_TO_DELETE,
        self.SM_FILE_TO_UPDATE,
        hashes)

    # Ping minions
    show_step(9)
    salt.local('*', "test.ping", timeout=30)

def auto_cic_maintenance_mode(self):
    """Check auto maintenance mode for controller

    Scenario:
        1. Revert snapshot
        2. Unexpected reboot
        3. Wait until the controller switches to maintenance mode
        4. Exit maintenance mode
        5. Check that the controller becomes available

    Duration 155m
    """
    self.env.revert_snapshot('cic_maintenance_mode')

    cluster_id = self.fuel_web.get_last_created_cluster()

    # Select a non-primary controller
    regular_ctrl = self.fuel_web.get_nailgun_node_by_name("slave-02")
    dregular_ctrl = self.fuel_web.get_devops_node_by_nailgun_node(
        regular_ctrl)
    _ip = regular_ctrl['ip']
    _id = regular_ctrl['id']

    asserts.assert_true('True' in check_available_mode(_ip),
                        "Maintenance mode is not available")

    change_config(_ip, reboot_count=0)
    logger.info('Change UMM.CONF on node-{0}'.format(_id))

    logger.info('Unexpected reboot on node-{0}'.format(_id))
    command = 'reboot --force >/dev/null & '
    self.ssh_manager.execute_on_remote(ip=_ip, cmd=command)
    wait(lambda: not checkers.check_ping(self.env.get_admin_node_ip(),
                                         _ip),
         timeout=60 * 10,
         timeout_msg='Node {} still responds to ping'.format(
             dregular_ctrl.name))

    self.fuel_web.wait_node_is_offline(dregular_ctrl)

    logger.info('Check that node-{0} is in maintenance mode after'
                ' unexpected reboot'.format(_id))
    asserts.assert_true(
        checkers.check_ping(self.env.get_admin_node_ip(),
                            _ip,
                            deadline=600),
        "Host {0} is not reachable by ping during 600 sec".format(_ip))

    asserts.assert_true('True' in check_auto_mode(_ip),
                        "Maintenance mode is not switched on")

    logger.info('Turn off maintenance mode')
    self.ssh_manager.execute_on_remote(ip=_ip, cmd="umm off")
    time.sleep(30)

    change_config(_ip)

    self.fuel_web.wait_node_is_online(dregular_ctrl)

    # Wait until MySQL Galera is UP on some controller
    self.fuel_web.wait_mysql_galera_is_up([dregular_ctrl.name])

    # Wait until Cinder services are UP on a controller
    self.fuel_web.wait_cinder_is_up([dregular_ctrl.name])

    # Wait until RabbitMQ cluster is UP
    wait_pass(lambda: self.fuel_web.run_single_ostf_test(
        cluster_id, test_sets=['ha'],
        test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
            'RabbitMQ availability')),
        timeout=1500)
    logger.info('RabbitMQ cluster is available')

    # Wait until all OpenStack services are UP
    wait_pass(lambda: self.fuel_web.run_single_ostf_test(
        cluster_id, test_sets=['sanity'],
        test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
            'Check that required services are running')),
        timeout=1500)
    logger.info("Required services are running")

    try:
        self.fuel_web.run_ostf(cluster_id,
                               test_sets=['smoke', 'sanity', 'ha'])
    except AssertionError:
        logger.debug("Test failed on the first probe; sleep 600 seconds "
                     "and try one more time. If it fails again, "
                     "the test fails.")
        time.sleep(600)
        self.fuel_web.run_ostf(cluster_id,
                               test_sets=['smoke', 'sanity', 'ha'])
