def ceph_partitions_repetitive_cold_restart(self):
    """Ceph-osd partitions on 30% ~start rally~ repetitive cold restart

    Scenario:
        1. Revert snapshot 'prepare_load_ceph_ha'
        2. Wait until MySQL Galera is UP on some controller
        3. Check Ceph status
        4. Run OSTF
        5. Fill Ceph partitions on all nodes up to 30%
        6. Check Ceph status
        7. Disable UMM
        8. Run Rally
        9. Repeat the reboot cycle 100 times:
            10. Cold restart of all nodes
            11. Wait for HA services to be ready
            12. Wait until MySQL Galera is UP on some controller
            13. Run OSTF

    Duration 1700m
    Snapshot ceph_partitions_repetitive_cold_restart
    """
    self.show_step(1, initialize=True)
    self.env.revert_snapshot("prepare_load_ceph_ha")

    self.show_step(2)
    primary_controller = self.fuel_web.get_nailgun_primary_node(
        self.env.d_env.nodes().slaves[0])
    self.fuel_web.wait_mysql_galera_is_up([primary_controller.name])

    cluster_id = self.fuel_web.get_last_created_cluster()

    self.show_step(3)
    self.fuel_web.check_ceph_status(cluster_id)

    self.show_step(4)
    self.fuel_web.run_ostf(cluster_id=cluster_id)

    self.show_step(5)
    ceph_nodes = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
        cluster_id, ['ceph-osd'])
    for node in ceph_nodes:
        ip = node['ip']
        # Find the first Ceph mount point on the node
        file_dir = self.ssh_manager.execute_on_remote(
            ip=ip,
            cmd="mount | grep -m 1 ceph | awk '{printf($3)}'")['stdout'][0]
        fill_space(ip, file_dir, 30 * 1024)

    self.show_step(6)
    self.fuel_web.check_ceph_status(cluster_id)

    self.show_step(7)
    for node in self.fuel_web.client.list_cluster_nodes(cluster_id):
        change_config(node['ip'], umm=False)

    self.show_step(8)
    assert_true(settings.PATCHING_RUN_RALLY,
                'PATCHING_RUN_RALLY is not set to true')

    rally_benchmarks = {}
    benchmark_results = {}
    for tag in set(settings.RALLY_TAGS):
        rally_benchmarks[tag] = RallyBenchmarkTest(
            container_repo=settings.RALLY_DOCKER_REPO,
            environment=self.env,
            cluster_id=cluster_id,
            test_type=tag
        )
        benchmark_results[tag] = rally_benchmarks[tag].run()
        logger.debug(benchmark_results[tag].show())

    self.show_step(9)
    for i in xrange(settings.RESTART_COUNT):
        self.show_step(10, 'number {}'.format(i + 1), initialize=True)
        self.fuel_web.cold_restart_nodes(
            self.env.d_env.get_nodes(name__in=[
                'slave-01', 'slave-02', 'slave-03',
                'slave-04', 'slave-05']))

        self.show_step(11)
        self.fuel_web.assert_ha_services_ready(cluster_id)
        self.fuel_web.assert_os_services_ready(cluster_id)

        self.show_step(12)
        self.fuel_web.wait_mysql_galera_is_up([primary_controller.name])

        try:
            self.fuel_web.run_single_ostf_test(
                cluster_id, test_sets=['smoke'],
                test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
                    'Create volume and attach it to instance'))
        except AssertionError:
            logger.debug("The first attempt of the test failed; "
                         "sleep 180 seconds and retry once more. "
                         "If it fails again, the test fails.")
            time.sleep(180)
            self.fuel_web.run_single_ostf_test(
                cluster_id, test_sets=['smoke'],
                test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
                    'Create volume and attach it to instance'))

        self.show_step(13)
        # LB 1519018
        self.fuel_web.run_ostf(cluster_id=cluster_id)

    self.env.make_snapshot("ceph_partitions_repetitive_cold_restart")
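
# fill_space() is imported from the test helpers and is not defined in this
# module. The sketch below is a hypothetical stand-in, assuming the helper
# only needs to create a file of the requested size (in MB) inside the
# mounted Ceph partition over SSH; the real helper may differ.
def fill_space(ip, file_dir, size_mb):
    """Illustrative sketch: occupy ``size_mb`` MB in ``file_dir`` on a node.

    Assumes fallocate is available on the slave node and that SSHManager
    is the project's SSH singleton; both are assumptions, not guarantees.
    """
    from fuelweb_test.helpers.ssh_manager import SSHManager
    SSHManager().execute_on_remote(
        ip=ip,
        cmd='fallocate -l {0}M {1}/fill_space_file'.format(
            size_mb, file_dir))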
def auto_cic_maintenance_mode(self):
    """Check automatic maintenance mode for a controller

    Scenario:
        1. Revert snapshot
        2. Unexpected reboot
        3. Wait until the controller switches into maintenance mode
        4. Exit maintenance mode
        5. Check that the controller becomes available

    Duration 155m
    """
    self.env.revert_snapshot('cic_maintenance_mode')

    cluster_id = self.fuel_web.get_last_created_cluster()

    # Select a non-primary controller
    regular_ctrl = self.fuel_web.get_nailgun_node_by_name("slave-02")
    dregular_ctrl = self.fuel_web.get_devops_node_by_nailgun_node(
        regular_ctrl)
    _ip = regular_ctrl['ip']
    _id = regular_ctrl['id']

    asserts.assert_true('True' in check_available_mode(_ip),
                        "Maintenance mode is not available")

    change_config(_ip, reboot_count=0)
    logger.info('Change UMM.CONF on node-{0}'.format(_id))

    logger.info('Unexpected reboot on node-{0}'.format(_id))
    command = 'reboot --force >/dev/null & '
    self.ssh_manager.execute_on_remote(ip=_ip, cmd=command)
    wait(
        lambda: not checkers.check_ping(self.env.get_admin_node_ip(), _ip),
        timeout=60 * 10,
        timeout_msg='Node {} still responds to ping'.format(
            dregular_ctrl.name))

    self.fuel_web.wait_node_is_offline(dregular_ctrl)

    logger.info('Check that node-{0} is in maintenance mode after'
                ' the unexpected reboot'.format(_id))
    asserts.assert_true(
        checkers.check_ping(self.env.get_admin_node_ip(),
                            _ip,
                            deadline=600),
        "Host {0} is not reachable by ping within 600 sec".format(_ip))

    asserts.assert_true('True' in check_auto_mode(_ip),
                        "Maintenance mode is not switched on")

    logger.info('Turn off maintenance mode')
    self.ssh_manager.execute_on_remote(ip=_ip, cmd="umm off")
    time.sleep(30)

    change_config(_ip)

    self.fuel_web.wait_node_is_online(dregular_ctrl)

    # Wait until MySQL Galera is UP on some controller
    self.fuel_web.wait_mysql_galera_is_up([dregular_ctrl.name])

    # Wait until Cinder services are UP on a controller
    self.fuel_web.wait_cinder_is_up([dregular_ctrl.name])

    # Wait until the RabbitMQ cluster is UP
    wait_pass(lambda: self.fuel_web.run_single_ostf_test(
        cluster_id, test_sets=['ha'],
        test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
            'RabbitMQ availability')),
        timeout=1500)
    logger.info('RabbitMQ cluster is available')

    # Wait until all OpenStack services are UP
    wait_pass(lambda: self.fuel_web.run_single_ostf_test(
        cluster_id, test_sets=['sanity'],
        test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
            'Check that required services are running')),
        timeout=1500)
    logger.info("Required services are running")

    try:
        self.fuel_web.run_ostf(cluster_id,
                               test_sets=['smoke', 'sanity', 'ha'])
    except AssertionError:
        logger.debug("The first attempt of OSTF failed; "
                     "sleep 600 seconds and retry once more. "
                     "If it fails again, the test fails.")
        time.sleep(600)
        self.fuel_web.run_ostf(cluster_id,
                               test_sets=['smoke', 'sanity', 'ha'])
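
# check_available_mode() and check_auto_mode() are imported helpers; the
# tests only rely on their output containing the string 'True'. Below is a
# hedged sketch under the assumption that UMM state can be derived from the
# output of `umm status` on the node; the real helpers may parse it
# differently.
def check_available_mode(ip):
    """Hypothetical sketch: 'True' when maintenance mode is available."""
    from fuelweb_test.helpers.ssh_manager import SSHManager
    # Assumption: `umm status` reports a runlevel when UMM is enabled
    cmd = ('umm status | grep runlevel &>/dev/null'
           ' && echo "True" || echo "False"')
    return ''.join(SSHManager().execute_on_remote(
        ip=ip, cmd=cmd)['stdout'])


def check_auto_mode(ip):
    """Hypothetical sketch: 'True' when the node is in maintenance mode."""
    from fuelweb_test.helpers.ssh_manager import SSHManager
    # Assumption: `umm status` reports the umm state while in maintenance
    cmd = ('umm status | grep umm &>/dev/null'
           ' && echo "True" || echo "False"')
    return ''.join(SSHManager().execute_on_remote(
        ip=ip, cmd=cmd)['stdout'])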
def negative_auto_cic_maintenance_mode(self):
    """Check the negative scenario for automatic maintenance mode

    Scenario:
        1. Revert snapshot
        2. Disable UMM
        3. Change UMM.CONF
        4. Unexpected reboot
        5. Check that the controller does not switch into maintenance mode
        6. Check that the controller becomes available

    Duration 85m
    """
    self.env.revert_snapshot('cic_maintenance_mode')

    cluster_id = self.fuel_web.get_last_created_cluster()

    # Select a non-primary controller
    regular_ctrl = self.fuel_web.get_nailgun_node_by_name("slave-02")
    dregular_ctrl = self.fuel_web.get_devops_node_by_nailgun_node(
        regular_ctrl)
    _ip = regular_ctrl['ip']
    _id = regular_ctrl['id']

    asserts.assert_true('True' in check_available_mode(_ip),
                        "Maintenance mode is not available")

    logger.info('Disable UMM on node-{0}'.format(_id))
    change_config(_ip, umm=False, reboot_count=0)

    asserts.assert_false('True' in check_available_mode(_ip),
                         "Maintenance mode should not be available")

    logger.info('Unexpected reboot on node-{0}'.format(_id))
    self.ssh_manager.check_call(ip=_ip, command='reboot >/dev/null & ')

    wait(
        lambda: not checkers.check_ping(self.env.get_admin_node_ip(), _ip),
        timeout=60 * 10,
        timeout_msg='Node {} still responds to ping'.format(
            dregular_ctrl.name))

    # The node does not have enough time to get the offline status
    # after the reboot, so just wait until it is reachable again
    asserts.assert_true(
        checkers.check_ping(self.env.get_admin_node_ip(),
                            _ip,
                            deadline=600),
        "Host {0} is not reachable by ping within 600 sec".format(_ip))

    self.fuel_web.wait_node_is_online(dregular_ctrl)

    logger.info('Check that node-{0} is not in maintenance mode after'
                ' the unexpected reboot'.format(_id))

    wait(lambda: tcp_ping(_ip, 22),
         timeout=60 * 10,
         timeout_msg='Node {} is still not available by SSH'.format(
             dregular_ctrl.name))

    asserts.assert_false('True' in check_auto_mode(_ip),
                         "Maintenance mode should not be switched on")

    # Wait until MySQL Galera is UP on some controller
    self.fuel_web.wait_mysql_galera_is_up([dregular_ctrl.name])

    # Wait until Cinder services are UP on a controller
    self.fuel_web.wait_cinder_is_up([dregular_ctrl.name])

    # Wait until the RabbitMQ cluster is UP
    wait_pass(lambda: self.fuel_web.run_single_ostf_test(
        cluster_id, test_sets=['ha'],
        test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
            'RabbitMQ availability')),
        timeout=1500)
    logger.info('RabbitMQ cluster is available')

    # TODO(astudenov): add timeout_msg
    wait_pass(lambda: self.fuel_web.run_single_ostf_test(
        cluster_id, test_sets=['sanity'],
        test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
            'Check that required services are running')),
        timeout=1500)
    logger.info("Required services are running")

    try:
        self.fuel_web.run_ostf(cluster_id,
                               test_sets=['smoke', 'sanity', 'ha'])
    except AssertionError:
        logger.debug("The first attempt of OSTF failed; "
                     "sleep 600 seconds and retry once more. "
                     "If it fails again, the test fails.")
        time.sleep(600)
        self.fuel_web.run_ostf(cluster_id,
                               test_sets=['smoke', 'sanity', 'ha'])
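
# change_config() is the third imported UMM helper. A minimal sketch,
# assuming UMM reads its settings from /etc/umm.conf with UMM, REBOOT_COUNT
# and COUNTER_RESET_TIME keys (an assumption about the node's UMM package,
# not a confirmed interface):
def change_config(ip, umm=True, reboot_count=2, counter_reset_time=10):
    """Illustrative sketch: rewrite UMM.CONF on the node over SSH."""
    from fuelweb_test.helpers.ssh_manager import SSHManager
    # Enable/disable UMM and set the unexpected-reboot threshold after
    # which the node enters maintenance mode
    cmd = ('echo -e "UMM={0}\n'
           'REBOOT_COUNT={1}\n'
           'COUNTER_RESET_TIME={2}" > /etc/umm.conf'.format(
               'yes' if umm else 'no', reboot_count, counter_reset_time))
    return SSHManager().execute_on_remote(ip=ip, cmd=cmd)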