def ceph_multinode_with_cinder(self):
    """Deploy ceph with cinder in simple mode

    Scenario:
        1. Create cluster
        2. Add 1 node with controller role
        3. Add 1 node with compute role
        4. Add 2 nodes with cinder and ceph OSD roles
        5. Deploy the cluster
        6. Check ceph status
        7. Check partitions on controller node

    Snapshot ceph_multinode_with_cinder
    """
    if settings.OPENSTACK_RELEASE == settings.OPENSTACK_RELEASE_REDHAT:
        raise SkipTest()

    self.env.revert_snapshot("ready")
    self.env.bootstrap_nodes(self.env.nodes().slaves[:4])

    cluster_id = self.fuel_web.create_cluster(
        name=self.__class__.__name__,
        mode=settings.DEPLOYMENT_MODE_SIMPLE,
        settings={
            'volumes_ceph': False,
            'images_ceph': True,
            'volumes_lvm': True
        }
    )
    self.fuel_web.update_nodes(
        cluster_id,
        {
            'slave-01': ['controller'],
            'slave-02': ['compute'],
            'slave-03': ['cinder', 'ceph-osd'],
            'slave-04': ['cinder', 'ceph-osd']
        }
    )
    # Cluster deploy
    self.fuel_web.deploy_cluster_wait(cluster_id)
    check_ceph_health(self.env.get_ssh_to_remote_by_name('slave-01'))

    disks = self.fuel_web.client.get_node_disks(
        self.fuel_web.get_nailgun_node_by_name('slave-01')['id'])
    logger.info("Current disk partitions are: \n{d}".format(d=disks))

    logger.info("Check unallocated space")
    # We expect failure here only for release 5.0 due to bug
    # https://bugs.launchpad.net/fuel/+bug/1306625; this should be changed
    # to assert_true in the next release.
    assert_false(
        checkers.check_unallocated_space(disks, contr_img_ceph=True),
        "Check unallocated space on controller")

    # Run ostf
    self.fuel_web.run_ostf(cluster_id=cluster_id)

    self.env.make_snapshot("ceph_multinode_with_cinder")
def ceph_multinode_with_cinder(self):
    """Deploy ceph with cinder in simple mode

    Scenario:
        1. Create cluster
        2. Add 1 node with controller role
        3. Add 1 node with compute role
        4. Add 2 nodes with cinder and ceph OSD roles
        5. Deploy the cluster
        6. Check ceph status
        7. Check partitions on controller node

    Snapshot ceph_multinode_with_cinder
    """
    if settings.OPENSTACK_RELEASE == settings.OPENSTACK_RELEASE_REDHAT:
        raise SkipTest()

    self.env.revert_snapshot("ready")
    self.env.bootstrap_nodes(self.env.nodes().slaves[:4])

    cluster_id = self.fuel_web.create_cluster(
        name=self.__class__.__name__,
        mode=settings.DEPLOYMENT_MODE_SIMPLE,
        settings={
            'volumes_ceph': False,
            'images_ceph': True,
            'volumes_lvm': True,
            'tenant': 'ceph2',
            'user': '******',
            'password': '******'
        })
    self.fuel_web.update_nodes(
        cluster_id,
        {
            'slave-01': ['controller'],
            'slave-02': ['compute'],
            'slave-03': ['cinder', 'ceph-osd'],
            'slave-04': ['cinder', 'ceph-osd']
        })
    # Cluster deploy
    self.fuel_web.deploy_cluster_wait(cluster_id)
    check_ceph_health(self.env.get_ssh_to_remote_by_name('slave-01'))

    disks = self.fuel_web.client.get_node_disks(
        self.fuel_web.get_nailgun_node_by_name('slave-01')['id'])
    logger.info("Current disk partitions are: \n{d}".format(d=disks))

    logger.info("Check unallocated space")
    # We expect failure here only for release 5.0 due to bug
    # https://bugs.launchpad.net/fuel/+bug/1306625; this should be changed
    # to assert_true in the next release.
    assert_false(
        checkers.check_unallocated_space(disks, contr_img_ceph=True),
        "Check unallocated space on controller")

    # Run ostf
    self.fuel_web.run_ostf(cluster_id=cluster_id)

    self.env.make_snapshot("ceph_multinode_with_cinder")
def check_ceph_status(self, cluster_id, offline_nodes=(), recovery_timeout=360):
    cluster_nodes = self.client.list_cluster_nodes(cluster_id)
    ceph_nodes = [n for n in cluster_nodes
                  if 'ceph-osd' in n['roles'] and n['id'] not in offline_nodes]
    clock_skew_status = ['clock', 'skew', 'detected']
    osd_recovery_status = ['degraded', 'recovery', 'osds', 'are', 'down']

    logger.info('Waiting for the Ceph service to come up...')
    for node in ceph_nodes:
        remote = self.environment.get_ssh_to_remote(node['ip'])
        try:
            wait(lambda: checkers.check_ceph_ready(remote) is True,
                 interval=20, timeout=120)
        except TimeoutError:
            logger.error('Ceph service is down on {0}'.format(node['name']))
            raise
    logger.info('Ceph service is ready')

    logger.info('Checking Ceph Health...')
    for node in ceph_nodes:
        remote = self.environment.get_ssh_to_remote(node['ip'])
        health_status = checkers.get_ceph_health(remote)
        if 'HEALTH_OK' in health_status:
            continue
        elif 'HEALTH_WARN' in health_status:
            if checkers.check_ceph_health(remote, clock_skew_status):
                logger.warning('Clock skew detected in Ceph.')
                self.sync_ceph_time(ceph_nodes)
                try:
                    wait(lambda: checkers.check_ceph_health(remote),
                         interval=30, timeout=recovery_timeout)
                except TimeoutError:
                    logger.error('Ceph HEALTH is bad on {0}'.format(
                        node['name']))
                    raise
            elif checkers.check_ceph_health(remote, osd_recovery_status) \
                    and len(offline_nodes) > 0:
                logger.info('Ceph is recovering after osd node(s) shutdown.')
                try:
                    wait(lambda: checkers.check_ceph_health(remote),
                         interval=30, timeout=recovery_timeout)
                except TimeoutError:
                    logger.error('Ceph HEALTH is bad on {0}'.format(
                        node['name']))
                    raise
        else:
            assert_true(checkers.check_ceph_health(remote),
                        'Ceph health is not "HEALTH_OK", please inspect '
                        'debug logs for details')

    logger.info('Checking Ceph OSD Tree...')
    for node in ceph_nodes:
        remote = self.environment.get_ssh_to_remote(node['ip'])
        checkers.check_ceph_disks(remote, [n['id'] for n in ceph_nodes])
    logger.info('Ceph cluster status is OK')
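# check_ceph_status above drives a small `checkers` API (check_ceph_ready,
# get_ceph_health, check_ceph_health). The stubs below are a minimal sketch of
# that contract, for illustration only: they assume a `remote` object exposing
# execute(cmd) -> {'stdout': [...]}. The real fuelweb_test.helpers.checkers
# module may differ in details.

def get_ceph_health(remote):
    """Return the raw `ceph health` output from the node (sketch)."""
    return ''.join(remote.execute('ceph health')['stdout'])


def check_ceph_ready(remote):
    """True once `ceph health` returns anything, i.e. monitors are reachable."""
    return len(get_ceph_health(remote)) > 0


def check_ceph_health(remote, expected_tokens=('HEALTH_OK',)):
    """True if every expected token appears in the `ceph health` output.

    check_ceph_status calls this with token lists such as
    ['clock', 'skew', 'detected'] to classify a HEALTH_WARN state.
    """
    health = get_ceph_health(remote)
    return all(token in health for token in expected_tokens)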
def ceph_multinode_with_cinder(self):
    """Deploy ceph with cinder in simple mode

    Scenario:
        1. Create cluster
        2. Add 1 node with controller role
        3. Add 1 node with compute role
        4. Add 2 nodes with cinder and ceph OSD roles
        5. Deploy the cluster
        6. Check ceph status
        7. Run OSTF
        8. Cold restart all nodes
        9. Check ceph status and run OSTF again

    Snapshot ceph_multinode_with_cinder
    """
    if settings.OPENSTACK_RELEASE == settings.OPENSTACK_RELEASE_REDHAT:
        raise SkipTest()

    self.env.revert_snapshot("ready")
    self.env.bootstrap_nodes(self.env.nodes().slaves[:4])

    cluster_id = self.fuel_web.create_cluster(
        name=self.__class__.__name__,
        mode=settings.DEPLOYMENT_MODE_SIMPLE,
        settings={
            'volumes_ceph': True,
            'images_ceph': True
        }
    )
    self.fuel_web.update_nodes(
        cluster_id,
        {
            'slave-01': ['controller'],
            'slave-02': ['compute'],
            'slave-03': ['cinder', 'ceph-osd'],
            'slave-04': ['cinder', 'ceph-osd']
        }
    )
    # Cluster deploy
    self.fuel_web.deploy_cluster_wait(cluster_id)
    check_ceph_health(self.env.get_ssh_to_remote_by_name('slave-01'))

    # Run ostf
    self.fuel_web.run_ostf(
        cluster_id=cluster_id,
        should_fail=4)

    # Cold restart
    self.fuel_web.restart_nodes(self.env.nodes().slaves[:4])

    check_ceph_health(self.env.get_ssh_to_remote_by_name('slave-01'))
    self.fuel_web.run_ostf(
        cluster_id=cluster_id,
        should_fail=4)

    self.env.make_snapshot("ceph_multinode_with_cinder")
def ceph_ha(self):
    """Deploy ceph with cinder in HA mode

    Scenario:
        1. Create cluster
        2. Add 3 nodes with controller and ceph OSD roles
        3. Add 1 node with ceph OSD role
        4. Add 2 nodes with compute and ceph OSD roles
        5. Deploy the cluster
        6. Check ceph status

    Snapshot ceph_ha
    """
    if settings.OPENSTACK_RELEASE == settings.OPENSTACK_RELEASE_REDHAT:
        raise SkipTest()

    self.env.revert_snapshot("ready")
    self.env.bootstrap_nodes(self.env.nodes().slaves[:6])

    cluster_id = self.fuel_web.create_cluster(
        name=self.__class__.__name__,
        mode=settings.DEPLOYMENT_MODE_HA,
        settings={
            'volumes_ceph': True,
            'images_ceph': True,
            'volumes_lvm': False,
            'tenant': 'cephHA',
            'user': '******',
            'password': '******'
        }
    )
    self.fuel_web.update_nodes(
        cluster_id,
        {
            'slave-01': ['controller', 'ceph-osd'],
            'slave-02': ['controller', 'ceph-osd'],
            'slave-03': ['controller', 'ceph-osd'],
            'slave-04': ['compute', 'ceph-osd'],
            'slave-05': ['compute', 'ceph-osd'],
            'slave-06': ['ceph-osd']
        }
    )
    # Deploy cluster
    self.fuel_web.deploy_cluster_wait(cluster_id)
    self.fuel_web.sync_ceph_time(cluster_id)
    check_ceph_health(self.env.get_ssh_to_remote_by_name('slave-01'))

    # Run ostf
    self.fuel_web.run_ostf(cluster_id=cluster_id)

    self.env.make_snapshot("ceph_ha")
def ceph_multinode_compact(self):
    """Deploy ceph in simple mode

    Scenario:
        1. Create cluster
        2. Add 1 node with controller and ceph OSD roles
        3. Add 1 node with compute and ceph OSD roles
        4. Deploy the cluster
        5. Check ceph status

    Snapshot: ceph_multinode_compact
    """
    if OPENSTACK_RELEASE == OPENSTACK_RELEASE_REDHAT:
        raise SkipTest()

    self.env.revert_snapshot("ready_with_3_slaves")

    cluster_id = self.fuel_web.create_cluster(
        name=self.__class__.__name__,
        mode=DEPLOYMENT_MODE_SIMPLE,
        settings={
            'volumes_ceph': True,
            'images_ceph': True
        }
    )
    self.fuel_web.update_nodes(
        cluster_id,
        {
            'slave-01': ['controller', 'ceph-osd'],
            'slave-02': ['compute', 'ceph-osd']
        }
    )
    # configure disks to avoid "group requires minimum xxx" error
    node = self.fuel_web.get_nailgun_node_by_name('slave-01')
    self.fuel_web.update_node_disk(node['id'], {
        'vda': {'os': 19852, 'image': 0},
        'vdb': {'image': 9000, 'ceph': 10852}
    })
    node = self.fuel_web.get_nailgun_node_by_name('slave-02')
    self.fuel_web.update_node_disk(node['id'], {
        'vda': {'os': 19852, 'vm': 0},
        'vdb': {'vm': 9000, 'ceph': 10852}
    })

    self.fuel_web.deploy_cluster_wait(cluster_id)
    check_ceph_health(self.env.get_ssh_to_remote_by_name('slave-01'))

    self.env.make_snapshot("ceph_multinode_compact")
def ceph_ha(self):
    """Deploy ceph with cinder in HA mode

    Scenario:
        1. Create cluster
        2. Add 3 nodes with controller and ceph OSD roles
        3. Add 1 node with ceph OSD role
        4. Add 2 nodes with compute and ceph OSD roles
        5. Deploy the cluster
        6. Check ceph status

    Snapshot ceph_ha
    """
    if settings.OPENSTACK_RELEASE == settings.OPENSTACK_RELEASE_REDHAT:
        raise SkipTest()

    self.env.revert_snapshot("ready")
    self.env.bootstrap_nodes(self.env.nodes().slaves[:6])

    cluster_id = self.fuel_web.create_cluster(
        name=self.__class__.__name__,
        mode=settings.DEPLOYMENT_MODE_HA,
        settings={
            'volumes_ceph': True,
            'images_ceph': True,
            'volumes_lvm': False,
            'tenant': 'cephHA',
            'user': '******',
            'password': '******'
        })
    self.fuel_web.update_nodes(
        cluster_id,
        {
            'slave-01': ['controller', 'ceph-osd'],
            'slave-02': ['controller', 'ceph-osd'],
            'slave-03': ['controller', 'ceph-osd'],
            'slave-04': ['compute', 'ceph-osd'],
            'slave-05': ['compute', 'ceph-osd'],
            'slave-06': ['ceph-osd']
        })
    # Deploy cluster
    self.fuel_web.deploy_cluster_wait(cluster_id)
    check_ceph_health(self.env.get_ssh_to_remote_by_name('slave-01'))

    # Run ostf
    self.fuel_web.run_ostf(cluster_id=cluster_id)

    self.env.make_snapshot("ceph_ha")
def ceph_multinode_compact(self):
    """Deploy ceph in simple mode

    Scenario:
        1. Create cluster
        2. Add 1 node with controller and ceph OSD roles
        3. Add 2 nodes with compute and ceph OSD roles
        4. Deploy the cluster
        5. Check ceph status

    Snapshot ceph_multinode_compact
    """
    if settings.OPENSTACK_RELEASE == settings.OPENSTACK_RELEASE_REDHAT:
        raise SkipTest()

    self.env.revert_snapshot("ready_with_3_slaves")

    cluster_id = self.fuel_web.create_cluster(
        name=self.__class__.__name__,
        mode=settings.DEPLOYMENT_MODE_SIMPLE,
        settings={
            'volumes_ceph': True,
            'images_ceph': True,
            'volumes_lvm': False,
            'tenant': 'ceph1',
            'user': '******',
            'password': '******'
        }
    )
    self.fuel_web.update_nodes(
        cluster_id,
        {
            'slave-01': ['controller', 'ceph-osd'],
            'slave-02': ['compute', 'ceph-osd'],
            'slave-03': ['compute', 'ceph-osd']
        }
    )
    # Cluster deploy
    self.fuel_web.deploy_cluster_wait(cluster_id)
    check_ceph_health(self.env.get_ssh_to_remote_by_name('slave-01'))

    # Run ostf
    self.fuel_web.run_ostf(cluster_id=cluster_id)

    self.env.make_snapshot("ceph_multinode_compact")
def ceph_multinode_restart(self):
    """Deploy ceph with cinder in simple mode

    Scenario:
        1. Create cluster
        2. Add 1 node with controller role
        3. Add 1 node with compute role
        4. Add 2 nodes with cinder and ceph OSD roles
        5. Deploy the cluster
        6. Warm restart
        7. Check ceph status

    Snapshot None
    """
    if settings.OPENSTACK_RELEASE == settings.OPENSTACK_RELEASE_REDHAT:
        raise SkipTest()

    self.env.revert_snapshot("ready")
    self.env.bootstrap_nodes(self.env.nodes().slaves[:4])

    cluster_id = self.fuel_web.create_cluster(
        name=self.__class__.__name__,
        mode=settings.DEPLOYMENT_MODE_SIMPLE,
        settings={
            'volumes_ceph': True,
            'volumes_lvm': False,
        })
    self.fuel_web.update_nodes(
        cluster_id,
        {
            'slave-01': ['controller'],
            'slave-02': ['compute'],
            'slave-03': ['cinder', 'ceph-osd'],
            'slave-04': ['cinder', 'ceph-osd']
        })
    # Cluster deploy
    self.fuel_web.deploy_cluster_wait(cluster_id)

    # Warm restart
    self.fuel_web.warm_restart_nodes(self.env.nodes().slaves[:4])
    check_ceph_health(self.env.get_ssh_to_remote_by_name('slave-01'))

    self.fuel_web.run_ostf(cluster_id=cluster_id)
def ceph_ha_cold_restart(self):
    """Cold restart for HA environment

    Scenario:
        1. Revert snapshot: ceph_ha
        2. Turn off all nodes
        3. Start all nodes
        4. Check ceph status
        5. Run OSTF
    """
    self.env.revert_snapshot("ceph_ha")

    self.fuel_web.restart_nodes(self.env.nodes().slaves[:6])

    check_ceph_health(self.env.get_ssh_to_remote_by_name('slave-01'))
    self.fuel_web.run_ostf(
        cluster_id=self.fuel_web.get_last_created_cluster(),
        should_fail=4, should_pass=18
    )
def ceph_ha_destroy_compute(self):
    """Destroy OSD node for ceph HA

    Scenario:
        1. Revert snapshot: ceph_ha
        2. Destroy a compute + ceph OSD node (slave-05)
        3. Check ceph status
        4. Run OSTF
    """
    if OPENSTACK_RELEASE == OPENSTACK_RELEASE_REDHAT:
        raise SkipTest()

    self.env.revert_snapshot("ceph_ha")

    self.env.nodes().slaves[4].destroy()

    check_ceph_health(self.env.get_ssh_to_remote_by_name('slave-01'))
    self.fuel_web.run_ostf(
        cluster_id=self.fuel_web.get_last_created_cluster(),
        should_fail=4, should_pass=18
    )
def ceph_ha_restart(self):
    """Deploy ceph in HA mode

    Scenario:
        1. Create cluster
        2. Add 3 nodes with controller and ceph OSD roles
        3. Add 1 node with ceph OSD role
        4. Add 2 nodes with compute and ceph OSD roles
        5. Deploy the cluster
        6. Check ceph status
        7. Cold restart
        8. Check ceph status

    Snapshot ceph_ha
    """
    if settings.OPENSTACK_RELEASE == settings.OPENSTACK_RELEASE_REDHAT:
        raise SkipTest()

    self.env.revert_snapshot("ready")
    self.env.bootstrap_nodes(self.env.nodes().slaves[:6])

    cluster_id = self.fuel_web.create_cluster(
        name=self.__class__.__name__,
        mode=settings.DEPLOYMENT_MODE_HA,
        settings={
            'volumes_ceph': True,
            'images_ceph': True,
            'volumes_lvm': False,
        }
    )
    self.fuel_web.update_nodes(
        cluster_id,
        {
            'slave-01': ['controller', 'ceph-osd'],
            'slave-02': ['controller', 'ceph-osd'],
            'slave-03': ['controller', 'ceph-osd'],
            'slave-04': ['compute', 'ceph-osd'],
            'slave-05': ['compute', 'ceph-osd'],
            'slave-06': ['ceph-osd']
        }
    )
    # Deploy cluster
    self.fuel_web.deploy_cluster_wait(cluster_id)
    check_ceph_health(self.env.get_ssh_to_remote_by_name('slave-01'))

    # Run ostf
    self.fuel_web.run_ostf(cluster_id=cluster_id)

    # Destroy osd-node
    self.env.nodes().slaves[5].destroy()
    wait(lambda: not self.fuel_web.get_nailgun_node_by_devops_node(
        self.env.nodes().slaves[5])['online'], timeout=30 * 8)

    check_ceph_health(self.env.get_ssh_to_remote_by_name('slave-01'))
    self.fuel_web.run_ostf(cluster_id=cluster_id)

    # Destroy compute node
    self.env.nodes().slaves[4].destroy()
    wait(lambda: not self.fuel_web.get_nailgun_node_by_devops_node(
        self.env.nodes().slaves[4])['online'], timeout=30 * 8)

    check_ceph_health(self.env.get_ssh_to_remote_by_name('slave-01'))
    self.fuel_web.run_ostf(cluster_id=cluster_id, should_fail=1)

    # Cold restart
    self.fuel_web.cold_restart_nodes(self.env.nodes().slaves[:4])
    check_ceph_health(self.env.get_ssh_to_remote_by_name('slave-01'))

    # Wait until MySQL Galera is UP on the primary controller
    self.fuel_web.wait_mysql_galera_is_up(['slave-01'])

    self.fuel_web.run_ostf(cluster_id=cluster_id, should_fail=1)

    self.env.make_snapshot("ceph_ha")
def check_ceph_partitions_after_reboot(self):
    """Check that Ceph OSD partitions are remounted after reboot

    Scenario:
        1. Create cluster
        2. Add 1 node with controller role
        3. Add 1 node with compute and Ceph OSD roles
        4. Add 1 node with Ceph OSD role
        5. Deploy the cluster
        6. Check Ceph status
        7. Read current partitions
        8. Warm-reboot Ceph nodes
        9. Read partitions again
        10. Check Ceph health
        11. Cold-reboot Ceph nodes
        12. Read partitions again
        13. Check Ceph health

    Snapshot check_ceph_partitions_after_reboot
    """
    if settings.OPENSTACK_RELEASE == settings.OPENSTACK_RELEASE_REDHAT:
        raise proboscis.SkipTest()

    self.env.revert_snapshot("ready_with_3_slaves")

    cluster_id = self.fuel_web.create_cluster(
        name=self.__class__.__name__,
        mode=settings.DEPLOYMENT_MODE_SIMPLE,
        settings={
            'volumes_ceph': True,
            'images_ceph': True,
            'ephemeral_ceph': True,
            'volumes_lvm': False
        }
    )
    self.fuel_web.update_nodes(
        cluster_id,
        {
            'slave-01': ['controller'],
            'slave-02': ['compute', 'ceph-osd'],
            'slave-03': ['ceph-osd']
        }
    )
    # Deploy cluster
    self.fuel_web.deploy_cluster_wait(cluster_id)

    for node in ["slave-02", "slave-03"]:
        logger.info("Get partitions for {node}".format(node=node))
        before_reboot_partitions = [checkers.get_ceph_partitions(
            self.env.get_ssh_to_remote_by_name(node),
            "/dev/vd{p}".format(p=part)) for part in ["b", "c"]]

        logger.info("Warm-restart nodes")
        self.fuel_web.warm_restart_nodes(
            [self.fuel_web.environment.get_virtual_environment().
             node_by_name(node)])

        logger.info("Get partitions for {node} once again".format(node=node))
        after_reboot_partitions = [checkers.get_ceph_partitions(
            self.env.get_ssh_to_remote_by_name(node),
            "/dev/vd{p}".format(p=part)) for part in ["b", "c"]]

        if before_reboot_partitions != after_reboot_partitions:
            logger.error("Partitions don't match")
            logger.error("Before reboot: %s" % before_reboot_partitions)
            logger.error("After reboot: %s" % after_reboot_partitions)
            raise Exception("Ceph partitions changed after warm reboot")

        logger.info("Check Ceph health is ok after reboot")
        check_ceph_health(self.env.get_ssh_to_remote_by_name(node))

        logger.info("Cold-restart nodes")
        self.fuel_web.cold_restart_nodes(
            [self.fuel_web.environment.get_virtual_environment().
             node_by_name(node)])

        after_reboot_partitions = [checkers.get_ceph_partitions(
            self.env.get_ssh_to_remote_by_name(node),
            "/dev/vd{p}".format(p=part)) for part in ["b", "c"]]

        if before_reboot_partitions != after_reboot_partitions:
            logger.error("Partitions don't match")
            logger.error("Before reboot: %s" % before_reboot_partitions)
            logger.error("After reboot: %s" % after_reboot_partitions)
            raise Exception("Ceph partitions changed after cold reboot")

        logger.info("Check Ceph health is ok after reboot")
        check_ceph_health(self.env.get_ssh_to_remote_by_name(node))
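# check_ceph_partitions_after_reboot compares checkers.get_ceph_partitions
# output taken before and after each reboot. Below is a minimal sketch of such
# a helper, assuming it lists a device's partition table via `parted` and keeps
# only the Ceph-related lines -- an illustration, not the real checkers code.

def get_ceph_partitions(remote, device, marker='ceph'):
    """Return the partition table lines on `device` that mention Ceph (sketch)."""
    # Identical output before and after a reboot means the OSD partitions
    # survived with the same layout and were remounted.
    return remote.execute(
        'parted {dev} print | grep {mark}'.format(
            dev=device, mark=marker))['stdout']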
def ceph_ha_restart(self):
    """Deploy ceph with cinder in HA mode

    Scenario:
        1. Create cluster
        2. Add 3 nodes with controller and ceph OSD roles
        3. Add 1 node with cinder and ceph OSD roles
        4. Add 2 nodes with compute and ceph OSD roles
        5. Deploy the cluster
        6. Check ceph status
        7. Cold restart
        8. Check ceph status

    Snapshot ceph_ha
    """
    if settings.OPENSTACK_RELEASE == settings.OPENSTACK_RELEASE_REDHAT:
        raise SkipTest()

    self.env.revert_snapshot("ready")
    self.env.bootstrap_nodes(self.env.nodes().slaves[:6])

    cluster_id = self.fuel_web.create_cluster(
        name=self.__class__.__name__,
        mode=settings.DEPLOYMENT_MODE_HA,
        settings={
            'volumes_ceph': True,
            'images_ceph': True
        }
    )
    self.fuel_web.update_nodes(
        cluster_id,
        {
            'slave-01': ['controller', 'ceph-osd'],
            'slave-02': ['controller', 'ceph-osd'],
            'slave-03': ['controller', 'ceph-osd'],
            'slave-04': ['compute', 'ceph-osd'],
            'slave-05': ['compute', 'ceph-osd'],
            'slave-06': ['cinder', 'ceph-osd']
        }
    )
    # Deploy cluster
    self.fuel_web.deploy_cluster_wait(cluster_id)
    check_ceph_health(self.env.get_ssh_to_remote_by_name('slave-01'))

    # Run ostf
    self.fuel_web.run_ostf(cluster_id=cluster_id)

    # Destroy osd-node
    self.env.nodes().slaves[5].destroy()
    check_ceph_health(self.env.get_ssh_to_remote_by_name('slave-01'),
                      recovery_timeout=True)
    self.fuel_web.run_ostf(cluster_id=cluster_id, should_fail=0)

    # Destroy compute node
    self.env.nodes().slaves[4].destroy()
    check_ceph_health(self.env.get_ssh_to_remote_by_name('slave-01'),
                      recovery_timeout=True)
    self.fuel_web.run_ostf(cluster_id=cluster_id)

    # Cold restart
    self.fuel_web.cold_restart_nodes(self.env.nodes().slaves[:4])
    check_ceph_health(self.env.get_ssh_to_remote_by_name('slave-01'))

    self.fuel_web.run_ostf(cluster_id=cluster_id)

    self.env.make_snapshot("ceph_ha")
def ceph_rados_gw(self):
    """Deploy ceph with RadosGW for objects

    Scenario:
        1. Create cluster
        2. Add 1 node with controller role
        3. Add 1 node with compute role
        4. Add 3 nodes with ceph-osd role
        5. Deploy the cluster
        6. Check ceph status
        7. Run OSTF tests
        8. Check the radosgw daemon is started

    Snapshot ceph_rados_gw
    """
    if settings.OPENSTACK_RELEASE == settings.OPENSTACK_RELEASE_REDHAT:
        raise SkipTest()

    self.env.revert_snapshot("ready_with_5_slaves")

    cluster_id = self.fuel_web.create_cluster(
        name=self.__class__.__name__,
        mode=settings.DEPLOYMENT_MODE_SIMPLE,
        settings={
            'volumes_lvm': False,
            'volumes_ceph': True,
            'images_ceph': True,
            'objects_ceph': True
        }
    )
    self.fuel_web.update_nodes(
        cluster_id,
        {
            'slave-01': ['controller'],
            'slave-02': ['compute'],
            'slave-03': ['ceph-osd'],
            'slave-04': ['ceph-osd'],
            'slave-05': ['ceph-osd']
        }
    )
    # Deploy cluster
    self.fuel_web.deploy_cluster_wait(cluster_id)

    remote = self.fuel_web.get_ssh_for_node('slave-01')
    check_ceph_health(remote)

    # Run ostf
    self.fuel_web.run_ostf(
        cluster_id=cluster_id,
        test_sets=['smoke', 'sanity', 'platform_tests'],
        should_fail=1,
        failed_test_name=['Check stack autoscaling'])

    # Check the radosgw daemon is started
    radosgw_started = lambda: len(remote.check_call(
        'ps aux | grep "/usr/bin/radosgw -n '
        'client.radosgw.gateway"')['stdout']) == 3
    assert_true(radosgw_started(), 'radosgw daemon is not started')

    self.env.make_snapshot("ceph_rados_gw")
def migrate_vm_backed_with_ceph(self):
    """Check VM backed with ceph migration in simple mode

    Scenario:
        1. Create cluster
        2. Add 1 node with controller and ceph OSD roles
        3. Add 2 nodes with compute and ceph OSD roles
        4. Deploy the cluster
        5. Check ceph status
        6. Run OSTF
        7. Create a new VM, assign floating ip
        8. Migrate VM
        9. Check cluster and server state after migration
        10. Terminate VM

    Snapshot vm_backed_with_ceph_live_migration
    """
    if settings.OPENSTACK_RELEASE == settings.OPENSTACK_RELEASE_REDHAT:
        raise proboscis.SkipTest()

    self.env.revert_snapshot("ready_with_3_slaves")

    cluster_id = self.fuel_web.create_cluster(
        name=self.__class__.__name__,
        mode=settings.DEPLOYMENT_MODE_SIMPLE,
        settings={
            'volumes_ceph': True,
            'images_ceph': True,
            'ephemeral_ceph': True,
            'volumes_lvm': False
        }
    )
    self.fuel_web.update_nodes(
        cluster_id,
        {
            'slave-01': ['controller', 'ceph-osd'],
            'slave-02': ['compute', 'ceph-osd'],
            'slave-03': ['compute', 'ceph-osd']
        }
    )
    creds = ("cirros", "test")

    # Cluster deploy
    self.fuel_web.deploy_cluster_wait(cluster_id)

    # Create new server
    os = os_actions.OpenStackActions(
        self.fuel_web.get_nailgun_node_by_name("slave-01")["ip"])

    logger.info("Create new server")
    srv = os.create_server_for_migration(
        scenario='./fuelweb_test/helpers/instance_initial_scenario')
    logger.info("Srv is currently in status: %s" % srv.status)
    assert_true(srv.status != "ERROR")

    logger.info("Assigning floating ip to server")
    floating_ip = os.assign_floating_ip(srv)
    srv_host = os.get_srv_host_name(srv)
    logger.info("Server is on host %s" % srv_host)

    time.sleep(100)

    md5before = os.get_md5sum(
        "/home/test_file",
        self.env.get_ssh_to_remote_by_name("slave-01"),
        floating_ip.ip, creds)

    logger.info("Get available computes")
    avail_hosts = os.get_hosts_for_migr(srv_host)

    logger.info("Migrating server")
    new_srv = os.migrate_server(srv, avail_hosts[0], timeout=120)

    logger.info("Check cluster and server state after migration")
    assert_true(new_srv.status == "ACTIVE",
                "Server didn't reach ACTIVE status. "
                "Status is: %s" % new_srv.status)

    md5after = os.get_md5sum(
        "/home/test_file",
        self.env.get_ssh_to_remote_by_name("slave-01"),
        floating_ip.ip, creds)

    assert_true(
        md5after in md5before,
        "Md5 checksums don't match. "
        "Before migration md5 was equal to: {bef} "
        "Now it equals: {aft}".format(bef=md5before, aft=md5after))

    res = os.execute_through_host(
        self.env.get_ssh_to_remote_by_name("slave-01"),
        floating_ip.ip,
        "ping -q -c3 -w3 8.8.8.8 | grep 'received' |"
        " grep -v '0 packets received'", creds)
    logger.info("Ping 8.8.8.8 result on vm is: %s" % res)

    logger.info("Check Ceph health is ok after migration")
    check_ceph_health(self.env.get_ssh_to_remote_by_name('slave-01'))
    logger.info("Server is now on host %s" % os.get_srv_host_name(new_srv))

    logger.info("Terminate migrated server")
    os.delete_instance(new_srv)
    assert_true(os.verify_srv_deleted(new_srv),
                "Verify server was deleted")

    # Create new server
    os = os_actions.OpenStackActions(
        self.fuel_web.get_nailgun_node_by_name("slave-01")["ip"])

    logger.info("Create new server")
    srv = os.create_server_for_migration(
        scenario='./fuelweb_test/helpers/instance_initial_scenario')
    logger.info("Srv is currently in status: %s" % srv.status)
    assert_true(srv.status != "ERROR")

    logger.info("Assigning floating ip to server")
    floating_ip = os.assign_floating_ip(srv)
    srv_host = os.get_srv_host_name(srv)
    logger.info("Server is on host %s" % srv_host)

    logger.info("Create volume")
    vol = os.create_volume()
    logger.info("Attach volume to server")
    os.attach_volume(vol, srv)

    time.sleep(100)

    logger.info("Create filesystem and mount volume")
    os.execute_through_host(
        self.env.get_ssh_to_remote_by_name('slave-01'),
        floating_ip.ip, 'sudo sh /home/mount_volume.sh', creds)
    os.execute_through_host(
        self.env.get_ssh_to_remote_by_name('slave-01'),
        floating_ip.ip, 'sudo touch /mnt/file-on-volume', creds)

    logger.info("Get available computes")
    avail_hosts = os.get_hosts_for_migr(srv_host)

    logger.info("Migrating server")
    new_srv = os.migrate_server(srv, avail_hosts[0], timeout=120)

    logger.info("Check cluster and server state after migration")
    assert_true(new_srv.status == "ACTIVE",
                "Server did not reach active state. "
                "Current status is: %s" % new_srv.status)

    logger.info("Mount volume after migration")
    out = os.execute_through_host(
        self.env.get_ssh_to_remote_by_name('slave-01'),
        floating_ip.ip, 'sudo mount /dev/vdb /mnt', creds)
    logger.info("out of mounting volume is: %s" % out)

    assert_true("file-on-volume" in os.execute_through_host(
        self.env.get_ssh_to_remote_by_name('slave-01'),
        floating_ip.ip, "sudo ls /mnt", creds),
        "File is absent in /mnt")

    logger.info("Check Ceph health is ok after migration")
    check_ceph_health(self.env.get_ssh_to_remote_by_name('slave-01'))
    logger.info("Server is now on host %s" % os.get_srv_host_name(new_srv))

    logger.info("Terminate migrated server")
    os.delete_instance(new_srv)
    assert_true(os.verify_srv_deleted(new_srv),
                "Verify server was deleted")

    self.env.make_snapshot("vm_backed_with_ceph_live_migration")
def ceph_rados_gw(self):
    """Deploy ceph with RadosGW for objects

    Scenario:
        1. Create cluster
        2. Add 1 node with controller role
        3. Add 1 node with compute role
        4. Add 3 nodes with ceph-osd role
        5. Deploy the cluster
        6. Check ceph status
        7. Run OSTF tests
        8. Check the radosgw daemon is started

    Snapshot ceph_rados_gw
    """
    if settings.OPENSTACK_RELEASE == settings.OPENSTACK_RELEASE_REDHAT:
        raise SkipTest()

    self.env.revert_snapshot("ready_with_5_slaves")

    cluster_id = self.fuel_web.create_cluster(
        name=self.__class__.__name__,
        mode=settings.DEPLOYMENT_MODE_SIMPLE,
        settings={
            'volumes_lvm': False,
            'volumes_ceph': True,
            'images_ceph': True,
            'objects_ceph': True,
            'tenant': 'rados',
            'user': '******',
            'password': '******'
        }
    )
    self.fuel_web.update_nodes(
        cluster_id,
        {
            'slave-01': ['controller'],
            'slave-02': ['compute'],
            'slave-03': ['ceph-osd'],
            'slave-04': ['ceph-osd'],
            'slave-05': ['ceph-osd']
        }
    )
    # Deploy cluster
    self.fuel_web.deploy_cluster_wait(cluster_id)

    remote = self.fuel_web.get_ssh_for_node('slave-01')
    check_ceph_health(remote)

    def _check():
        # Run the volume test again if the first attempt fails
        test_path = map_ostf.OSTF_TEST_MAPPING.get(
            'Create volume and attach it to instance')
        logger.debug('Start to run test {0}'.format(test_path))
        self.fuel_web.run_single_ostf_test(
            cluster_id, test_sets=['smoke'],
            test_name=test_path, should_fail=0)

    try:
        _check()
    except AssertionError as e:
        logger.debug(e)
        logger.debug("The first run of the volume test failed; sleep 60 "
                     "seconds and retry once. If it fails again, the test "
                     "fails.")
        time.sleep(60)
        _check()

    # Run ostf
    self.fuel_web.run_ostf(
        cluster_id=cluster_id,
        test_sets=['smoke', 'sanity', 'platform_tests'],
        should_fail=1,
        failed_test_name=['Check stack autoscaling'])

    # Check the radosgw daemon is started
    radosgw_started = lambda: len(remote.check_call(
        'ps aux | grep "/usr/bin/radosgw -n '
        'client.radosgw.gateway"')['stdout']) == 3
    assert_true(radosgw_started(), 'radosgw daemon is not started')

    self.env.make_snapshot("ceph_rados_gw")
def ceph_ha_restart(self):
    """Deploy ceph in HA mode

    Scenario:
        1. Create cluster
        2. Add 3 nodes with controller and ceph OSD roles
        3. Add 1 node with ceph OSD role
        4. Add 2 nodes with compute and ceph OSD roles
        5. Deploy the cluster
        6. Check ceph status
        7. Cold restart
        8. Check ceph status

    Snapshot ceph_ha
    """
    if settings.OPENSTACK_RELEASE == settings.OPENSTACK_RELEASE_REDHAT:
        raise SkipTest()

    self.env.revert_snapshot("ready")
    self.env.bootstrap_nodes(self.env.nodes().slaves[:6])

    cluster_id = self.fuel_web.create_cluster(
        name=self.__class__.__name__,
        mode=settings.DEPLOYMENT_MODE_HA,
        settings={
            'volumes_ceph': True,
            'images_ceph': True,
            'volumes_lvm': False,
        })
    self.fuel_web.update_nodes(
        cluster_id,
        {
            'slave-01': ['controller', 'ceph-osd'],
            'slave-02': ['controller', 'ceph-osd'],
            'slave-03': ['controller', 'ceph-osd'],
            'slave-04': ['compute', 'ceph-osd'],
            'slave-05': ['compute', 'ceph-osd'],
            'slave-06': ['ceph-osd']
        })
    # Deploy cluster
    self.fuel_web.deploy_cluster_wait(cluster_id)
    check_ceph_health(self.env.get_ssh_to_remote_by_name('slave-01'))

    # Run ostf
    self.fuel_web.run_ostf(cluster_id=cluster_id)

    # Destroy osd-node
    self.env.nodes().slaves[5].destroy()
    wait(lambda: not self.fuel_web.get_nailgun_node_by_devops_node(
        self.env.nodes().slaves[5])['online'], timeout=30 * 8)

    check_ceph_health(self.env.get_ssh_to_remote_by_name('slave-01'))
    self.fuel_web.run_ostf(cluster_id=cluster_id, should_fail=0)

    # Destroy compute node
    self.env.nodes().slaves[4].destroy()
    wait(lambda: not self.fuel_web.get_nailgun_node_by_devops_node(
        self.env.nodes().slaves[4])['online'], timeout=30 * 8)

    check_ceph_health(self.env.get_ssh_to_remote_by_name('slave-01'))
    self.fuel_web.run_ostf(cluster_id=cluster_id, should_fail=1)

    # Cold restart
    self.fuel_web.cold_restart_nodes(self.env.nodes().slaves[:4])
    check_ceph_health(self.env.get_ssh_to_remote_by_name('slave-01'))

    self.fuel_web.run_ostf(cluster_id=cluster_id)

    self.env.make_snapshot("ceph_ha")
def migrate_vm_backed_with_ceph(self):
    """Check VM backed with ceph migration in simple mode

    Scenario:
        1. Create cluster
        2. Add 1 node with controller and ceph OSD roles
        3. Add 2 nodes with compute and ceph OSD roles
        4. Deploy the cluster
        5. Check ceph status
        6. Run OSTF
        7. Create a new VM, assign floating ip
        8. Migrate VM
        9. Check cluster and server state after migration
        10. Terminate VM

    Snapshot vm_backed_with_ceph_live_migration
    """
    if settings.OPENSTACK_RELEASE == settings.OPENSTACK_RELEASE_REDHAT:
        raise proboscis.SkipTest()

    self.env.revert_snapshot("ready_with_3_slaves")

    cluster_id = self.fuel_web.create_cluster(
        name=self.__class__.__name__,
        mode=settings.DEPLOYMENT_MODE_SIMPLE,
        settings={
            'volumes_ceph': True,
            'images_ceph': True,
            'ephemeral_ceph': True,
            'volumes_lvm': False
        })
    self.fuel_web.update_nodes(
        cluster_id,
        {
            'slave-01': ['controller', 'ceph-osd'],
            'slave-02': ['compute', 'ceph-osd'],
            'slave-03': ['compute', 'ceph-osd']
        })
    creds = ("cirros", "test")

    # Cluster deploy
    self.fuel_web.deploy_cluster_wait(cluster_id)

    def _check():
        # Run the volume test again if the first attempt fails
        test_path = map_ostf.OSTF_TEST_MAPPING.get(
            'Create volume and attach it to instance')
        logger.debug('Start to run test {0}'.format(test_path))
        self.fuel_web.run_single_ostf_test(
            cluster_id, test_sets=['smoke'],
            test_name=test_path, should_fail=0)

    try:
        _check()
    except AssertionError as e:
        logger.debug(e)
        logger.debug("The first run of the volume test failed; sleep 60 "
                     "seconds and retry once. If it fails again, the test "
                     "fails.")
        time.sleep(60)
        _check()

    # Run ostf
    self.fuel_web.run_ostf(cluster_id)

    # Create new server
    os = os_actions.OpenStackActions(
        self.fuel_web.get_nailgun_node_by_name("slave-01")["ip"])

    logger.info("Create new server")
    srv = os.create_server_for_migration(
        scenario='./fuelweb_test/helpers/instance_initial_scenario')
    logger.info("Srv is currently in status: %s" % srv.status)

    logger.info("Assigning floating ip to server")
    floating_ip = os.assign_floating_ip(srv)
    srv_host = os.get_srv_host_name(srv)
    logger.info("Server is on host %s" % srv_host)

    time.sleep(100)

    md5before = os.get_md5sum(
        "/home/test_file",
        self.env.get_ssh_to_remote_by_name("slave-01"),
        floating_ip.ip, creds)

    logger.info("Get available computes")
    avail_hosts = os.get_hosts_for_migr(srv_host)

    logger.info("Migrating server")
    new_srv = os.migrate_server(srv, avail_hosts[0], timeout=200)
    logger.info("Check cluster and server state after migration")

    md5after = os.get_md5sum(
        "/home/test_file",
        self.env.get_ssh_to_remote_by_name("slave-01"),
        floating_ip.ip, creds)

    assert_true(
        md5after in md5before,
        "Md5 checksums don't match. "
        "Before migration md5 was equal to: {bef} "
        "Now it equals: {aft}".format(bef=md5before, aft=md5after))

    res = os.execute_through_host(
        self.env.get_ssh_to_remote_by_name("slave-01"),
        floating_ip.ip,
        "ping -q -c3 -w10 8.8.8.8 | grep 'received' |"
        " grep -v '0 packets received'", creds)
    logger.info("Ping 8.8.8.8 result on vm is: %s" % res)

    logger.info("Check Ceph health is ok after migration")
    check_ceph_health(self.env.get_ssh_to_remote_by_name('slave-01'))
    logger.info("Server is now on host %s" % os.get_srv_host_name(new_srv))

    logger.info("Terminate migrated server")
    os.delete_instance(new_srv)
    assert_true(os.verify_srv_deleted(new_srv),
                "Verify server was deleted")

    # Create new server
    os = os_actions.OpenStackActions(
        self.fuel_web.get_nailgun_node_by_name("slave-01")["ip"])

    logger.info("Create new server")
    srv = os.create_server_for_migration(
        scenario='./fuelweb_test/helpers/instance_initial_scenario')
    logger.info("Srv is currently in status: %s" % srv.status)

    logger.info("Assigning floating ip to server")
    floating_ip = os.assign_floating_ip(srv)
    srv_host = os.get_srv_host_name(srv)
    logger.info("Server is on host %s" % srv_host)

    logger.info("Create volume")
    vol = os.create_volume()
    logger.info("Attach volume to server")
    os.attach_volume(vol, srv)

    time.sleep(100)

    logger.info("Create filesystem and mount volume")
    os.execute_through_host(
        self.env.get_ssh_to_remote_by_name('slave-01'),
        floating_ip.ip, 'sudo sh /home/mount_volume.sh', creds)
    os.execute_through_host(
        self.env.get_ssh_to_remote_by_name('slave-01'),
        floating_ip.ip, 'sudo touch /mnt/file-on-volume', creds)

    logger.info("Get available computes")
    avail_hosts = os.get_hosts_for_migr(srv_host)

    logger.info("Migrating server")
    new_srv = os.migrate_server(srv, avail_hosts[0], timeout=120)
    logger.info("Check cluster and server state after migration")

    logger.info("Mount volume after migration")
    out = os.execute_through_host(
        self.env.get_ssh_to_remote_by_name('slave-01'),
        floating_ip.ip, 'sudo mount /dev/vdb /mnt', creds)
    logger.info("out of mounting volume is: %s" % out)

    assert_true(
        "file-on-volume" in os.execute_through_host(
            self.env.get_ssh_to_remote_by_name('slave-01'),
            floating_ip.ip, "sudo ls /mnt", creds),
        "File is absent in /mnt")

    logger.info("Check Ceph health is ok after migration")
    check_ceph_health(self.env.get_ssh_to_remote_by_name('slave-01'))
    logger.info("Server is now on host %s" % os.get_srv_host_name(new_srv))

    logger.info("Terminate migrated server")
    os.delete_instance(new_srv)
    assert_true(os.verify_srv_deleted(new_srv),
                "Verify server was deleted")

    self.env.make_snapshot("vm_backed_with_ceph_live_migration")