def setup_cloud_info(cls, client, src_std):
    """
    Retrieve the cloud init file
    :param client: SSHClient to use for cloud initialization
    :type client: ovs.extensions.generic.SSHClient
    :param src_std: storagedriver to check which edition is running
    :type src_std: ovs.dal.hybrids.StorageDriver
    :return: location of the cloud init script and whether the Enterprise Edition is running
    :rtype: tuple
    """
    cloud_init_loc = cls.CLOUD_INIT_DATA.get('script_dest')
    client.run(['wget', cls.CLOUD_INIT_DATA.get('script_loc'), '-O', cloud_init_loc])
    client.file_chmod(cloud_init_loc, 755)
    assert client.file_exists(cloud_init_loc), 'Could not fetch the cloud init script'
    is_ee = SystemHelper.get_ovs_version(src_std.storagerouter) == 'ee'
    return cloud_init_loc, is_ee
def setup(cls):
    """
    Prepare the test: pick a storagedriver and fetch the matching fio binary
    :return: the chosen storagedriver, the fio binary location and whether the Enterprise Edition is running
    :rtype: tuple
    """
    vpools = VPoolHelper.get_vpools()
    assert len(vpools) >= 1, "Not enough vPools to test"
    vpool = vpools[0]  # Just pick the first vPool you find
    assert len(vpool.storagedrivers) >= 1, "Not enough storagedrivers to test"
    storagedriver = vpool.storagedrivers[0]  # Just pick the first storagedriver you find
    source_str = storagedriver.storagerouter
    client = SSHClient(source_str, username='******')
    is_ee = SystemHelper.get_ovs_version(source_str) == 'ee'
    if is_ee is True:
        fio_bin_loc = cls.FIO_BIN_EE['location']
        fio_bin_url = cls.FIO_BIN_EE['url']
    else:
        fio_bin_loc = cls.FIO_BIN['location']
        fio_bin_url = cls.FIO_BIN['url']
    client.run(['wget', fio_bin_url, '-O', fio_bin_loc])
    client.file_chmod(fio_bin_loc, 755)
    return storagedriver, fio_bin_loc, is_ee
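# For readability: the FIO_BIN/FIO_BIN_EE class constants consumed above are
# expected to be dicts with a 'location' and a 'url' key. The values below are
# purely illustrative placeholders, not the real download locations:
#
#   FIO_BIN = {'location': '/tmp/fio.bin',
#              'url': 'http://example.com/fio.bin'}
#   FIO_BIN_EE = {'location': '/tmp/fio-ee.bin',
#                 'url': 'http://example.com/fio-ee.bin'}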
def validate_services(tries=SERVICE_TRIES, timeout=SERVICE_TIMEOUT):
    """
    Validate if all services come up after installation of the setup
    :param tries: amount of tries to check if ovs services are running
    :type tries: int
    :param timeout: timeout between tries
    :type timeout: int
    :return:
    """
    ServiceChecks.LOGGER.info('Starting validating services')
    storagerouter_ips = StoragerouterHelper.get_storagerouter_ips()
    assert len(storagerouter_ips) >= 1, "We need at least 1 storagerouter!"
    # Commence test
    for storagerouter_ip in storagerouter_ips:
        ServiceChecks.LOGGER.info('Starting service check on node `{0}`'.format(storagerouter_ip))
        amount_tries = 0
        non_running_services = None
        client = SSHClient(storagerouter_ip, username='******')
        while tries >= amount_tries:
            non_running_services = SystemHelper.get_non_running_ovs_services(client)
            if len(non_running_services) == 0:
                break
            amount_tries += 1
            time.sleep(timeout)
        assert len(non_running_services) == 0, \
            "Found non-running services `{0}` on node `{1}`".format(non_running_services, storagerouter_ip)
        ServiceChecks.LOGGER.info('Finished validating services on node `{0}`'.format(storagerouter_ip))
    ServiceChecks.LOGGER.info('Finished validating services')
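# The check above (and the post-reboot variant further down) both poll until a
# condition holds or the tries run out. A minimal sketch of a helper capturing
# that retry pattern could look like the following; `wait_until` is a
# hypothetical name, not part of the test suite, and it reuses the module-level
# `time` import:
def wait_until(condition, tries, timeout):
    """Poll `condition` (a callable returning a truthy value) up to `tries` times."""
    for _ in xrange(tries):
        result = condition()
        if result:
            return result
        time.sleep(timeout)
    return None

# Example: wait until no non-running OVS services remain on a node.
#   wait_until(lambda: not SystemHelper.get_non_running_ovs_services(client),
#              tries=SERVICE_TRIES, timeout=SERVICE_TIMEOUT)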
def setup(cls, logger=LOGGER):
    destination_str, source_str, compute_str = StoragerouterHelper().get_storagerouters_by_role()
    destination_storagedriver = None
    source_storagedriver = None
    if len(source_str.regular_domains) == 0:
        storagedrivers = StoragedriverHelper.get_storagedrivers()
    else:
        storagedrivers = DomainHelper.get_storagedrivers_in_same_domain(domain_guid=source_str.regular_domains[0])
    for storagedriver in storagedrivers:
        if len(storagedriver.vpool.storagedrivers) < 2:
            continue
        if storagedriver.guid in destination_str.storagedrivers_guids:
            # Select the storagedriver if no destination has been chosen yet and the source
            # is either still unknown or lives on the same vPool
            if destination_storagedriver is None and (source_storagedriver is None or source_storagedriver.vpool_guid == storagedriver.vpool_guid):
                destination_storagedriver = storagedriver
                logger.info('Chosen destination storagedriver is: {0}'.format(destination_storagedriver.storage_ip))
        elif storagedriver.guid in source_str.storagedrivers_guids:
            # Select the storagedriver if no source has been chosen yet and the destination
            # is either still unknown or lives on the same vPool
            if source_storagedriver is None and (destination_storagedriver is None or destination_storagedriver.vpool_guid == storagedriver.vpool_guid):
                source_storagedriver = storagedriver
                logger.info('Chosen source storagedriver is: {0}'.format(source_storagedriver.storage_ip))
    assert source_storagedriver is not None and destination_storagedriver is not None, 'We require at least two storagedrivers within the same domain.'
    cluster_info = {'storagerouters': {'destination': destination_str, 'source': source_str, 'compute': compute_str},
                    'storagedrivers': {'destination': destination_storagedriver, 'source': source_storagedriver}}
    compute_client = SSHClient(compute_str, username='******')
    is_ee = SystemHelper.get_ovs_version(source_str) == 'ee'
    if is_ee is True:
        fio_bin_loc = cls.FIO_BIN_EE['location']
        fio_bin_url = cls.FIO_BIN_EE['url']
    else:
        fio_bin_loc = cls.FIO_BIN['location']
        fio_bin_url = cls.FIO_BIN['url']
    compute_client.run(['wget', fio_bin_url, '-O', fio_bin_loc])
    compute_client.file_chmod(fio_bin_loc, 755)
    return cluster_info, is_ee, fio_bin_loc
def validate_vdisk_deployment(cls):
    """
    Validate if vdisk deployment works via various ways
    INFO: 1 vPool should be available on 1 storagerouter
    :return:
    """
    cls.LOGGER.info("Starting to validate the vdisk deployment")
    vpools = VPoolHelper.get_vpools()
    assert len(vpools) >= 1, "Not enough vPools to test"
    vpool = vpools[0]  # Just pick the first vPool you find
    assert len(vpool.storagedrivers) >= 1, "Not enough storagedrivers to test"
    # Setup base information
    storagedriver = vpool.storagedrivers[0]
    protocol = storagedriver.cluster_node_config['network_server_uri'].split(':')[0]
    storage_ip = storagedriver.storage_ip
    edge_port = storagedriver.ports['edge']
    client = SSHClient(storagedriver.storage_ip, username='******')

    # =======
    # VIA API
    # =======
    for size in cls.VDISK_SIZES:
        api_disk_name = cls.PREFIX + str(size) + '-api'
        cls.LOGGER.info("Starting to create vdisk `{0}` on vPool `{1}` with size `{2}` on node `{3}`"
                        .format(api_disk_name, vpool.name, size, storagedriver.storagerouter.ip))
        VDiskSetup.create_vdisk(vdisk_name=api_disk_name + '.raw',
                                vpool_name=vpool.name,
                                size=size,
                                storagerouter_ip=storagedriver.storagerouter.ip,
                                timeout=cls.VDISK_CREATE_TIMEOUT)
        cls.LOGGER.info("Finished creating vdisk `{0}`".format(api_disk_name))
        cls._check_vdisk(vdisk_name=api_disk_name, vpool_name=vpool.name)
        cls.LOGGER.info("Starting to delete vdisk `{0}`".format(api_disk_name))
        VDiskRemover.remove_vdisk_by_name(api_disk_name, vpool.name)
        cls.LOGGER.info("Finished deleting vdisk `{0}`".format(api_disk_name))

    # ========
    # VIA QEMU
    # ========
    for size in cls.VDISK_SIZES:
        qemu_disk_name = cls.PREFIX + str(size) + '-qemu'
        edge_info = {'port': edge_port,
                     'protocol': protocol,
                     'ip': storage_ip}
        if SystemHelper.get_ovs_version(storagedriver.storagerouter) == 'ee':
            edge_info.update(cls.get_shell_user())
        VMHandler.create_image(client, qemu_disk_name, size, edge_info)
        cls.LOGGER.info("Finished creating vdisk `{0}`".format(qemu_disk_name))
        cls._check_vdisk(vdisk_name=qemu_disk_name, vpool_name=vpool.name)
        cls.LOGGER.info("Starting to delete vdisk `{0}`".format(qemu_disk_name))
        VDiskRemover.remove_vdisk_by_name(qemu_disk_name, vpool.name)
        cls.LOGGER.info("Finished deleting vdisk `{0}`".format(qemu_disk_name))

    # ============
    # VIA TRUNCATE
    # ============
    for size in cls.VDISK_SIZES:
        truncate_disk_name = cls.PREFIX + str(size) + '-trunc'
        cls.LOGGER.info("Starting to create vdisk `{0}` on vPool `{1}` on node `{2}` with size `{3}`"
                        .format(truncate_disk_name, vpool.name, storagedriver.storage_ip, size))
        client.run(["truncate", "-s", str(size), "/mnt/{0}/{1}.raw".format(vpool.name, truncate_disk_name)])
        cls.LOGGER.info("Finished creating vdisk `{0}`".format(truncate_disk_name))
        cls._check_vdisk(vdisk_name=truncate_disk_name, vpool_name=vpool.name)
        cls.LOGGER.info("Starting to delete vdisk `{0}`".format(truncate_disk_name))
        VDiskRemover.remove_vdisk_by_name(truncate_disk_name, vpool.name)
        cls.LOGGER.info("Finished deleting vdisk `{0}`".format(truncate_disk_name))
    cls.LOGGER.info("Finished validating the vdisk deployment")
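# For reference, the QEMU path above relies on qemu's Open vStorage block
# driver; conceptually the edge-based create boils down to a shell command of
# the following shape. The URI syntax is assumed from the qemu openvstorage
# driver and the actual command is assembled inside VMHandler.create_image:
#
#   qemu-img create openvstorage+tcp:<storage_ip>:<edge_port>/<disk_name> <size>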
def test_ha_fio(cls, fio_bin_path, cluster_info, is_ee, disk_amount=1, timeout=CIConstants.HA_TIMEOUT, logger=LOGGER):
    """
    Uses a modified fio to work with the openvstorage protocol
    :param fio_bin_path: path of the fio binary
    :type fio_bin_path: str
    :param cluster_info: information about the cluster, contains all dal objects
    :type cluster_info: dict
    :param is_ee: is it an ee version or not
    :type is_ee: bool
    :param disk_amount: amount of disks to test fail over with
    :type disk_amount: int
    :param timeout: timeout in seconds
    :type timeout: int
    :param logger: logging instance
    :return: None
    :rtype: NoneType
    """
    destination_storagedriver = cluster_info['storagedrivers']['destination']
    source_storagedriver = cluster_info['storagedrivers']['source']
    vpool = destination_storagedriver.vpool
    compute_client = SSHClient(cluster_info['storagerouters']['compute'], username='******')
    vm_to_stop = cls.HYPERVISOR_INFO['vms'][source_storagedriver.storage_ip]['name']
    parent_hypervisor = HypervisorFactory().get()
    values_to_check = {'source_std': source_storagedriver.serialize(),
                       'target_std': destination_storagedriver.serialize(),
                       'vdisks': []}
    # Create vdisks
    protocol = source_storagedriver.cluster_node_config['network_server_uri'].split(':')[0]
    edge_configuration = {'fio_bin_location': fio_bin_path,
                          'hostname': source_storagedriver.storage_ip,
                          'port': source_storagedriver.ports['edge'],
                          'protocol': protocol,
                          'volumenames': []}
    if is_ee is True:
        edge_configuration.update(cls.get_shell_user())
    vdisk_info = {}
    failed_configurations = []
    for index in xrange(0, disk_amount):
        try:
            vdisk_name = '{0}_vdisk{1}'.format(cls.TEST_NAME, str(index).zfill(3))
            data_vdisk = VDiskHelper.get_vdisk_by_guid(VDiskSetup.create_vdisk(vdisk_name, vpool.name, cls.AMOUNT_TO_WRITE, source_storagedriver.storage_ip))
            vdisk_info[vdisk_name] = data_vdisk
            edge_configuration['volumenames'].append(data_vdisk.devicename.rsplit('.', 1)[0].split('/', 1)[1])
            values_to_check['vdisks'].append(data_vdisk.serialize())
        except TimeOutError:
            logger.error('Creating the vdisk has timed out.')
            raise
        except RuntimeError as ex:
            logger.error('Could not create the vdisk. Got {0}'.format(str(ex)))
            raise
    configuration = random.choice(cls.DATA_TEST_CASES)
    threads = {'evented': {'io': {'pairs': [], 'r_semaphore': None}}}
    vm_downed = False
    screen_names = []
    try:
        # Separate log because creating vdisks takes a while, while creating the threads does not
        logger.info('Starting threads.')
        io_thread_pairs, monitoring_data, io_r_semaphore = ThreadingHandler.start_io_polling_threads(volume_bundle=vdisk_info)
        threads['evented']['io']['pairs'] = io_thread_pairs
        threads['evented']['io']['r_semaphore'] = io_r_semaphore
        screen_names, output_files = DataWriter.write_data_fio(client=compute_client,
                                                               fio_configuration={'io_size': cls.AMOUNT_TO_WRITE,
                                                                                  'configuration': configuration},
                                                               edge_configuration=edge_configuration)
        logger.info('Doing IO for {0}s before bringing down the node.'.format(cls.IO_TIME))
        ThreadingHandler.keep_threads_running(r_semaphore=io_r_semaphore,
                                              threads=io_thread_pairs,
                                              shared_resource=monitoring_data,
                                              duration=cls.IO_TIME)
        # Threads ready for monitoring at this point
        #############################################
        # Bringing original owner of the volume down
        #############################################
        try:
            logger.info('Stopping {0}.'.format(vm_to_stop))
            VMHandler.stop_vm(hypervisor=parent_hypervisor, vmid=vm_to_stop)
            downed_time = time.time()
            vm_downed = True
        except Exception as ex:
            logger.error('Failed to stop. Got {0}'.format(str(ex)))
            raise
        time.sleep(cls.IO_REFRESH_RATE * 2)
        # Start IO polling to verify nothing went down
        ThreadingHandler.poll_io(r_semaphore=io_r_semaphore,
                                 required_thread_amount=len(io_thread_pairs),
                                 shared_resource=monitoring_data,
                                 downed_time=downed_time,
                                 timeout=timeout,
                                 output_files=output_files,
                                 client=compute_client,
                                 disk_amount=disk_amount)
        cls._validate(values_to_check, monitoring_data)
    except Exception as ex:
        failed_configurations.append({'configuration': configuration, 'reason': str(ex)})
    finally:
        for thread_category, thread_collection in threads['evented'].iteritems():
            ThreadHelper.stop_evented_threads(thread_collection['pairs'], thread_collection['r_semaphore'])
        if vm_downed is True:
            VMHandler.start_vm(parent_hypervisor, vm_to_stop)
            SystemHelper.idle_till_ovs_is_up(source_storagedriver.storage_ip, **cls.get_shell_user())
            # @TODO: Remove when https://github.com/openvstorage/integrationtests/issues/540 is fixed
            FwkHandler.restart_all()
        if screen_names:
            for screen_name in screen_names:
                compute_client.run(['screen', '-S', screen_name, '-X', 'quit'])
        for vdisk in vdisk_info.values():
            VDiskRemover.remove_vdisk(vdisk.guid)
    assert len(failed_configurations) == 0, 'Certain configuration failed: {0}'.format(', '.join(str(failed) for failed in failed_configurations))
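# The 'volumenames' entries above are derived from each vdisk's devicename by
# stripping the leading path separator and the file extension. A quick
# illustration in plain Python, using a made-up devicename:
#
#   >>> devicename = '/ci_ha_vdisk000.raw'
#   >>> devicename.rsplit('.', 1)[0].split('/', 1)[1]
#   'ci_ha_vdisk000'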
def run_test(cls, vm_info, cluster_info, logger=LOGGER):
    """
    Tests the HA using a virtual machine which will write to its own filesystem
    :param vm_info: info about the vms
    :param cluster_info: information about the cluster, contains all dal objects
    :type cluster_info: dict
    :param logger: logging instance
    :return: None
    :rtype: NoneType
    """
    compute_client = SSHClient(cluster_info['storagerouters']['compute'], username='******')
    failed_configurations = []
    destination_storagedriver = cluster_info['storagedrivers']['destination']
    source_storagedriver = cluster_info['storagedrivers']['source']
    # Cache to validate properties
    values_to_check = {'source_std': source_storagedriver.serialize(),
                       'target_std': destination_storagedriver.serialize()}
    vm_to_stop = cls.HYPERVISOR_INFO['vms'][source_storagedriver.storage_ip]['name']
    parent_hypervisor = HypervisorFactory().get()
    # Extract vdisk info from vm_info
    vdisk_info = {}
    disk_amount = 0
    for vm_name, vm_object in vm_info.iteritems():
        for vdisk in vm_object['vdisks']:
            # Ignore the cd vdisk as no IO will come from it
            if vdisk.name == vm_object['cd_path'].replace('.raw', '').split('/')[-1]:
                continue
            disk_amount += 1
            vdisk_info.update({vdisk.name: vdisk})
    with remote(compute_client.ip, [SSHClient]) as rem:
        configuration = random.choice(cls.DATA_TEST_CASES)
        threads = {'evented': {'io': {'pairs': [], 'r_semaphore': None}}}
        output_files = []
        vm_downed = False
        try:
            logger.info('Starting the following configuration: {0}'.format(configuration))
            for vm_name, vm_data in vm_info.iteritems():
                vm_client = rem.SSHClient(vm_data['ip'], cls.VM_USERNAME, cls.VM_PASSWORD)
                vm_client.file_create('/mnt/data/{0}.raw'.format(vm_data['create_msg']))
                vm_data['client'] = vm_client
            io_thread_pairs, monitoring_data, io_r_semaphore = ThreadingHandler.start_io_polling_threads(volume_bundle=vdisk_info)
            threads['evented']['io']['pairs'] = io_thread_pairs
            threads['evented']['io']['r_semaphore'] = io_r_semaphore
            for vm_name, vm_data in vm_info.iteritems():  # Write data
                screen_names, output_files = DataWriter.write_data_fio(client=vm_data['client'],
                                                                       fio_configuration={'io_size': cls.AMOUNT_TO_WRITE,
                                                                                          'configuration': configuration},
                                                                       file_locations=['/mnt/data/{0}.raw'.format(vm_data['create_msg'])])
                vm_data['screen_names'] = screen_names
            logger.info('Doing IO for {0}s before bringing down the node.'.format(cls.IO_TIME))
            ThreadingHandler.keep_threads_running(r_semaphore=io_r_semaphore,
                                                  threads=io_thread_pairs,
                                                  shared_resource=monitoring_data,
                                                  duration=cls.IO_TIME)
            # Threads ready for monitoring at this point
            #############################################
            # Bringing original owner of the volume down
            #############################################
            try:
                logger.info('Stopping {0}.'.format(vm_to_stop))
                VMHandler.stop_vm(hypervisor=parent_hypervisor, vmid=vm_to_stop)
                vm_downed = True
            except Exception as ex:
                logger.error('Failed to stop. Got {0}'.format(str(ex)))
                raise
            downed_time = time.time()
            time.sleep(cls.IO_REFRESH_RATE * 2)
            # Start IO polling to verify nothing went down
            ThreadingHandler.poll_io(r_semaphore=io_r_semaphore,
                                     required_thread_amount=len(io_thread_pairs),
                                     shared_resource=monitoring_data,
                                     downed_time=downed_time,
                                     timeout=cls.HA_TIMEOUT,
                                     output_files=output_files,
                                     client=compute_client,
                                     disk_amount=disk_amount)
            cls._validate(values_to_check, monitoring_data)
        except Exception as ex:
            logger.error('Running the test for configuration {0} has failed because {1}'.format(configuration, str(ex)))
            failed_configurations.append({'configuration': configuration, 'reason': str(ex)})
        finally:
            for thread_category, thread_collection in threads['evented'].iteritems():
                ThreadHelper.stop_evented_threads(thread_collection['pairs'], thread_collection['r_semaphore'])
            if vm_downed is True:
                VMHandler.start_vm(parent_hypervisor, vm_to_stop)
                logger.debug('Started {0}'.format(vm_to_stop))
                SystemHelper.idle_till_ovs_is_up(source_storagedriver.storage_ip, **cls.get_shell_user())
                # @TODO: Remove when https://github.com/openvstorage/integrationtests/issues/540 is fixed
                FwkHandler.restart_all()
            for vm_name, vm_data in vm_info.iteritems():
                for screen_name in vm_data.get('screen_names', []):
                    logger.debug('Stopping screen {0} on {1}.'.format(screen_name, vm_data['client'].ip))
                    vm_data['client'].run(['screen', '-S', screen_name, '-X', 'quit'])
                vm_data['screen_names'] = []
    assert len(failed_configurations) == 0, 'Certain configuration failed: {0}'.format(', '.join(str(failed) for failed in failed_configurations))
def run_test(cls, vm_info, cluster_info, logger=LOGGER):
    """
    Tests the DTL using a virtual machine which will write to its own filesystem
    Expects last data to be pulled from the DTL and not the backend
    :param vm_info: info about the vms
    :param cluster_info: information about the cluster, contains all dal objects
    :type cluster_info: dict
    :param logger: logging instance
    :return: None
    :rtype: NoneType
    """
    source_std = cluster_info['storagedrivers']['source']
    source_client = SSHClient(source_std.storagerouter, username='******')
    compute_str = cluster_info['storagerouters']['compute']
    compute_client = SSHClient(compute_str)
    # Setup hypervisor details
    parent_hypervisor = HypervisorFactory().get()
    vm_to_stop = cls.HYPERVISOR_INFO['vms'][source_std.storage_ip]['name']
    vdisk_info = {}
    disk_amount = 0
    for vm_name, vm_object in vm_info.iteritems():
        for vdisk in vm_object['vdisks']:
            # Ignore the cd vdisk as no IO will come from it
            if vdisk.name == vm_object['cd_path'].replace('.raw', '').split('/')[-1]:
                continue
            disk_amount += 1
            vdisk_info.update({vdisk.name: vdisk})
    # Cache to validate properties
    values_to_check = {'source_std': source_std.serialize(),
                       'vdisks': vdisk_info.values()}
    with remote(compute_str.ip, [SSHClient]) as rem:
        threads = {'evented': {'io': {'pairs': [], 'r_semaphore': None}}}
        vm_downed = False
        output_files = []
        try:
            for vm_name, vm_data in vm_info.iteritems():
                vm_client = rem.SSHClient(vm_data['ip'], cls.VM_USERNAME, cls.VM_PASSWORD)
                vm_client.file_create('/mnt/data/{0}.raw'.format(vm_data['create_msg']))
                vm_data['client'] = vm_client
                # Load dd, md5sum, screen & fio in memory
                vm_data['client'].run(['dd', 'if=/dev/urandom', 'of={0}'.format(cls.VM_RANDOM), 'bs=1M', 'count=2'])
                vm_data['client'].run(['md5sum', cls.VM_RANDOM])
            logger.info("Stopping proxy services")
            service_manager = ServiceFactory.get_manager()
            for proxy in source_std.alba_proxies:
                service_manager.restart_service(proxy.service.name, client=source_client)
            logger.info('Starting to write a file while the proxy is offline. All data should be stored in the DTL!')
            for vm_name, vm_data in vm_info.iteritems():
                vm_data['client'].run('dd if=/dev/urandom of={0} bs=1M count=2'.format(cls.VM_FILENAME).split())
                original_md5sum = ' '.join(vm_data['client'].run(['md5sum', cls.VM_FILENAME]).split())
                vm_data['original_md5sum'] = original_md5sum
                logger.info('Original MD5SUM for VM {0}: {1}.'.format(vm_name, original_md5sum))
            logger.info('Finished writing the file while the proxy is offline!')
            logger.info('Starting fio to generate IO for failing over.')
            io_thread_pairs, monitoring_data, io_r_semaphore = ThreadingHandler.start_io_polling_threads(volume_bundle=vdisk_info)
            threads['evented']['io']['pairs'] = io_thread_pairs
            threads['evented']['io']['r_semaphore'] = io_r_semaphore
            for vm_name, vm_data in vm_info.iteritems():  # Write data
                screen_names, output_files = DataWriter.write_data_fio(client=vm_data['client'],
                                                                       fio_configuration={'io_size': cls.AMOUNT_TO_WRITE,
                                                                                          'configuration': cls.IO_PATTERN},
                                                                       file_locations=['/mnt/data/{0}.raw'.format(vm_data['create_msg'])])
                vm_data['screen_names'] = screen_names
            logger.info('Doing IO for {0}s before bringing down the node.'.format(cls.IO_TIME))
            ThreadingHandler.keep_threads_running(r_semaphore=io_r_semaphore,
                                                  threads=io_thread_pairs,
                                                  shared_resource=monitoring_data,
                                                  duration=cls.IO_TIME)
            ##############################################
            # Bringing original owner of the volume down #
            ##############################################
            VMHandler.stop_vm(hypervisor=parent_hypervisor, vmid=vm_to_stop)
            vm_downed = True
            downed_time = time.time()
            time.sleep(cls.IO_REFRESH_RATE * 2)
            # Start IO polling to verify nothing went down
            ThreadingHandler.poll_io(r_semaphore=io_r_semaphore,
                                     required_thread_amount=len(io_thread_pairs),
                                     shared_resource=monitoring_data,
                                     downed_time=downed_time,
                                     timeout=cls.HA_TIMEOUT,
                                     output_files=output_files,
                                     client=compute_client,
                                     disk_amount=disk_amount)
            logger.info('Starting to validate move...')
            cls._validate_move(values_to_check)
            logger.info('Finished validating move!')
            logger.info('Validate if DTL is working correctly!')
            unmatching_checksum_vms = []
            for vm_name, vm_data in vm_info.iteritems():
                current_md5sum = ' '.join(vm_data['client'].run(['md5sum', cls.VM_FILENAME]).split())
                if vm_data['original_md5sum'] != current_md5sum:
                    unmatching_checksum_vms.append(vm_name)
            assert len(unmatching_checksum_vms) == 0, \
                'Not all data was read from the DTL. Checksums do not line up for {0}'.format(', '.join(unmatching_checksum_vms))
            logger.info('DTL is working correctly!')
        finally:
            for thread_category, thread_collection in threads['evented'].iteritems():
                ThreadHelper.stop_evented_threads(thread_collection['pairs'], thread_collection['r_semaphore'])
            if vm_downed is True:
                VMHandler.start_vm(parent_hypervisor, vm_to_stop)
                logger.debug('Started {0}'.format(vm_to_stop))
                SystemHelper.idle_till_ovs_is_up(source_std.storage_ip, **cls.get_shell_user())
                # @TODO: Remove when https://github.com/openvstorage/integrationtests/issues/540 is fixed
                FwkHandler.restart_all()
            for vm_name, vm_data in vm_info.iteritems():
                for screen_name in vm_data.get('screen_names', []):
                    logger.debug('Stopping screen {0} on {1}.'.format(screen_name, vm_data['client'].ip))
                    vm_data['client'].run(['screen', '-S', screen_name, '-X', 'quit'])
                vm_data['screen_names'] = []
def run_test_edge_blktap(cls, storagedriver, image_path, disk_amount, write_amount, logger=LOGGER):
    """
    Runs the fio deployment using the edge and blktap combination
    Creates the disks using the edge (via qemu convert)
    Writes data to the disks using blktap
    :param storagedriver: chosen storagedriver
    :param image_path: path to the image to convert
    :param disk_amount: amount of disks to deploy
    :param write_amount: amount of data to write
    :param logger: logging instance
    :return: None
    """
    client = SSHClient(storagedriver.storagerouter, username='******')
    vpool = storagedriver.vpool
    edge_info = {'port': storagedriver.ports['edge'],
                 'protocol': storagedriver.cluster_node_config['network_server_uri'].split(':')[0],
                 'ip': storagedriver.storage_ip}
    if SystemHelper.get_ovs_version(storagedriver.storagerouter) == 'ee':
        edge_info.update(cls.get_shell_user())
    vdisk_info = {}
    try:
        for vdisk_number in xrange(disk_amount):  # Create all images first
            vdisk_name = '{0}_{1}-blktap'.format(cls.PREFIX, vdisk_number)
            logger.info("Converting image {0} to {1}:{2}".format(image_path, edge_info['ip'], vdisk_name))
            VMHandler.convert_image(client, image_path, vdisk_name, edge_info)
            logger.info("Creating a blktap device for image {0} at {1}:{2}".format(image_path, edge_info['ip'], vdisk_name))
            tap_dir = VMHandler.create_blktap_device(client, vdisk_name, edge_info)
            vdisk_info[vdisk_name] = tap_dir
        fio_configuration = {'io_size': write_amount, 'configuration': (0, 100)}
        DataWriter.write_data_fio(client, fio_configuration, file_locations=vdisk_info.values(), screen=False, loop_screen=False)
        fio_configuration = {'io_size': write_amount, 'configuration': (100, 0)}
        DataWriter.write_data_fio(client, fio_configuration, file_locations=vdisk_info.values(), screen=False, loop_screen=False)
    except Exception as ex:
        logger.error('An exception occurred while testing edge+blktap: {0}'.format(str(ex)))
        raise
    finally:
        for tap_conn in client.run(['tap-ctl', 'list']).splitlines():
            if not tap_conn.endswith(tuple(vdisk_info.keys())):
                continue
            logger.info("Deleting tap-ctl connection {0}".format(tap_conn))
            tap_conn_pid = None
            tap_conn_minor = None
            for tap_conn_section in tap_conn.split():
                if tap_conn_section.startswith('pid='):
                    tap_conn_pid = tap_conn_section.replace('pid=', '')
                elif tap_conn_section.startswith('minor='):
                    tap_conn_minor = tap_conn_section.replace('minor=', '')
            if tap_conn_pid is None or tap_conn_minor is None:
                raise ValueError('Unable to destroy the blktap connection because its output format has changed.')
            client.run(["tap-ctl", "destroy", "-p", tap_conn_pid, "-m", tap_conn_minor])
        for vdisk_name in vdisk_info.keys():
            VDiskRemover.remove_vdisk_by_name(vdisk_name, vpool.name)
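# The cleanup above assumes `tap-ctl list` prints one connection per line as
# whitespace-separated 'key=value' pairs. A hypothetical line (exact fields may
# differ per blktap version) and the values the parser would extract from it:
#
#   pid=8923 minor=0 state=0 args=openvstorage+tcp:.../ci_scenario_0-blktap
#
#   -> tap_conn_pid = '8923', tap_conn_minor = '0'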
def validate_add_extend_remove_vpool(cls, timeout=ADD_EXTEND_REMOVE_VPOOL_TIMEOUT):
    """
    Validate if we can add, extend and/or remove a vPool, testing the following scenarios:
        * Normal with no accelerated backend
        * Accelerated vPool with hdd_backend & ssd_backend
    INFO:
        * at least 2 storagerouters should be available
        * at least 2 backends should be available with the default preset
    :param timeout: specify a timeout
    :type timeout: int
    :return:
    """
    cls.LOGGER.info("Starting to validate add-extend-remove vpool")
    storagerouter_ips = []
    for storagerouter_ip in StoragerouterHelper.get_storagerouter_ips():
        try:
            RoleValidation.check_required_roles(VPoolSetup.REQUIRED_VPOOL_ROLES, storagerouter_ip, "LOCAL")
            storagerouter_ips.append(storagerouter_ip)
            cls.LOGGER.info("Added `{0}` to list of eligible storagerouters".format(storagerouter_ip))
        except RuntimeError as ex:
            # Filter out storagerouters without the required roles
            cls.LOGGER.warning("Did not add `{0}` to list of eligible storagerouters because: {1}".format(storagerouter_ip, ex))
    assert len(storagerouter_ips) > 1, "We need at least 2 storagerouters with valid roles: {0}".format(storagerouter_ips)
    alba_backends = BackendHelper.get_alba_backends()
    assert len(alba_backends) >= 2, "We need at least 2 backends!"
    # Global vdisk details
    vdisk_deployment_ip = storagerouter_ips[0]
    # Determine backends (2)
    hdd_backend = alba_backends[0]
    ssd_backend = alba_backends[1]
    # Add the preset to all alba_backends (we only use the first two as seen above)
    for alba_backend in alba_backends[0:2]:
        cls.LOGGER.info("Adding custom preset to backend {0}".format(alba_backend.name))
        preset_result = BackendSetup.add_preset(albabackend_name=alba_backend.name,
                                                preset_details=cls.PRESET,
                                                timeout=cls.PRESET_CREATE_TIMEOUT)
        assert preset_result is True, 'Failed to add preset to backend {0}'.format(alba_backend.name)
        cls.LOGGER.info("Finished adding custom preset to backend {0}".format(alba_backend.name))
    # vPool configs, regression-testing https://github.com/openvstorage/alba/issues/560 & more
    vpool_configs = {
        "no_fragment_cache_on_disk": {
            "strategy": {"cache_on_read": False, "cache_on_write": False},
            "location": "disk"
        },
        "no_fragment_cache_on_accel": {
            "strategy": {"cache_on_read": False, "cache_on_write": False},
            "location": "backend",
            "backend": {"name": ssd_backend.name,
                        "preset": cls.PRESET['name']}
        }
    }
    for cfg_name, cfg in vpool_configs.iteritems():
        # Create the vPool
        block_cache_cfg = None
        if SystemHelper.get_ovs_version().lower() == 'ee':
            block_cache_cfg = cfg
        for storagerouter_ip in storagerouter_ips:
            cls.LOGGER.info("Add/extend vPool `{0}` on storagerouter `{1}`".format(cls.VPOOL_NAME, storagerouter_ip))
            start = time.time()
            try:
                cls._add_vpool(vpool_name=cls.VPOOL_NAME,
                               fragment_cache_cfg=cfg,
                               block_cache_cfg=block_cache_cfg,
                               albabackend_name=hdd_backend.name,
                               timeout=timeout,
                               preset_name=cls.PRESET['name'],
                               storagerouter_ip=storagerouter_ip)
            except TimeOutError:
                cls.LOGGER.warning('Adding/extending the vpool has timed out after {0}s. Polling for another {1}s.'
                                   .format(timeout, cls.ADD_EXTEND_REMOVE_VPOOL_TIMEOUT_FORGIVING - timeout))
                # Let's be a bit forgiving and give the framework 5 minutes to actually complete the task
                vpool = VPoolHelper.get_vpool_by_name(cls.VPOOL_NAME)
                while vpool.status != 'RUNNING':
                    if time.time() - start > cls.ADD_EXTEND_REMOVE_VPOOL_TIMEOUT_FORGIVING:
                        raise RuntimeError('The vpool was not added or extended after {0}s'.format(cls.ADD_EXTEND_REMOVE_VPOOL_TIMEOUT_FORGIVING))
                    cls.LOGGER.warning('Vpool status is still {0} after {1}s.'.format(vpool.status, time.time() - start))
                    time.sleep(1)
                    vpool.discard()
                cls.LOGGER.warning('The vpool was added or extended after {0}s.'.format(time.time() - start))
            except RuntimeError as ex:
                cls.LOGGER.error('Adding/extending the vpool has failed with {0}.'.format(str(ex)))
                raise
        # Check the amount of proxies
        vpool = VPoolHelper.get_vpool_by_name(cls.VPOOL_NAME)
        for storagedriver in vpool.storagedrivers:
            assert len(storagedriver.alba_proxies) == 2, 'The vpool did not get set up with 2 proxies. Found {0} instead.'.format(len(storagedriver.alba_proxies))
        # Deploy a vdisk
        vdisk_name = cls.PREFIX + cfg_name
        cls.LOGGER.info("Starting to create vdisk `{0}` on vPool `{1}` with size `{2}` on node `{3}`"
                        .format(vdisk_name, cls.VPOOL_NAME, cls.VDISK_SIZE, vdisk_deployment_ip))
        VDiskSetup.create_vdisk(vdisk_name=vdisk_name + '.raw',
                                vpool_name=cls.VPOOL_NAME,
                                size=cls.VDISK_SIZE,
                                storagerouter_ip=vdisk_deployment_ip,
                                timeout=cls.VDISK_CREATE_TIMEOUT)
        cls.LOGGER.info("Finished creating vdisk `{0}`".format(vdisk_name))
        cls.LOGGER.info("Starting to delete vdisk `{0}`".format(vdisk_name))
        VDiskRemover.remove_vdisk_by_name(vdisk_name, cls.VPOOL_NAME)
        cls.LOGGER.info("Finished deleting vdisk `{0}`".format(vdisk_name))
        # Delete the vPool
        for storagerouter_ip in storagerouter_ips:
            storagedrivers_to_delete = len(vpool.storagedrivers)
            cls.LOGGER.info("Deleting vpool `{0}` on storagerouter `{1}`".format(cls.VPOOL_NAME, storagerouter_ip))
            start = time.time()  # Reset the timer so the forgiving timeout is measured from the removal, not from the last add
            try:
                VPoolRemover.remove_vpool(vpool_name=cls.VPOOL_NAME, storagerouter_ip=storagerouter_ip, timeout=timeout)
            except TimeOutError:
                try:
                    vpool.discard()  # Discard is needed to update the vpool status as it was RUNNING before
                    while vpool.status != 'RUNNING':
                        cls.LOGGER.warning('Removing/shrinking the vpool has timed out after {0}s. Polling for another {1}s.'
                                           .format(timeout, cls.ADD_EXTEND_REMOVE_VPOOL_TIMEOUT_FORGIVING - timeout))
                        if time.time() - start > cls.ADD_EXTEND_REMOVE_VPOOL_TIMEOUT_FORGIVING:
                            raise RuntimeError('The vpool was not removed or shrunk after {0}s'.format(cls.ADD_EXTEND_REMOVE_VPOOL_TIMEOUT_FORGIVING))
                        cls.LOGGER.warning('Vpool status is still {0} after {1}s.'.format(vpool.status, time.time() - start))
                        time.sleep(1)
                        vpool.discard()
                except ObjectNotFoundException:
                    if storagedrivers_to_delete != 1:  # Should be the last one
                        raise
            except RuntimeError as ex:
                cls.LOGGER.error('Shrinking/removing the vpool has failed with {0}.'.format(str(ex)))
                raise
        cls.LOGGER.info('Vpool has been fully removed.')
    # Delete presets
    for alba_backend in alba_backends[0:2]:
        cls.LOGGER.info("Removing custom preset from backend {0}".format(alba_backend.name))
        remove_preset_result = BackendRemover.remove_preset(albabackend_name=alba_backend.name,
                                                            preset_name=cls.PRESET['name'],
                                                            timeout=cls.PRESET_REMOVE_TIMEOUT)
        assert remove_preset_result is True, 'Failed to remove preset from backend {0}'.format(alba_backend.name)
        cls.LOGGER.info("Finished removing custom preset from backend {0}".format(alba_backend.name))
    cls.LOGGER.info("Finished validating add-extend-remove vpool")
def validate_post_reboot(tries=POST_REBOOT_TRIES, timeout=POST_REBOOT_TIMEOUT):
    """
    Validate if all services come up after rebooting a node
    :param tries: amount of tries to check if ovs services are running
    :type tries: int
    :param timeout: timeout between tries
    :type timeout: int
    :return:
    """
    storagerouter_ips = list(StoragerouterHelper.get_storagerouter_ips())
    assert len(storagerouter_ips) >= 2, "We need at least 2 storagerouters!"
    PostRebootChecks.LOGGER.info('Starting election of node to reboot')
    local_host = SystemHelper.get_local_storagerouter().ip  # IP address of the node where the tests are being executed
    storagerouter_ips.remove(local_host)  # Remove the local IP address so we don't reboot the node the tests are running on
    host_to_reboot = storagerouter_ips[0]  # Pick the first node that we can find
    PostRebootChecks.LOGGER.info('Finished election of node to reboot: {0}'.format(host_to_reboot))
    # Setup the initial ssh connection
    client = PostRebootChecks.create_client(host_to_reboot)
    # Reboot the server and wait for it to come up
    PostRebootChecks.LOGGER.info('Starting reboot of host `{0}`!'.format(host_to_reboot))
    client.run(" ( sleep {0} ; reboot ) &".format(PostRebootChecks.SSH_REBOOT_DELAY))
    time.sleep(10)
    ssh_tries = 0
    while ssh_tries < PostRebootChecks.SSH_WAIT_TRIES:
        try:
            PostRebootChecks.create_client(host_to_reboot)
            PostRebootChecks.LOGGER.info('Host `{0}` is up again!'.format(host_to_reboot))
            break
        except Exception:
            ssh_tries += 1
            PostRebootChecks.LOGGER.warning('Host `{0}` still not up at try {1}/{2} ...'
                                            .format(host_to_reboot, ssh_tries, PostRebootChecks.SSH_WAIT_TRIES))
            time.sleep(10)  # Back off between attempts or else we'd poll too fast
    if ssh_tries == PostRebootChecks.SSH_WAIT_TRIES:
        # If we reach the maximum amount of tries, throw an exception
        raise RuntimeError("Max amount of attempts reached ({0}) for host `{1}`, host still not up ..."
                           .format(ssh_tries, host_to_reboot))
    # Commence test
    PostRebootChecks.LOGGER.info('Starting post-reboot service check on node `{0}`'.format(host_to_reboot))
    amount_tries = 0
    non_running_services = None
    client = SSHClient(host_to_reboot, username='******')
    while tries >= amount_tries:
        non_running_services = SystemHelper.get_non_running_ovs_services(client)
        if len(non_running_services) == 0:
            break
        amount_tries += 1
        time.sleep(timeout)
    assert len(non_running_services) == 0, \
        "Found non-running services `{0}` after reboot on node `{1}`".format(non_running_services, host_to_reboot)
    PostRebootChecks.LOGGER.info('Starting post-reboot vPool check on node `{0}`'.format(host_to_reboot))
    PostRebootChecks.LOGGER.info('Finished post-reboot check on node `{0}`'.format(host_to_reboot))
def _execute_test(cls):
    """
    Mimics the healthcheck creating and deleting disks with the same name/devicename back to back
    :return: None
    """
    local_sr = SystemHelper.get_local_storagerouter()
    cls.LOGGER.info("Starting creation/deletion test.")
    # Elect vpool
    assert len(local_sr.storagedrivers) > 0, 'Node {0} has no storagedriver. Cannot test {1}'.format(local_sr.ip, VDiskControllerTester.TEST_NAME)
    random_storagedriver = local_sr.storagedrivers[random.randint(0, len(local_sr.storagedrivers) - 1)]
    vpool = random_storagedriver.vpool
    disk_size = 1024 ** 3
    disk_name = 'ci_scenario_rapid_create_delete_same_device'
    exceptions = []
    for loop in xrange(0, 100):
        test_passed = False
        try:
            cls.LOGGER.info("Creating new disk.")
            try:
                VDiskController.create_new(disk_name, disk_size, random_storagedriver.guid)
            except Exception as ex:
                cls.LOGGER.error('Creation failed. Got {0} in iteration {1}'.format(str(ex), loop))
                exceptions.append('Creation failed. Got {0} in iteration {1}'.format(str(ex), loop))
                continue
            cls.LOGGER.info("Fetching new disk.")
            try:
                vdisk = VDiskHelper.get_vdisk_by_name('{0}.raw'.format(disk_name), vpool.name)
            except Exception as ex:
                cls.LOGGER.error('Fetch failed. Got {0} in iteration {1}'.format(str(ex), loop))
                exceptions.append('Fetch failed. Got {0} in iteration {1}'.format(str(ex), loop))
                continue
            cls.LOGGER.info("Deleting new disk.")
            try:
                VDiskController.delete(vdisk_guid=vdisk.guid)
            except Exception as ex:
                cls.LOGGER.error('Delete failed. Got {0} in iteration {1}'.format(str(ex), loop))
                exceptions.append('Delete failed. Got {0} in iteration {1}'.format(str(ex), loop))
                continue  # Leave test_passed False so the cleanup below forces removal of the leftover disk
            test_passed = True
        except Exception as ex:
            cls.LOGGER.error('Unexpected exception occurred during loop {0}. Got {1}.'.format(loop, str(ex)))
        finally:
            try:
                cls._cleanup_vdisk(disk_name, vpool.name, not test_passed)
            except Exception as ex:
                cls.LOGGER.error("Auto cleanup failed with {0} in iteration {1}.".format(str(ex), loop))
                exceptions.append('Auto cleanup failed, got {0} in iteration {1}'.format(str(ex), loop))
    assert len(exceptions) == 0, 'Exceptions occurred during the creation of vdisks with the same devicename. Got {0}'.format(', '.join(exceptions))
    cls.LOGGER.info("Finished create/delete test.")