예제 #1
0
 def setup_cloud_info(cls, client, src_std):
     """
     Fetch the cloud-init script onto the target node and detect the OVS edition.
     :param client: SSHClient to use for cloud initialization
     :type client: ovs.extensions.generic.SSHClient
     :param src_std: storagedriver to check which edition is running
     :type src_std: ovs.dal.hybrids.StorageDriver
     :return: local path of the cloud-init script and whether the enterprise edition runs
     """
     script_destination = cls.CLOUD_INIT_DATA.get('script_dest')
     script_source = cls.CLOUD_INIT_DATA.get('script_loc')
     # Download the script and make it executable on the remote node.
     client.run(['wget', script_source, '-O', script_destination])
     client.file_chmod(script_destination, 755)
     assert client.file_exists(script_destination), 'Could not fetch the cloud init script'
     # 'ee' marks the enterprise edition of OVS.
     enterprise_edition = SystemHelper.get_ovs_version(src_std.storagerouter) == 'ee'
     return script_destination, enterprise_edition
예제 #2
0
 def setup(cls):
     """
     Pick a vPool/storagedriver to test with and deploy the fio binary on its node.
     :return: chosen storagedriver, local path of the fio binary, whether the enterprise edition runs
     """
     vpools = VPoolHelper.get_vpools()
     assert len(vpools) >= 1, "Not enough vPools to test"
     # Any vPool will do; take the first one.
     vpool = vpools[0]
     assert len(vpool.storagedrivers) >= 1, "Not enough Storagedrivers to test"
     # Likewise, any storagedriver of that vPool will do.
     storagedriver = vpool.storagedrivers[0]
     source_str = storagedriver.storagerouter
     client = SSHClient(source_str, username='******')
     is_ee = SystemHelper.get_ovs_version(source_str) == 'ee'
     # The enterprise edition ships its own fio build.
     fio_bin_info = cls.FIO_BIN_EE if is_ee else cls.FIO_BIN
     fio_bin_loc = fio_bin_info['location']
     fio_bin_url = fio_bin_info['url']
     # Download the fio binary and make it executable.
     client.run(['wget', fio_bin_url, '-O', fio_bin_loc])
     client.file_chmod(fio_bin_loc, 755)
     return storagedriver, fio_bin_loc, is_ee
예제 #3
0
    def validate_services(tries=SERVICE_TRIES, timeout=SERVICE_TIMEOUT):
        """
        Validate if all services come up after installation of the setup

        :param tries: amount of tries to check if ovs services are running
        :type tries: int
        :param timeout: timeout between tries
        :type timeout: int
        :return:
        """
        ServiceChecks.LOGGER.info('Starting validating services')
        storagerouter_ips = StoragerouterHelper.get_storagerouter_ips()
        assert len(storagerouter_ips) >= 1, "We need at least 1 storagerouters!"
        # commence test
        for node_ip in storagerouter_ips:
            ServiceChecks.LOGGER.info('Starting service check on node `{0}`'.format(node_ip))
            ssh_client = SSHClient(node_ip, username='******')
            remaining_services = None
            # Poll up to `tries` + 1 times, waiting `timeout` seconds between polls.
            for _ in xrange(tries + 1):
                remaining_services = SystemHelper.get_non_running_ovs_services(ssh_client)
                if len(remaining_services) == 0:
                    break
                time.sleep(timeout)
            assert len(remaining_services) == 0, "Found non running services `{0}` after reboot on node `{1}`".format(remaining_services, node_ip)
            ServiceChecks.LOGGER.info('Finished validating services on node `{0}`'.format(node_ip))
        ServiceChecks.LOGGER.info('Finished validating services')
예제 #4
0
    def setup(cls, logger=LOGGER):
        """
        Select a destination and a source storagedriver that live on the same vPool
        (restricted to the source's domain when it has one), then deploy the fio
        binary on the compute node.
        :param logger: logging instance
        :return: cluster layout dict, whether the enterprise edition runs, fio binary path
        """
        destination_str, source_str, compute_str = StoragerouterHelper().get_storagerouters_by_role()
        destination_storagedriver = None
        source_storagedriver = None
        # Restrict candidates to the source's first regular domain when it has one;
        # otherwise consider every storagedriver in the cluster.
        if len(source_str.regular_domains) == 0:
            storagedrivers = StoragedriverHelper.get_storagedrivers()
        else:
            storagedrivers = DomainHelper.get_storagedrivers_in_same_domain(domain_guid=source_str.regular_domains[0])
        for storagedriver in storagedrivers:
            # A vPool needs at least two storagedrivers to provide both a source and a destination.
            if len(storagedriver.vpool.storagedrivers) < 2:
                continue
            if storagedriver.guid in destination_str.storagedrivers_guids:
                # Select if the destination isn't selected yet and either the source is
                # still unknown or the candidate shares the vPool with the chosen source.
                if destination_storagedriver is None and (source_storagedriver is None or source_storagedriver.vpool_guid == storagedriver.vpool_guid):
                    destination_storagedriver = storagedriver
                    logger.info('Chosen destination storagedriver is: {0}'.format(destination_storagedriver.storage_ip))
            elif storagedriver.guid in source_str.storagedrivers_guids:
                # Select if the source isn't selected yet and either the destination is
                # still unknown or the candidate shares the vPool with the chosen destination.
                if source_storagedriver is None and (destination_storagedriver is None or destination_storagedriver.vpool_guid == storagedriver.vpool_guid):
                    source_storagedriver = storagedriver
                    logger.info('Chosen source storagedriver is: {0}'.format(source_storagedriver.storage_ip))
        assert source_storagedriver is not None and destination_storagedriver is not None, 'We require at least two storagedrivers within the same domain.'

        cluster_info = {'storagerouters': {'destination': destination_str, 'source': source_str, 'compute': compute_str},
                        'storagedrivers': {'destination': destination_storagedriver, 'source': source_storagedriver}}
        compute_client = SSHClient(compute_str, username='******')

        is_ee = SystemHelper.get_ovs_version(source_str) == 'ee'
        # The enterprise edition ships its own fio build.
        if is_ee is True:
            fio_bin_loc = cls.FIO_BIN_EE['location']
            fio_bin_url = cls.FIO_BIN_EE['url']
        else:
            fio_bin_loc = cls.FIO_BIN['location']
            fio_bin_url = cls.FIO_BIN['url']

        # Download the fio binary onto the compute node and make it executable.
        compute_client.run(['wget', fio_bin_url, '-O', fio_bin_loc])
        compute_client.file_chmod(fio_bin_loc, 755)
        return cluster_info, is_ee, fio_bin_loc
예제 #5
0
    def validate_vdisk_deployment(cls):
        """
        Validate if vdisk deployment works via various ways:
        the API, qemu-img over the edge, and truncate on the FUSE mount.
        INFO: 1 vPool should be available on 1 storagerouter
        :return: None
        """
        cls.LOGGER.info("Starting to validate the vdisk deployment")

        vpools = VPoolHelper.get_vpools()
        assert len(vpools) >= 1, "Not enough vPools to test"

        vpool = vpools[0]  # just pick the first vpool you find
        assert len(
            vpool.storagedrivers) >= 1, "Not enough Storagedrivers to test"

        # setup base information
        storagedriver = vpool.storagedrivers[0]
        protocol = storagedriver.cluster_node_config[
            'network_server_uri'].split(':')[0]
        storage_ip = storagedriver.storage_ip
        edge_port = storagedriver.ports['edge']
        client = SSHClient(storagedriver.storage_ip, username='******')

        # The edge connection details are identical for every QEMU-deployed disk.
        # Build them once: probing the OVS edition costs an SSH round-trip, so it
        # must not be repeated inside the per-size loop.
        edge_info = {
            'port': edge_port,
            'protocol': protocol,
            'ip': storage_ip,
        }
        if SystemHelper.get_ovs_version(storagedriver.storagerouter) == 'ee':
            edge_info.update(cls.get_shell_user())

        # =======
        # VIA API
        # =======
        for size in cls.VDISK_SIZES:
            api_disk_name = cls.PREFIX + str(size) + '-api'
            cls.LOGGER.info(
                "Starting to create vdisk `{0}` on vPool `{1}` with size `{2}` "
                "on node `{3}`".format(api_disk_name, vpool.name, size,
                                       storagedriver.storagerouter.ip))
            VDiskSetup.create_vdisk(
                vdisk_name=api_disk_name + '.raw',
                vpool_name=vpool.name,
                size=size,
                storagerouter_ip=storagedriver.storagerouter.ip,
                timeout=cls.VDISK_CREATE_TIMEOUT)
            cls.LOGGER.info(
                "Finished creating vdisk `{0}`".format(api_disk_name))
            cls._check_vdisk(vdisk_name=api_disk_name, vpool_name=vpool.name)
            cls.LOGGER.info(
                "Starting to delete vdisk `{0}`".format(api_disk_name))
            VDiskRemover.remove_vdisk_by_name(api_disk_name, vpool.name)
            cls.LOGGER.info(
                "Finished deleting vdisk `{0}`".format(api_disk_name))

        # ========
        # VIA QEMU
        # ========
        for size in cls.VDISK_SIZES:
            qemu_disk_name = cls.PREFIX + str(size) + '-qemu'
            VMHandler.create_image(client, qemu_disk_name, size, edge_info)
            cls.LOGGER.info(
                "Finished creating vdisk `{0}`".format(qemu_disk_name))
            cls._check_vdisk(vdisk_name=qemu_disk_name, vpool_name=vpool.name)
            cls.LOGGER.info(
                "Starting to delete vdisk `{0}`".format(qemu_disk_name))
            VDiskRemover.remove_vdisk_by_name(qemu_disk_name, vpool.name)
            cls.LOGGER.info(
                "Finished deleting vdisk `{0}`".format(qemu_disk_name))

        # ============
        # VIA TRUNCATE
        # ============
        for size in cls.VDISK_SIZES:
            truncate_disk_name = cls.PREFIX + str(size) + '-trunc'
            cls.LOGGER.info(
                "Starting to create vdisk `{0}` on vPool `{1}` on node `{2}` "
                "with size `{3}`".format(truncate_disk_name, vpool.name,
                                         storagedriver.storage_ip, size))
            # Creating a sparse file on the vPool's FUSE mount implicitly creates a vdisk.
            client.run([
                "truncate", "-s",
                str(size), "/mnt/{0}/{1}.raw".format(vpool.name,
                                                     truncate_disk_name)
            ])
            cls.LOGGER.info(
                "Finished creating vdisk `{0}`".format(truncate_disk_name))
            cls._check_vdisk(vdisk_name=truncate_disk_name,
                             vpool_name=vpool.name)
            cls.LOGGER.info(
                "Starting to delete vdisk `{0}`".format(truncate_disk_name))
            VDiskRemover.remove_vdisk_by_name(truncate_disk_name, vpool.name)
            cls.LOGGER.info(
                "Finished deleting vdisk `{0}`".format(truncate_disk_name))
        cls.LOGGER.info("Finished to validate the vdisk deployment")
예제 #6
0
    def test_ha_fio(cls,
                    fio_bin_path,
                    cluster_info,
                    is_ee,
                    disk_amount=1,
                    timeout=CIConstants.HA_TIMEOUT,
                    logger=LOGGER):
        """
        Uses a modified fio to work with the openvstorage protocol
        :param fio_bin_path: path of the fio binary
        :type fio_bin_path: str
        :param cluster_info: information about the cluster, contains all dal objects
        :type cluster_info: dict
        :param is_ee: is it an ee version or not
        :type is_ee: bool
        :param disk_amount: amount of disks to test fail over with
        :type disk_amount: int
        :param timeout: timeout in seconds
        :type timeout: int
        :param logger: logging instance
        :return: None
        :rtype: NoneType
        """
        destination_storagedriver = cluster_info['storagedrivers'][
            'destination']
        source_storagedriver = cluster_info['storagedrivers']['source']
        vpool = destination_storagedriver.vpool

        compute_client = SSHClient(cluster_info['storagerouters']['compute'],
                                   username='******')

        vm_to_stop = cls.HYPERVISOR_INFO['vms'][
            source_storagedriver.storage_ip]['name']
        parent_hypervisor = HypervisorFactory().get()
        values_to_check = {
            'source_std': source_storagedriver.serialize(),
            'target_std': destination_storagedriver.serialize(),
            'vdisks': []
        }
        # Create vdisks
        protocol = source_storagedriver.cluster_node_config[
            'network_server_uri'].split(':')[0]
        edge_configuration = {
            'fio_bin_location': fio_bin_path,
            'hostname': source_storagedriver.storage_ip,
            'port': source_storagedriver.ports['edge'],
            'protocol': protocol,
            'volumenames': []
        }
        if is_ee is True:
            edge_configuration.update(cls.get_shell_user())

        vdisk_info = {}
        failed_configurations = []

        for index in xrange(0, disk_amount):
            try:
                vdisk_name = '{0}_vdisk{1}'.format(cls.TEST_NAME,
                                                   str(index).zfill(3))
                data_vdisk = VDiskHelper.get_vdisk_by_guid(
                    VDiskSetup.create_vdisk(vdisk_name, vpool.name,
                                            cls.AMOUNT_TO_WRITE,
                                            source_storagedriver.storage_ip))
                vdisk_info[vdisk_name] = data_vdisk
                edge_configuration['volumenames'].append(
                    data_vdisk.devicename.rsplit('.', 1)[0].split('/', 1)[1])
                values_to_check['vdisks'].append(data_vdisk.serialize())
            except TimeOutError:
                logger.error('Creating the vdisk has timed out.')
                raise
            except RuntimeError as ex:
                logger.error('Could not create the vdisk. Got {0}'.format(
                    str(ex)))
                raise
        configuration = random.choice(cls.DATA_TEST_CASES)
        threads = {'evented': {'io': {'pairs': [], 'r_semaphore': None}}}
        vm_downed = False
        screen_names = []
        try:
            logger.info(
                'Starting threads.'
            )  # Separate because creating vdisks takes a while, while creating the threads does not

            io_thread_pairs, monitoring_data, io_r_semaphore = ThreadingHandler.start_io_polling_threads(
                volume_bundle=vdisk_info)
            threads['evented']['io']['pairs'] = io_thread_pairs
            threads['evented']['io']['r_semaphore'] = io_r_semaphore
            screen_names, output_files = DataWriter.write_data_fio(
                client=compute_client,
                fio_configuration={
                    'io_size': cls.AMOUNT_TO_WRITE,
                    'configuration': configuration
                },
                edge_configuration=edge_configuration)
            logger.info(
                'Doing IO for {0}s before bringing down the node.'.format(
                    cls.IO_TIME))
            ThreadingHandler.keep_threads_running(
                r_semaphore=io_r_semaphore,
                threads=io_thread_pairs,
                shared_resource=monitoring_data,
                duration=cls.IO_TIME)
            # Threads ready for monitoring at this point
            #########################
            # Bringing original owner of the volume down
            #########################
            try:
                logger.info('Stopping {0}.'.format(vm_to_stop))
                VMHandler.stop_vm(hypervisor=parent_hypervisor,
                                  vmid=vm_to_stop)
                downed_time = time.time()
                vm_downed = True
            except Exception as ex:
                logger.error('Failed to stop. Got {0}'.format(str(ex)))
                raise
            time.sleep(cls.IO_REFRESH_RATE * 2)
            # Start IO polling to verify nothing went down
            ThreadingHandler.poll_io(
                r_semaphore=io_r_semaphore,
                required_thread_amount=len(io_thread_pairs),
                shared_resource=monitoring_data,
                downed_time=downed_time,
                timeout=timeout,
                output_files=output_files,
                client=compute_client,
                disk_amount=disk_amount)
            cls._validate(values_to_check, monitoring_data)
        except Exception as ex:
            failed_configurations.append({
                'configuration': configuration,
                'reason': str(ex)
            })
        finally:
            for thread_category, thread_collection in threads[
                    'evented'].iteritems():
                ThreadHelper.stop_evented_threads(
                    thread_collection['pairs'],
                    thread_collection['r_semaphore'])
            if vm_downed is True:
                VMHandler.start_vm(parent_hypervisor, vm_to_stop)
                SystemHelper.idle_till_ovs_is_up(
                    source_storagedriver.storage_ip, **cls.get_shell_user())
                # @TODO: Remove when https://github.com/openvstorage/integrationtests/issues/540 is fixed
                FwkHandler.restart_all()
            if screen_names:
                for screen_name in screen_names:
                    compute_client.run(
                        ['screen', '-S', screen_name, '-X', 'quit'])
            for vdisk in vdisk_info.values():
                VDiskRemover.remove_vdisk(vdisk.guid)
        # failed_configurations holds dicts; ' '.join over dicts raised a
        # TypeError and masked the real failure, so render each entry explicitly.
        assert len(failed_configurations) == 0, \
            'Certain configuration failed: {0}'.format(
                ', '.join('{0}: {1}'.format(fc['configuration'], fc['reason'])
                          for fc in failed_configurations))
예제 #7
0
    def run_test(cls, vm_info, cluster_info, logger=LOGGER):
        """
        Tests the HA using a virtual machine which will write in his own filesystem
        :param cluster_info: information about the cluster, contains all dal objects
        :type cluster_info: dict
        :param vm_info: info about the vms
        :param logger: logging instance
        :return: None
        :rtype: NoneType
        """
        compute_client = SSHClient(cluster_info['storagerouters']['compute'],
                                   username='******')
        failed_configurations = []

        destination_storagedriver = cluster_info['storagedrivers'][
            'destination']
        source_storagedriver = cluster_info['storagedrivers']['source']

        # Cache to validate properties
        values_to_check = {
            'source_std': source_storagedriver.serialize(),
            'target_std': destination_storagedriver.serialize()
        }

        vm_to_stop = cls.HYPERVISOR_INFO['vms'][
            source_storagedriver.storage_ip]['name']
        parent_hypervisor = HypervisorFactory().get()
        # Extract vdisk info from vm_info
        vdisk_info = {}
        disk_amount = 0
        for vm_name, vm_object in vm_info.iteritems():
            for vdisk in vm_object['vdisks']:
                # Ignore the cd vdisk as no IO will come from it
                if vdisk.name == vm_object['cd_path'].replace(
                        '.raw', '').split('/')[-1]:
                    continue
                disk_amount += 1
                vdisk_info.update({vdisk.name: vdisk})

        with remote(compute_client.ip, [SSHClient]) as rem:
            configuration = random.choice(cls.DATA_TEST_CASES)
            threads = {'evented': {'io': {'pairs': [], 'r_semaphore': None}}}
            output_files = []
            vm_downed = False
            try:
                logger.info('Starting the following configuration: {0}'.format(
                    configuration))
                for vm_name, vm_data in vm_info.iteritems():
                    vm_client = rem.SSHClient(vm_data['ip'], cls.VM_USERNAME,
                                              cls.VM_PASSWORD)
                    vm_client.file_create('/mnt/data/{0}.raw'.format(
                        vm_data['create_msg']))
                    vm_data['client'] = vm_client
                io_thread_pairs, monitoring_data, io_r_semaphore = ThreadingHandler.start_io_polling_threads(
                    volume_bundle=vdisk_info)
                threads['evented']['io']['pairs'] = io_thread_pairs
                threads['evented']['io']['r_semaphore'] = io_r_semaphore
                for vm_name, vm_data in vm_info.iteritems():  # Write data
                    screen_names, output_files = DataWriter.write_data_fio(
                        client=vm_data['client'],
                        fio_configuration={
                            'io_size': cls.AMOUNT_TO_WRITE,
                            'configuration': configuration
                        },
                        file_locations=[
                            '/mnt/data/{0}.raw'.format(vm_data['create_msg'])
                        ])
                    vm_data['screen_names'] = screen_names
                logger.info(
                    'Doing IO for {0}s before bringing down the node.'.format(
                        cls.IO_TIME))
                ThreadingHandler.keep_threads_running(
                    r_semaphore=io_r_semaphore,
                    threads=io_thread_pairs,
                    shared_resource=monitoring_data,
                    duration=cls.IO_TIME)
                # Threads ready for monitoring at this point
                #########################
                # Bringing original owner of the volume down
                #########################
                try:
                    logger.info('Stopping {0}.'.format(vm_to_stop))
                    VMHandler.stop_vm(hypervisor=parent_hypervisor,
                                      vmid=vm_to_stop)
                    vm_downed = True
                except Exception as ex:
                    logger.error('Failed to stop. Got {0}'.format(str(ex)))
                    raise
                downed_time = time.time()
                time.sleep(cls.IO_REFRESH_RATE * 2)
                # Start IO polling to verify nothing went down
                ThreadingHandler.poll_io(
                    r_semaphore=io_r_semaphore,
                    required_thread_amount=len(io_thread_pairs),
                    shared_resource=monitoring_data,
                    downed_time=downed_time,
                    timeout=cls.HA_TIMEOUT,
                    output_files=output_files,
                    client=compute_client,
                    disk_amount=disk_amount)
                cls._validate(values_to_check, monitoring_data)
            except Exception as ex:
                logger.error(
                    'Running the test for configuration {0} has failed because {1}'
                    .format(configuration, str(ex)))
                failed_configurations.append({
                    'configuration': configuration,
                    'reason': str(ex)
                })
            finally:
                for thread_category, thread_collection in threads[
                        'evented'].iteritems():
                    ThreadHelper.stop_evented_threads(
                        thread_collection['pairs'],
                        thread_collection['r_semaphore'])
                if vm_downed is True:
                    VMHandler.start_vm(parent_hypervisor, vm_to_stop)
                    logger.debug('Started {0}'.format(vm_to_stop))
                    SystemHelper.idle_till_ovs_is_up(
                        source_storagedriver.storage_ip,
                        **cls.get_shell_user())
                    # @TODO: Remove when https://github.com/openvstorage/integrationtests/issues/540 is fixed
                    FwkHandler.restart_all()
                for vm_name, vm_data in vm_info.iteritems():
                    for screen_name in vm_data.get('screen_names', []):
                        logger.debug('Stopping screen {0} on {1}.'.format(
                            screen_name, vm_data['client'].ip))
                        vm_data['client'].run(
                            ['screen', '-S', screen_name, '-X', 'quit'])
                    vm_data['screen_names'] = []
        # failed_configurations holds dicts; ' '.join over dicts raised a
        # TypeError and masked the real failure, so render each entry explicitly.
        assert len(failed_configurations) == 0, \
            'Certain configuration failed: {0}'.format(
                ', '.join('{0}: {1}'.format(fc['configuration'], fc['reason'])
                          for fc in failed_configurations))
예제 #8
0
    def run_test(cls, vm_info, cluster_info, logger=LOGGER):
        """
        Tests the DTL using a virtual machine which will write in his own filesystem
        Expects last data to be pulled from the DTL and not backend
        :param cluster_info: information about the cluster, contains all dal objects
        :type cluster_info: dict
        :param vm_info: info about the vms
        :param logger: logging instance
        :return: None
        :rtype: NoneType
        """
        source_std = cluster_info['storagedrivers']['source']
        source_client = SSHClient(source_std.storagerouter, username='******')

        compute_str = cluster_info['storagerouters']['compute']
        compute_client = SSHClient(compute_str)

        # setup hypervisor details
        parent_hypervisor = HypervisorFactory().get()
        vm_to_stop = cls.HYPERVISOR_INFO['vms'][source_std.storage_ip]['name']

        # Extract the vdisks to monitor from vm_info and count them.
        vdisk_info = {}
        disk_amount = 0
        for vm_name, vm_object in vm_info.iteritems():
            for vdisk in vm_object['vdisks']:
                # Ignore the cd vdisk as no IO will come from it
                if vdisk.name == vm_object['cd_path'].replace(
                        '.raw', '').split('/')[-1]:
                    continue
                disk_amount += 1
                vdisk_info.update({vdisk.name: vdisk})

        # Cache to validate properties
        values_to_check = {
            'source_std': source_std.serialize(),
            'vdisks': vdisk_info.values()
        }

        with remote(compute_str.ip, [SSHClient]) as rem:
            threads = {'evented': {'io': {'pairs': [], 'r_semaphore': None}}}
            vm_downed = False
            output_files = []
            try:
                for vm_name, vm_data in vm_info.iteritems():
                    vm_client = rem.SSHClient(vm_data['ip'], cls.VM_USERNAME,
                                              cls.VM_PASSWORD)
                    vm_client.file_create('/mnt/data/{0}.raw'.format(
                        vm_data['create_msg']))
                    vm_data['client'] = vm_client
                    # Load dd, md5sum, screen & fio in memory
                    vm_data['client'].run([
                        'dd', 'if=/dev/urandom',
                        'of={0}'.format(cls.VM_RANDOM), 'bs=1M', 'count=2'
                    ])
                    vm_data['client'].run(['md5sum', cls.VM_RANDOM])

                logger.info("Stopping proxy services")
                service_manager = ServiceFactory.get_manager()

                # NOTE(review): the log above says "stopping" but this RESTARTS
                # the proxies — confirm whether a stop was intended so writes
                # land in the DTL, or the brief restart window is sufficient.
                for proxy in source_std.alba_proxies:
                    service_manager.restart_service(proxy.service.name,
                                                    client=source_client)

                logger.info(
                    'Starting to WRITE file while proxy is offline. All data should be stored in the DTL!'
                )
                for vm_name, vm_data in vm_info.iteritems():
                    vm_data['client'].run(
                        'dd if=/dev/urandom of={0} bs=1M count=2'.format(
                            cls.VM_FILENAME).split())
                    # Remember the checksum of what was written so it can be
                    # compared after the fail-over.
                    original_md5sum = ' '.join(vm_data['client'].run(
                        ['md5sum', cls.VM_FILENAME]).split())
                    vm_data['original_md5sum'] = original_md5sum
                    logger.info('Original MD5SUM for VM {0}: {1}.'.format(
                        vm_name, original_md5sum))
                logger.info('Finished to WRITE file while proxy is offline!')
                # NOTE(review): this format string has no {0} placeholder, so
                # IO_TIME is never rendered — harmless, but confirm intent.
                logger.info(
                    "Starting fio to generate IO for failing over.".format(
                        cls.IO_TIME))
                io_thread_pairs, monitoring_data, io_r_semaphore = ThreadingHandler.start_io_polling_threads(
                    volume_bundle=vdisk_info)
                threads['evented']['io']['pairs'] = io_thread_pairs
                threads['evented']['io']['r_semaphore'] = io_r_semaphore
                for vm_name, vm_data in vm_info.iteritems():  # Write data
                    screen_names, output_files = DataWriter.write_data_fio(
                        client=vm_data['client'],
                        fio_configuration={
                            'io_size': cls.AMOUNT_TO_WRITE,
                            'configuration': cls.IO_PATTERN
                        },
                        file_locations=[
                            '/mnt/data/{0}.raw'.format(vm_data['create_msg'])
                        ])
                    vm_data['screen_names'] = screen_names
                logger.info(
                    'Doing IO for {0}s before bringing down the node.'.format(
                        cls.IO_TIME))
                ThreadingHandler.keep_threads_running(
                    r_semaphore=io_r_semaphore,
                    threads=io_thread_pairs,
                    shared_resource=monitoring_data,
                    duration=cls.IO_TIME)
                ##############################################
                # Bringing original owner of the volume down #
                ##############################################
                VMHandler.stop_vm(hypervisor=parent_hypervisor,
                                  vmid=vm_to_stop)
                vm_downed = True
                downed_time = time.time()
                time.sleep(cls.IO_REFRESH_RATE * 2)
                # Start IO polling to verify nothing went down
                ThreadingHandler.poll_io(
                    r_semaphore=io_r_semaphore,
                    required_thread_amount=len(io_thread_pairs),
                    shared_resource=monitoring_data,
                    downed_time=downed_time,
                    timeout=cls.HA_TIMEOUT,
                    output_files=output_files,
                    client=compute_client,
                    disk_amount=disk_amount)
                logger.info('Starting to validate move...')
                cls._validate_move(values_to_check)
                logger.info('Finished validating move!')
                logger.info('Validate if DTL is working correctly!')
                # Compare post-failover checksums with the ones recorded before
                # the node went down; mismatches mean data was lost by the DTL.
                unmatching_checksum_vms = []
                for vm_name, vm_data in vm_info.iteritems():
                    current_md5sum = ' '.join(vm_data['client'].run(
                        ['md5sum', cls.VM_FILENAME]).split())
                    if vm_data['original_md5sum'] != current_md5sum:
                        unmatching_checksum_vms.append(vm_name)
                assert len(
                    unmatching_checksum_vms
                ) == 0, 'Not all data was read from the DTL. Checksums do not line up for {}'.format(
                    ', '.join(unmatching_checksum_vms))
                logger.info('DTL is working correctly!')
            finally:
                # Always tear down: polling threads, downed VM, fio screens.
                for thread_category, thread_collection in threads[
                        'evented'].iteritems():
                    ThreadHelper.stop_evented_threads(
                        thread_collection['pairs'],
                        thread_collection['r_semaphore'])
                if vm_downed is True:
                    VMHandler.start_vm(parent_hypervisor, vm_to_stop)
                    logger.debug('Started {0}'.format(vm_to_stop))
                    SystemHelper.idle_till_ovs_is_up(source_std.storage_ip,
                                                     **cls.get_shell_user())
                    # @TODO: Remove when https://github.com/openvstorage/integrationtests/issues/540 is fixed
                    FwkHandler.restart_all()
                for vm_name, vm_data in vm_info.iteritems():
                    for screen_name in vm_data.get('screen_names', []):
                        logger.debug('Stopping screen {0} on {1}.'.format(
                            screen_name, vm_data['client'].ip))
                        vm_data['client'].run(
                            ['screen', '-S', screen_name, '-X', 'quit'])
                    vm_data['screen_names'] = []
예제 #9
0
 def run_test_edge_blktap(cls,
                          storagedriver,
                          image_path,
                          disk_amount,
                          write_amount,
                          logger=LOGGER):
     """
     Runs the fio deployment using edge and blocktap combination.
     Creates the disks using edge (via qemu convert)
     Writes data to the disks using blocktap
     Tap devices and vdisks are always torn down again in the finally clause.
     :param storagedriver: chosen storagedriver
     :param image_path: Path to the image to convert
     :param disk_amount: Amount of disks to deploy
     :param write_amount: Amount of data to write
     :param logger: logging instance
     :return: None
     """
     client = SSHClient(storagedriver.storagerouter, username='******')
     vpool = storagedriver.vpool
     # Connection details of this storagedriver's edge endpoint
     edge_info = {
         'port': storagedriver.ports['edge'],
         'protocol': storagedriver.cluster_node_config['network_server_uri'].split(':')[0],
         'ip': storagedriver.storage_ip
     }
     # The enterprise edition additionally requires shell user credentials
     if SystemHelper.get_ovs_version(storagedriver.storagerouter) == 'ee':
         edge_info.update(cls.get_shell_user())
     vdisk_info = {}
     try:
         for vdisk_number in xrange(disk_amount):  # Create all images first
             vdisk_name = '{0}_{1}_-blktap'.format(cls.PREFIX, vdisk_number)
             logger.info("Converting image {0} to {1}:{2}".format(
                 image_path, edge_info['ip'], vdisk_name))
             VMHandler.convert_image(client, image_path, vdisk_name,
                                     edge_info)
             # Bug fix: the original format string only had two placeholders
             # for the three arguments, silently dropping the vdisk name.
             logger.info(
                 "Creating a tap blk device for image {0} on {1}:{2}".format(
                     image_path, edge_info['ip'], vdisk_name))
             tap_dir = VMHandler.create_blktap_device(
                 client, vdisk_name, edge_info)
             vdisk_info[vdisk_name] = tap_dir
         # Two fio passes over every tap device.
         # 'configuration' is presumably a (read %, write %) tuple — TODO confirm against DataWriter.
         fio_configuration = {
             'io_size': write_amount,
             'configuration': (0, 100)
         }
         DataWriter.write_data_fio(client,
                                   fio_configuration,
                                   file_locations=vdisk_info.values(),
                                   screen=False,
                                   loop_screen=False)
         fio_configuration = {
             'io_size': write_amount,
             'configuration': (100, 0)
         }
         DataWriter.write_data_fio(client,
                                   fio_configuration,
                                   file_locations=vdisk_info.values(),
                                   screen=False,
                                   loop_screen=False)
     except Exception as ex:
         logger.error(
             'An exception occur while testing edge+blktap: {0}'.format(
                 str(ex)))
         raise
     finally:
         # Always clean up the tap devices this run created, even on failure
         for tap_conn in client.run(['tap-ctl', 'list']).splitlines():
             if not tap_conn.endswith(tuple(vdisk_info.keys())):
                 continue
             logger.info("Deleting tapctl connection {0}".format(tap_conn))
             tap_conn_pid = None
             tap_conn_minor = None
             # 'tap-ctl list' output carries 'pid=<n>' and 'minor=<n>' sections
             for tap_conn_section in tap_conn.split():
                 if tap_conn_section.startswith('pid='):
                     tap_conn_pid = tap_conn_section.replace('pid=', '')
                 elif tap_conn_section.startswith('minor='):
                     tap_conn_minor = tap_conn_section.replace('minor=', '')
             if tap_conn_pid is None or tap_conn_minor is None:
                 raise ValueError(
                     'Unable to destroy the blocktap connection because its output format has changed.'
                 )
             client.run([
                 "tap-ctl", "destroy", "-p", tap_conn_pid, "-m",
                 tap_conn_minor
             ])
         for vdisk_name in vdisk_info.keys():
             VDiskRemover.remove_vdisk_by_name(vdisk_name, vpool.name)
예제 #10
0
    def validate_add_extend_remove_vpool(cls, timeout=ADD_EXTEND_REMOVE_VPOOL_TIMEOUT):
        """
        Validate if we can add, extend and/or remove a vPool, testing the following scenarios:
            * Normal with no accelerated backend
            * Accelerated vPool with hdd_backend & ssd_backend

        INFO:
            * at least 2 storagerouters should be available
            * at least 2 backends should be available with default preset

        :param timeout: specify a timeout
        :type timeout: int
        :return:
        """
        cls.LOGGER.info("Starting to validate add-extend-remove vpool")


        # Collect only the storagerouters that carry the roles a vPool requires;
        # ineligible ones are logged and skipped.
        storagerouter_ips = []
        for storagerouter_ip in StoragerouterHelper.get_storagerouter_ips():
            try:
                RoleValidation.check_required_roles(VPoolSetup.REQUIRED_VPOOL_ROLES, storagerouter_ip, "LOCAL")
                storagerouter_ips.append(storagerouter_ip)
                cls.LOGGER.info("Added `{0}` to list of eligible storagerouters".format(storagerouter_ip))
            except RuntimeError as ex:
                cls.LOGGER.warning("Did not add `{0}` to list of eligible "
                                              "storagerouters because: {1}".format(storagerouter_ip, ex))
                pass

        # Filter storagerouters without required roles
        assert len(storagerouter_ips) > 1, "We need at least 2 storagerouters with valid roles: {0}"\
            .format(storagerouter_ips)
        alba_backends = BackendHelper.get_alba_backends()
        assert len(alba_backends) >= 2, "We need at least 2 or more backends!"

        # Global vdisk details
        vdisk_deployment_ip = storagerouter_ips[0]

        # Determine backends (2)
        hdd_backend = alba_backends[0]
        ssd_backend = alba_backends[1]

        # Add preset to all alba_backends (we only use the first two as seen above)
        for alba_backend in alba_backends[0:2]:
            cls.LOGGER.info("Adding custom preset to backend {0}".format(alba_backend.name))
            preset_result = BackendSetup.add_preset(albabackend_name=alba_backend.name,
                                                    preset_details=cls.PRESET,
                                                    timeout=cls.PRESET_CREATE_TIMEOUT)
            assert preset_result is True, 'Failed to add preset to backend {0}'.format(alba_backend.name)
            cls.LOGGER.info("Finished adding custom preset to backend {0}".format(alba_backend.name))

        # Vpool configs, regressing https://github.com/openvstorage/alba/issues/560 & more
        vpool_configs = {
            "no_fragment_cache_on_disk": {
                "strategy": {"cache_on_read": False, "cache_on_write": False},
                "location": "disk"
            },
            "no_fragment_cache_on_accel": {
                "strategy": {"cache_on_read": False, "cache_on_write": False},
                "location": "backend",
                "backend": {
                    "name": ssd_backend.name,
                    "preset": cls.PRESET['name']
                }
            }
        }

        # Each configuration goes through a full lifecycle: add/extend the vPool on
        # every eligible storagerouter, deploy and delete a vdisk, then remove the vPool again.
        for cfg_name, cfg in vpool_configs.iteritems():
            # Create vpool
            block_cache_cfg = None
            # NOTE(review): get_ovs_version() is invoked here without a storagerouter
            # argument, unlike the other call sites in this file — confirm the helper
            # supports a no-argument call (defaulting to the local node).
            if SystemHelper.get_ovs_version().lower() == 'ee':
                block_cache_cfg = cfg
            for storagerouter_ip in storagerouter_ips:
                cls.LOGGER.info("Add/extend vPool `{0}` on storagerouter `{1}`".format(cls.VPOOL_NAME, storagerouter_ip))
                start = time.time()
                try:
                    cls._add_vpool(vpool_name=cls.VPOOL_NAME, fragment_cache_cfg=cfg,
                                              block_cache_cfg=block_cache_cfg, albabackend_name=hdd_backend.name, timeout=timeout,
                                              preset_name=cls.PRESET['name'], storagerouter_ip=storagerouter_ip)
                except TimeOutError:
                    cls.LOGGER.warning('Adding/extending the vpool has timed out after {0}s. Polling for another {1}s.'
                                                  .format(timeout, cls.ADD_EXTEND_REMOVE_VPOOL_TIMEOUT_FORGIVING - timeout))
                    # Lets be a bit forgiving and give the fwk 5 mins to actually complete the task
                    vpool = VPoolHelper.get_vpool_by_name(cls.VPOOL_NAME)
                    while vpool.status != 'RUNNING':
                        if time.time() - start > cls.ADD_EXTEND_REMOVE_VPOOL_TIMEOUT_FORGIVING:
                            raise RuntimeError('The vpool was not added or extended after {0}s'.format(cls.ADD_EXTEND_REMOVE_VPOOL_TIMEOUT_FORGIVING))
                        cls.LOGGER.warning('Vpool status is still {0} after {1}s.'.format(vpool.status, time.time() - start))
                        time.sleep(1)
                        vpool.discard()
                    cls.LOGGER.warning('The vpool was added or extended after {0}s.'.format(time.time() - start))
                except RuntimeError as ex:
                    cls.LOGGER.error('Adding/extending the vpool has failed with {0}.'.format(str(ex)))
                    raise
                # Check #proxies
                vpool = VPoolHelper.get_vpool_by_name(cls.VPOOL_NAME)
                for storagedriver in vpool.storagedrivers:
                    assert len(storagedriver.alba_proxies) == 2, 'The vpool did not get setup with 2 proxies. Found {} instead.'.format(len(storagedriver.alba_proxies))
            # Deploy a vdisk
            vdisk_name = cls.PREFIX + cfg_name
            cls.LOGGER.info("Starting to create vdisk `{0}` on vPool `{1}` with size `{2}` on node `{3}`"
                                       .format(vdisk_name, cls.VPOOL_NAME, cls.VDISK_SIZE, vdisk_deployment_ip))
            VDiskSetup.create_vdisk(vdisk_name=vdisk_name + '.raw',
                                    vpool_name=cls.VPOOL_NAME,
                                    size=cls.VDISK_SIZE,
                                    storagerouter_ip=vdisk_deployment_ip,
                                    timeout=cls.VDISK_CREATE_TIMEOUT)
            cls.LOGGER.info("Finished creating vdisk `{0}`".format(vdisk_name))
            cls.LOGGER.info("Starting to delete vdisk `{0}`".format(vdisk_name))
            VDiskRemover.remove_vdisk_by_name(vdisk_name, cls.VPOOL_NAME)
            cls.LOGGER.info("Finished deleting vdisk `{0}`".format(vdisk_name))

            # Delete vpool
            for storagerouter_ip in storagerouter_ips:
                # NOTE(review): 'vpool' here is the object fetched during the add phase;
                # storagedriver count may be stale until discard() below — confirm intended.
                storagedrivers_to_delete = len(vpool.storagedrivers)
                cls.LOGGER.info("Deleting vpool `{0}` on storagerouter `{1}`".format(cls.VPOOL_NAME, storagerouter_ip))
                try:
                    VPoolRemover.remove_vpool(vpool_name=cls.VPOOL_NAME, storagerouter_ip=storagerouter_ip, timeout=timeout)
                except TimeOutError:
                    try:
                        vpool.discard()  # Discard is needed to update the vpool status as it was running before
                        while vpool.status != 'RUNNING':
                            cls.LOGGER.warning('Removal/shrinking the vpool has timed out after {0}s. Polling for another {1}s.'
                                                          .format(timeout, cls.ADD_EXTEND_REMOVE_VPOOL_TIMEOUT_FORGIVING - timeout))
                            # NOTE(review): 'start' still holds the timestamp of the last
                            # add/extend iteration, not of this removal — confirm this is
                            # the intended deadline for the forgiving poll.
                            if time.time() - start > cls.ADD_EXTEND_REMOVE_VPOOL_TIMEOUT_FORGIVING:
                                raise RuntimeError('The vpool was not removed or extended after {0}s'.format(cls.ADD_EXTEND_REMOVE_VPOOL_TIMEOUT_FORGIVING))
                            cls.LOGGER.warning('Vpool status is still {0} after {1}s.'.format(vpool.status, time.time() - start))
                            time.sleep(1)
                            vpool.discard()
                    except ObjectNotFoundException:
                        # The vpool disappearing entirely is expected when removing its last storagedriver
                        if storagedrivers_to_delete != 1:  # Should be last one
                            raise
                except RuntimeError as ex:
                    cls.LOGGER.error('Shrinking/removing the vpool has failed with {0}.'.format(str(ex)))
                    raise
            cls.LOGGER.info('Vpool has been fully removed.')
        # Delete presets
        for alba_backend in alba_backends[0:2]:
            cls.LOGGER.info("Removing custom preset from backend {0}".format(alba_backend.name))
            remove_preset_result = BackendRemover.remove_preset(albabackend_name=alba_backend.name,
                                                                preset_name=cls.PRESET['name'],
                                                                timeout=cls.PRESET_REMOVE_TIMEOUT)
            assert remove_preset_result is True, 'Failed to remove preset from backend {0}'.format(alba_backend.name)
            cls.LOGGER.info("Finshed removing custom preset from backend {0}".format(alba_backend.name))

        cls.LOGGER.info("Finished to validate add-extend-remove vpool")
예제 #11
0
    def validate_post_reboot(tries=POST_REBOOT_TRIES,
                             timeout=POST_REBOOT_TIMEOUT):
        """
        Validate if all services come up after rebooting a node
        :param tries: amount of tries to check if ovs services are running
        :type tries: int
        :param timeout: timeout between tries
        :type timeout: int
        :return:
        """
        storagerouter_ips = list(StoragerouterHelper.get_storagerouter_ips())
        assert len(
            storagerouter_ips) >= 2, "We need at least 2 storagerouters!"

        # Elect a remote node: never reboot the node the tests are running on.
        PostRebootChecks.LOGGER.info('Starting election of node to reboot')
        local_ip = SystemHelper.get_local_storagerouter().ip
        storagerouter_ips.remove(local_ip)
        host_to_reboot = storagerouter_ips[0]
        PostRebootChecks.LOGGER.info(
            'Finished election of node to reboot: {0}'.format(host_to_reboot))

        # Open the initial SSH connection and fire a delayed reboot through it.
        client = PostRebootChecks.create_client(host_to_reboot)
        PostRebootChecks.LOGGER.info(
            'Starting reboot of host `{0}`!'.format(host_to_reboot))
        client.run(" ( sleep {0} ; reboot ) &".format(
            PostRebootChecks.SSH_REBOOT_DELAY))
        time.sleep(10)

        # Poll until SSH answers again, giving up after SSH_WAIT_TRIES attempts.
        for attempt in xrange(1, PostRebootChecks.SSH_WAIT_TRIES + 1):
            try:
                PostRebootChecks.create_client(host_to_reboot)
                PostRebootChecks.LOGGER.info(
                    'host `{0}` is up again!'.format(host_to_reboot))
                break
            except Exception:
                PostRebootChecks.LOGGER.warning(
                    'Host `{0}` still not up at try {1}/{2} ...'.format(
                        host_to_reboot, attempt,
                        PostRebootChecks.SSH_WAIT_TRIES))
                time.sleep(10)  # throttle the polling
                if attempt == PostRebootChecks.SSH_WAIT_TRIES:
                    # Exhausted every attempt: the host never came back.
                    raise RuntimeError(
                        "Max amounts of attempts reached ({0}) for host `{1}`, host still not up ..."
                        .format(attempt, host_to_reboot))

        # Commence the actual test: keep checking until every OVS service runs
        # or the allowed number of tries is spent.
        PostRebootChecks.LOGGER.info(
            'Starting post-reboot service check on node `{0}`'.format(
                host_to_reboot))
        client = SSHClient(host_to_reboot, username='******')
        non_running_services = None
        checks_done = 0
        while checks_done <= tries:
            non_running_services = SystemHelper.get_non_running_ovs_services(
                client)
            if not non_running_services:
                break
            checks_done += 1
            time.sleep(timeout)
        assert len(
            non_running_services
        ) == 0, "Found non running services `{0}` after reboot on node `{1}`".format(
            non_running_services, host_to_reboot)

        PostRebootChecks.LOGGER.info(
            'Starting post-reboot vPool check on node `{0}`'.format(
                host_to_reboot))

        PostRebootChecks.LOGGER.info(
            'Finished post-reboot check on node `{0}`'.format(host_to_reboot))
예제 #12
0
    def _execute_test(cls):
        """
        Mimics the healthcheck creating and deleting disks with the same name/devicename back to back
        Runs 100 create/fetch/delete iterations against a randomly chosen
        storagedriver of the local storagerouter and asserts none of them failed.
        :return: None
        """
        local_sr = SystemHelper.get_local_storagerouter()
        cls.LOGGER.info("Starting creation/deletion test.")
        # Elect vpool
        assert len(
            local_sr.storagedrivers
        ) > 0, 'Node {0} has no storagedriver. Cannot test {1}'.format(
            local_sr.ip, VDiskControllerTester.TEST_NAME)
        # Idiom: random.choice instead of manual randint indexing
        random_storagedriver = random.choice(local_sr.storagedrivers)
        vpool = random_storagedriver.vpool
        disk_size = 1024 ** 3  # 1 GiB
        disk_name = 'ci_scenario_rapid_create_delete_same_device'
        exceptions = []

        def _record_failure(message):
            # Log the failure and remember it for the final assert.
            # (DRY: the original formatted every failure message twice.)
            cls.LOGGER.error(message)
            exceptions.append(message)

        for loop in xrange(0, 100):
            test_passed = False
            try:
                cls.LOGGER.info("Creating new disk.")
                try:
                    VDiskController.create_new(disk_name, disk_size,
                                               random_storagedriver.guid)
                except Exception as ex:
                    _record_failure(
                        'Creation failed. Got {0} in iteration {1}'.format(
                            str(ex), loop))
                    continue
                cls.LOGGER.info("Fetching new disk.")
                try:
                    vdisk = VDiskHelper.get_vdisk_by_name(
                        '{0}.raw'.format(disk_name), vpool.name)
                except Exception as ex:
                    _record_failure(
                        'Fetch failed. Got {0} in iteration {1}'.format(
                            str(ex), loop))
                    continue
                cls.LOGGER.info("Deleting new disk.")
                try:
                    VDiskController.delete(vdisk_guid=vdisk.guid)
                except Exception as ex:
                    _record_failure(
                        'Delete failed. Got {0} in iteration {1}'.format(
                            str(ex), loop))
                test_passed = True
            except Exception as ex:
                # Unexpected failures are logged but deliberately not
                # recorded in 'exceptions' (matches original behaviour).
                cls.LOGGER.error(
                    'Unexpected exception occurred during loop {0}. Got {1}.'.
                    format(loop, str(ex)))
            finally:
                try:
                    cls._cleanup_vdisk(disk_name, vpool.name, not test_passed)
                except Exception as ex:
                    # The logged and recorded messages differ slightly in the
                    # original; both are kept verbatim.
                    cls.LOGGER.error(
                        "Auto cleanup failed with {0} in iteration {1}.".
                        format(str(ex), loop))
                    exceptions.append(
                        'Auto cleanup failed, got {0} in iteration {1}'.format(
                            str(ex), loop))

        assert len(
            exceptions
        ) == 0, 'Exception occurred during the creation of vdisks with the same devicename. Got {0}'.format(
            ', '.join(exceptions))

        cls.LOGGER.info("Finished create/delete test.")