Example #1
 def start_snapshotting_threads(cls,
                                volume_bundle,
                                args=(),
                                kwargs=None,
                                logger=LOGGER):
     """
     Start the snapshotting threads
     :param volume_bundle: bundle of volumes
     :type volume_bundle: dict
     :param args: extra positional arguments passed to every snapshotting thread
     :type args: tuple
     :param kwargs: extra keyword arguments passed to every snapshotting thread
     :type kwargs: dict
     :param logger: logging instance
     :return: list of started (thread, event) pairs
     :rtype: list
     """
     if kwargs is None:
         kwargs = {}
     threads = []
     current_thread_bundle = {'index': 1, 'vdisks': []}
     logger.info('Starting threads.')
     try:
         for index, (vdisk_name,
                     vdisk_object) in enumerate(volume_bundle.iteritems(),
                                                1):
             vdisks = current_thread_bundle['vdisks']
             vdisks.append(vdisk_object)
             if index % cls.VDISK_THREAD_LIMIT == 0 or index == len(
                     volume_bundle.keys()):
                 threads.append(
                     ThreadHelper.start_thread_with_event(
                         target=cls._start_snapshots,
                         name='iops_{0}'.format(
                             current_thread_bundle['index']),
                         args=(vdisks, ) + args,
                         kwargs=kwargs))
                 current_thread_bundle['index'] = index + 1
                 current_thread_bundle['vdisks'] = []
     except Exception:
         for thread_pair in threads:  # Attempt to clean up the current in-flight threads
             if thread_pair[0].isAlive():
                 thread_pair[1].set()
         # Wait for threads to die
         for thread_pair in threads:
             thread_pair[0].join()
         raise
     return threads
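
For illustration, a minimal, self-contained sketch of the (thread, event) contract returned above; plain threading objects stand in for ThreadHelper.start_thread_with_event and cls._start_snapshots, and the stop/join loops mirror the cleanup in the except block.

import threading
import time

def _snapshot_loop(stop_event, interval=1):
    # Stand-in for cls._start_snapshots: run until the paired event is set.
    while not stop_event.is_set():
        time.sleep(interval)

stop_event = threading.Event()
worker = threading.Thread(target=_snapshot_loop, name='iops_1', args=(stop_event,))
worker.start()
threads = [(worker, stop_event)]   # same (thread, event) pair shape as returned above

for thread_pair in threads:        # signal every snapshotting thread to stop
    thread_pair[1].set()
for thread_pair in threads:        # wait for the threads to die
    thread_pair[0].join()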
Example #2
    def test_ha_fio(cls,
                    fio_bin_path,
                    cluster_info,
                    is_ee,
                    disk_amount=1,
                    timeout=CIConstants.HA_TIMEOUT,
                    logger=LOGGER):
        """
        Uses a modified fio to work with the openvstorage protocol
        :param fio_bin_path: path of the fio binary
        :type fio_bin_path: str
        :param cluster_info: information about the cluster, contains all dal objects
        :type cluster_info: dict
        :param is_ee: is it an ee version or not
        :type is_ee: bool
        :param disk_amount: amount of disks to test fail over with
        :type disk_amount: int
        :param timeout: timeout in seconds
        :type timeout: int
        :param logger: logging instance
        :return: None
        :rtype: NoneType
        """
        destination_storagedriver = cluster_info['storagedrivers'][
            'destination']
        source_storagedriver = cluster_info['storagedrivers']['source']
        vpool = destination_storagedriver.vpool

        compute_client = SSHClient(cluster_info['storagerouters']['compute'],
                                   username='******')

        vm_to_stop = cls.HYPERVISOR_INFO['vms'][
            source_storagedriver.storage_ip]['name']
        parent_hypervisor = HypervisorFactory().get()
        values_to_check = {
            'source_std': source_storagedriver.serialize(),
            'target_std': destination_storagedriver.serialize(),
            'vdisks': []
        }
        # Create vdisks
        protocol = source_storagedriver.cluster_node_config[
            'network_server_uri'].split(':')[0]
        edge_configuration = {
            'fio_bin_location': fio_bin_path,
            'hostname': source_storagedriver.storage_ip,
            'port': source_storagedriver.ports['edge'],
            'protocol': protocol,
            'volumenames': []
        }
        if is_ee is True:
            edge_configuration.update(cls.get_shell_user())

        vdisk_info = {}
        failed_configurations = []

        for index in xrange(0, disk_amount):
            try:
                vdisk_name = '{0}_vdisk{1}'.format(cls.TEST_NAME,
                                                   str(index).zfill(3))
                data_vdisk = VDiskHelper.get_vdisk_by_guid(
                    VDiskSetup.create_vdisk(vdisk_name, vpool.name,
                                            cls.AMOUNT_TO_WRITE,
                                            source_storagedriver.storage_ip))
                vdisk_info[vdisk_name] = data_vdisk
                edge_configuration['volumenames'].append(
                    data_vdisk.devicename.rsplit('.', 1)[0].split('/', 1)[1])
                values_to_check['vdisks'].append(data_vdisk.serialize())
            except TimeOutError:
                logger.error('Creating the vdisk has timed out.')
                raise
            except RuntimeError as ex:
                logger.error('Could not create the vdisk. Got {0}'.format(
                    str(ex)))
                raise
        configuration = random.choice(cls.DATA_TEST_CASES)
        threads = {'evented': {'io': {'pairs': [], 'r_semaphore': None}}}
        vm_downed = False
        screen_names = []
        try:
            logger.info('Starting threads.')  # Logged separately: creating the vdisks takes a while, creating the threads does not

            io_thread_pairs, monitoring_data, io_r_semaphore = ThreadingHandler.start_io_polling_threads(
                volume_bundle=vdisk_info)
            threads['evented']['io']['pairs'] = io_thread_pairs
            threads['evented']['io']['r_semaphore'] = io_r_semaphore
            screen_names, output_files = DataWriter.write_data_fio(
                client=compute_client,
                fio_configuration={
                    'io_size': cls.AMOUNT_TO_WRITE,
                    'configuration': configuration
                },
                edge_configuration=edge_configuration)
            logger.info(
                'Doing IO for {0}s before bringing down the node.'.format(
                    cls.IO_TIME))
            ThreadingHandler.keep_threads_running(
                r_semaphore=io_r_semaphore,
                threads=io_thread_pairs,
                shared_resource=monitoring_data,
                duration=cls.IO_TIME)
            # Threads ready for monitoring at this point
            #########################
            # Bringing original owner of the volume down
            #########################
            try:
                logger.info('Stopping {0}.'.format(vm_to_stop))
                VMHandler.stop_vm(hypervisor=parent_hypervisor,
                                  vmid=vm_to_stop)
                downed_time = time.time()
                vm_downed = True
            except Exception as ex:
                logger.error('Failed to stop. Got {0}'.format(str(ex)))
                raise
            time.sleep(cls.IO_REFRESH_RATE * 2)
            # Start IO polling to verify nothing went down
            ThreadingHandler.poll_io(
                r_semaphore=io_r_semaphore,
                required_thread_amount=len(io_thread_pairs),
                shared_resource=monitoring_data,
                downed_time=downed_time,
                timeout=timeout,
                output_files=output_files,
                client=compute_client,
                disk_amount=disk_amount)
            cls._validate(values_to_check, monitoring_data)
        except Exception as ex:
            failed_configurations.append({
                'configuration': configuration,
                'reason': str(ex)
            })
        finally:
            for thread_category, thread_collection in threads[
                    'evented'].iteritems():
                ThreadHelper.stop_evented_threads(
                    thread_collection['pairs'],
                    thread_collection['r_semaphore'])
            if vm_downed is True:
                VMHandler.start_vm(parent_hypervisor, vm_to_stop)
                SystemHelper.idle_till_ovs_is_up(
                    source_storagedriver.storage_ip, **cls.get_shell_user())
                # @TODO: Remove when https://github.com/openvstorage/integrationtests/issues/540 is fixed
                FwkHandler.restart_all()
            if screen_names:
                for screen_name in screen_names:
                    compute_client.run(
                        ['screen', '-S', screen_name, '-X', 'quit'])
            for vdisk in vdisk_info.values():
                VDiskRemover.remove_vdisk(vdisk.guid)
        assert len(failed_configurations) == 0, \
            'Certain configurations failed: {0}'.format(' '.join(str(failed) for failed in failed_configurations))
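
For reference, an illustrative sketch of the cluster_info shape that test_ha_fio reads, inferred from the lookups above; the real values are DAL StorageDriver and StorageRouter objects, represented here by placeholder strings.

cluster_info = {
    'storagedrivers': {
        'source': '<StorageDriver currently owning the vdisks>',
        'destination': '<StorageDriver the volumes should fail over to>'
    },
    'storagerouters': {
        'compute': '<StorageRouter that runs the fio client>'
    }
}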
Example #3
 def start_io_polling_threads(cls, volume_bundle, logger=LOGGER):
     """
     Start the IO polling threads
     :param volume_bundle: bundle of volumes {vdiskname: vdisk object}
     :type volume_bundle: dict
     :param logger: logger instance
     :type logger: ovs.log.log_handler.LogHandler
     :return: threads, monitoring_data, r_semaphore
     :rtype: tuple(list, dict, ci.api_lib.helpers.thread.Waiter)
     """
     required_thread_amount = math.ceil(
         float(len(volume_bundle.keys())) /
         cls.VDISK_THREAD_LIMIT)  # Amount of threads we will need
     r_semaphore = Waiter(
         required_thread_amount + 1, auto_reset=True
     )  # Add another target to let this thread control the semaphore
     threads = []
     monitoring_data = {}
     current_thread_bundle = {'index': 1, 'vdisks': []}
     logger.info('Starting threads.')  # Logged separately: creating the vdisks takes a while, creating the threads does not
     try:
         for index, (vdisk_name,
                     vdisk_object) in enumerate(volume_bundle.iteritems(),
                                                1):
             vdisks = current_thread_bundle['vdisks']
             volume_number_range = '{0}-{1}'.format(
                 current_thread_bundle['index'], index)
             vdisks.append(vdisk_object)
             if index % cls.VDISK_THREAD_LIMIT == 0 or index == len(
                     volume_bundle.keys()):
                 # New thread bundle
                 monitor_resource = {
                     'general': {
                         'io': [],
                         'edge_clients': {}
                     }
                 }
                 # noinspection PyTypeChecker
                 for vdisk in vdisks:
                     monitor_resource[vdisk.name] = {
                         'io': {
                             'down': [],
                             'descending': [],
                             'rising': [],
                             'highest': None,
                             'lowest': None
                         },
                         'edge_clients': {
                             'down': [],
                             'up': []
                         }
                     }
                 monitoring_data[volume_number_range] = monitor_resource
                 threads.append(
                     ThreadHelper.start_thread_with_event(
                         target=cls.monitor_changes,
                         name='iops_{0}'.format(
                             current_thread_bundle['index']),
                         args=(monitor_resource, vdisks, r_semaphore)))
                 current_thread_bundle['index'] = index + 1
                 current_thread_bundle['vdisks'] = []
     except Exception:
         for thread_pair in threads:  # Attempt to clean up the current in-flight threads
             if thread_pair[0].isAlive():
                 thread_pair[1].set()
         while r_semaphore.get_counter() < len(
                 threads
         ):  # Wait for the number of threads we currently have.
             time.sleep(0.05)
         r_semaphore.wait()  # Unlock them to let them stop (the event is set -> they won't loop)
         # Wait for threads to die
         for thread_pair in threads:
             thread_pair[0].join()
         raise
     return threads, monitoring_data, r_semaphore
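
For reference, the per-bundle monitoring structure built above, written out as a literal for a single thread bundle with one vdisk; monitor_changes fills in the lists at runtime.

monitoring_data = {
    '1-2': {  # volume number range covered by one polling thread
        'general': {'io': [], 'edge_clients': {}},
        'vdisk_001': {
            'io': {'down': [], 'descending': [], 'rising': [],
                   'highest': None, 'lowest': None},
            'edge_clients': {'down': [], 'up': []}
        }
    }
}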
Example #4
    def run_test(cls, vm_info, cluster_info, logger=LOGGER):
        """
        Tests the HA using a virtual machine which writes to its own filesystem
        :param cluster_info: information about the cluster, contains all dal objects
        :type cluster_info: dict
        :param vm_info: info about the vms
        :param logger: logging instance
        :return: None
        :rtype: NoneType
        """
        compute_client = SSHClient(cluster_info['storagerouters']['compute'],
                                   username='******')
        failed_configurations = []

        destination_storagedriver = cluster_info['storagedrivers'][
            'destination']
        source_storagedriver = cluster_info['storagedrivers']['source']

        # Cache to validate properties
        values_to_check = {
            'source_std': source_storagedriver.serialize(),
            'target_std': destination_storagedriver.serialize()
        }

        vm_to_stop = cls.HYPERVISOR_INFO['vms'][
            source_storagedriver.storage_ip]['name']
        parent_hypervisor = HypervisorFactory().get()
        # Extract vdisk info from vm_info
        vdisk_info = {}
        disk_amount = 0
        for vm_name, vm_object in vm_info.iteritems():
            for vdisk in vm_object['vdisks']:
                # Ignore the cd vdisk as no IO will come from it
                if vdisk.name == vm_object['cd_path'].replace(
                        '.raw', '').split('/')[-1]:
                    continue
                disk_amount += 1
                vdisk_info.update({vdisk.name: vdisk})

        with remote(compute_client.ip, [SSHClient]) as rem:
            configuration = random.choice(cls.DATA_TEST_CASES)
            threads = {'evented': {'io': {'pairs': [], 'r_semaphore': None}}}
            output_files = []
            vm_downed = False
            try:
                logger.info('Starting the following configuration: {0}'.format(
                    configuration))
                for vm_name, vm_data in vm_info.iteritems():
                    vm_client = rem.SSHClient(vm_data['ip'], cls.VM_USERNAME,
                                              cls.VM_PASSWORD)
                    vm_client.file_create('/mnt/data/{0}.raw'.format(
                        vm_data['create_msg']))
                    vm_data['client'] = vm_client
                io_thread_pairs, monitoring_data, io_r_semaphore = ThreadingHandler.start_io_polling_threads(
                    volume_bundle=vdisk_info)
                threads['evented']['io']['pairs'] = io_thread_pairs
                threads['evented']['io']['r_semaphore'] = io_r_semaphore
                for vm_name, vm_data in vm_info.iteritems():  # Write data
                    screen_names, output_files = DataWriter.write_data_fio(
                        client=vm_data['client'],
                        fio_configuration={
                            'io_size': cls.AMOUNT_TO_WRITE,
                            'configuration': configuration
                        },
                        file_locations=[
                            '/mnt/data/{0}.raw'.format(vm_data['create_msg'])
                        ])
                    vm_data['screen_names'] = screen_names
                logger.info(
                    'Doing IO for {0}s before bringing down the node.'.format(
                        cls.IO_TIME))
                ThreadingHandler.keep_threads_running(
                    r_semaphore=io_r_semaphore,
                    threads=io_thread_pairs,
                    shared_resource=monitoring_data,
                    duration=cls.IO_TIME)
                # Threads ready for monitoring at this point
                #########################
                # Bringing original owner of the volume down
                #########################
                try:
                    logger.info('Stopping {0}.'.format(vm_to_stop))
                    VMHandler.stop_vm(hypervisor=parent_hypervisor,
                                      vmid=vm_to_stop)
                    vm_downed = True
                except Exception as ex:
                    logger.error('Failed to stop. Got {0}'.format(str(ex)))
                    raise
                downed_time = time.time()
                time.sleep(cls.IO_REFRESH_RATE * 2)
                # Start IO polling to verify nothing went down
                ThreadingHandler.poll_io(
                    r_semaphore=io_r_semaphore,
                    required_thread_amount=len(io_thread_pairs),
                    shared_resource=monitoring_data,
                    downed_time=downed_time,
                    timeout=cls.HA_TIMEOUT,
                    output_files=output_files,
                    client=compute_client,
                    disk_amount=disk_amount)
                cls._validate(values_to_check, monitoring_data)
            except Exception as ex:
                logger.error(
                    'Running the test for configuration {0} has failed because {1}'
                    .format(configuration, str(ex)))
                failed_configurations.append({
                    'configuration': configuration,
                    'reason': str(ex)
                })
            finally:
                for thread_category, thread_collection in threads[
                        'evented'].iteritems():
                    ThreadHelper.stop_evented_threads(
                        thread_collection['pairs'],
                        thread_collection['r_semaphore'])
                if vm_downed is True:
                    VMHandler.start_vm(parent_hypervisor, vm_to_stop)
                    logger.debug('Started {0}'.format(vm_to_stop))
                    SystemHelper.idle_till_ovs_is_up(
                        source_storagedriver.storage_ip,
                        **cls.get_shell_user())
                    # @TODO: Remove when https://github.com/openvstorage/integrationtests/issues/540 is fixed
                    FwkHandler.restart_all()
                for vm_name, vm_data in vm_info.iteritems():
                    for screen_name in vm_data.get('screen_names', []):
                        logger.debug('Stopping screen {0} on {1}.'.format(
                            screen_name, vm_data['client'].ip))
                        vm_data['client'].run(
                            ['screen', '-S', screen_name, '-X', 'quit'])
                    vm_data['screen_names'] = []
        assert len(failed_configurations) == 0, \
            'Certain configurations failed: {0}'.format(' '.join(str(failed) for failed in failed_configurations))
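
An illustrative sketch of the vm_info shape this test reads, inferred from the accesses above; the vdisks entries are DAL VDisk objects in the real run, and the vdisk matching cd_path is skipped for IO.

vm_info = {
    'HA-vm-000': {
        'ip': '10.100.1.11',                       # reachable from the compute node
        'create_msg': 'HA-vm-000_fio',             # names the /mnt/data/<create_msg>.raw file
        'cd_path': '/mnt/vpool/HA-vm-000_cd.raw',  # its vdisk generates no IO and is ignored
        'vdisks': []                               # VDisk objects backing the VM
    }
}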
Example #5
    def run_test(cls, vm_info, cluster_info, logger=LOGGER):
        """
        Tests the DTL using a virtual machine which writes to its own filesystem
        Expects the last data to be pulled from the DTL and not from the backend
        :param cluster_info: information about the cluster, contains all dal objects
        :type cluster_info: dict
        :param vm_info: info about the vms
        :param logger: logging instance
        :return: None
        :rtype: NoneType
        """
        source_std = cluster_info['storagedrivers']['source']
        source_client = SSHClient(source_std.storagerouter, username='******')

        compute_str = cluster_info['storagerouters']['compute']
        compute_client = SSHClient(compute_str)

        # setup hypervisor details
        parent_hypervisor = HypervisorFactory().get()
        vm_to_stop = cls.HYPERVISOR_INFO['vms'][source_std.storage_ip]['name']

        vdisk_info = {}
        disk_amount = 0
        for vm_name, vm_object in vm_info.iteritems():
            for vdisk in vm_object['vdisks']:
                # Ignore the cd vdisk as no IO will come from it
                if vdisk.name == vm_object['cd_path'].replace(
                        '.raw', '').split('/')[-1]:
                    continue
                disk_amount += 1
                vdisk_info.update({vdisk.name: vdisk})

        # Cache to validate properties
        values_to_check = {
            'source_std': source_std.serialize(),
            'vdisks': vdisk_info.values()
        }

        with remote(compute_str.ip, [SSHClient]) as rem:
            threads = {'evented': {'io': {'pairs': [], 'r_semaphore': None}}}
            vm_downed = False
            output_files = []
            try:
                for vm_name, vm_data in vm_info.iteritems():
                    vm_client = rem.SSHClient(vm_data['ip'], cls.VM_USERNAME,
                                              cls.VM_PASSWORD)
                    vm_client.file_create('/mnt/data/{0}.raw'.format(
                        vm_data['create_msg']))
                    vm_data['client'] = vm_client
                    # Load dd, md5sum, screen & fio in memory
                    vm_data['client'].run([
                        'dd', 'if=/dev/urandom',
                        'of={0}'.format(cls.VM_RANDOM), 'bs=1M', 'count=2'
                    ])
                    vm_data['client'].run(['md5sum', cls.VM_RANDOM])

                logger.info("Stopping proxy services")
                service_manager = ServiceFactory.get_manager()

                for proxy in source_std.alba_proxies:
                    service_manager.restart_service(proxy.service.name,
                                                    client=source_client)

                logger.info('Starting to write a file while the proxy is offline. All data should be stored in the DTL!')
                for vm_name, vm_data in vm_info.iteritems():
                    vm_data['client'].run(
                        'dd if=/dev/urandom of={0} bs=1M count=2'.format(
                            cls.VM_FILENAME).split())
                    original_md5sum = ' '.join(vm_data['client'].run(
                        ['md5sum', cls.VM_FILENAME]).split())
                    vm_data['original_md5sum'] = original_md5sum
                    logger.info('Original MD5SUM for VM {0}: {1}.'.format(
                        vm_name, original_md5sum))
                logger.info('Finished writing the file while the proxy was offline!')
                logger.info('Starting fio to generate IO for failing over.')
                io_thread_pairs, monitoring_data, io_r_semaphore = ThreadingHandler.start_io_polling_threads(
                    volume_bundle=vdisk_info)
                threads['evented']['io']['pairs'] = io_thread_pairs
                threads['evented']['io']['r_semaphore'] = io_r_semaphore
                for vm_name, vm_data in vm_info.iteritems():  # Write data
                    screen_names, output_files = DataWriter.write_data_fio(
                        client=vm_data['client'],
                        fio_configuration={
                            'io_size': cls.AMOUNT_TO_WRITE,
                            'configuration': cls.IO_PATTERN
                        },
                        file_locations=[
                            '/mnt/data/{0}.raw'.format(vm_data['create_msg'])
                        ])
                    vm_data['screen_names'] = screen_names
                logger.info(
                    'Doing IO for {0}s before bringing down the node.'.format(
                        cls.IO_TIME))
                ThreadingHandler.keep_threads_running(
                    r_semaphore=io_r_semaphore,
                    threads=io_thread_pairs,
                    shared_resource=monitoring_data,
                    duration=cls.IO_TIME)
                ##############################################
                # Bringing original owner of the volume down #
                ##############################################
                VMHandler.stop_vm(hypervisor=parent_hypervisor,
                                  vmid=vm_to_stop)
                vm_downed = True
                downed_time = time.time()
                time.sleep(cls.IO_REFRESH_RATE * 2)
                # Start IO polling to verify nothing went down
                ThreadingHandler.poll_io(
                    r_semaphore=io_r_semaphore,
                    required_thread_amount=len(io_thread_pairs),
                    shared_resource=monitoring_data,
                    downed_time=downed_time,
                    timeout=cls.HA_TIMEOUT,
                    output_files=output_files,
                    client=compute_client,
                    disk_amount=disk_amount)
                logger.info('Starting to validate move...')
                cls._validate_move(values_to_check)
                logger.info('Finished validating move!')
                logger.info('Validate if DTL is working correctly!')
                unmatching_checksum_vms = []
                for vm_name, vm_data in vm_info.iteritems():
                    current_md5sum = ' '.join(vm_data['client'].run(
                        ['md5sum', cls.VM_FILENAME]).split())
                    if vm_data['original_md5sum'] != current_md5sum:
                        unmatching_checksum_vms.append(vm_name)
                assert len(
                    unmatching_checksum_vms
                ) == 0, 'Not all data was read from the DTL. Checksums do not line up for {}'.format(
                    ', '.join(unmatching_checksum_vms))
                logger.info('DTL is working correctly!')
            finally:
                for thread_category, thread_collection in threads[
                        'evented'].iteritems():
                    ThreadHelper.stop_evented_threads(
                        thread_collection['pairs'],
                        thread_collection['r_semaphore'])
                if vm_downed is True:
                    VMHandler.start_vm(parent_hypervisor, vm_to_stop)
                    logger.debug('Started {0}'.format(vm_to_stop))
                    SystemHelper.idle_till_ovs_is_up(source_std.storage_ip,
                                                     **cls.get_shell_user())
                    # @TODO: Remove when https://github.com/openvstorage/integrationtests/issues/540 is fixed
                    FwkHandler.restart_all()
                for vm_name, vm_data in vm_info.iteritems():
                    for screen_name in vm_data.get('screen_names', []):
                        logger.debug('Stopping screen {0} on {1}.'.format(
                            screen_name, vm_data['client'].ip))
                        vm_data['client'].run(
                            ['screen', '-S', screen_name, '-X', 'quit'])
                    vm_data['screen_names'] = []
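
The DTL check above boils down to: write data while the proxy is unavailable, record a checksum, fail the node over, and verify the checksum is unchanged. A local, hypothetical equivalent of that checksum step using hashlib:

import hashlib
import os

def md5sum(path):
    with open(path, 'rb') as handle:
        return hashlib.md5(handle.read()).hexdigest()

path = '/tmp/dtl_test.raw'
with open(path, 'wb') as handle:  # stands in for the dd writes on the VM
    handle.write(os.urandom(2 * 1024 * 1024))
original_md5sum = md5sum(path)    # recorded while the proxy is offline
# ... failover of the source node would happen here ...
assert md5sum(path) == original_md5sum, 'Not all data was read from the DTL'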
Example #6
    def run_test(cls, cluster_info, compute_client, vm_info, vm_username=CIConstants.VM_USERNAME, vm_password=CIConstants.VM_PASSWORD,
                 timeout=TEST_TIMEOUT, data_test_cases=CIConstants.DATA_TEST_CASES, logger=LOGGER):
        """
        Runs the test as described in https://github.com/openvstorage/dev_ops/issues/64
        :param cluster_info: information about the cluster
        :param compute_client: SSHClient of the compute node
        :param vm_info: vm information
        :param vm_username: username to login on all vms
        :param vm_password: password to login on all vms
        :param timeout: timeout in seconds
        :param data_test_cases: data rw ratios to test
        :param logger: logging instance
        :return: None
        :rtype: NoneType
        """
        compute_str = cluster_info['storagerouters']['compute']
        destination_storagedriver = cluster_info['storagedrivers']['destination']
        source_storagedriver = cluster_info['storagedrivers']['source']

        # Cache to validate properties
        values_to_check = {
            'source_std': source_storagedriver.serialize(),
            'target_std': destination_storagedriver.serialize()
        }
        # Prep VM listener #
        failed_configurations = []
        # Extract vdisk info from vm_info - only get the data ones
        vdisk_info = {}
        disk_amount = 0
        for vm_name, vm_object in vm_info.iteritems():
            for vdisk in vm_object['vdisks']:
                if 'vdisk_data' in vdisk.name:
                    vdisk_info.update({vdisk.name: vdisk})
                    disk_amount += 1
        try:
            cls._adjust_automatic_scrubbing(disable=True)
            with remote(compute_str.ip, [SSHClient]) as rem:
                configuration = random.choice(data_test_cases)
                threads = {'evented': {'io': {'pairs': [], 'r_semaphore': None},
                                       'snapshots': {'pairs': [], 'r_semaphore': None}}}
                output_files = []
                safety_set = False
                try:
                    logger.info('Starting the following configuration: {0}'.format(configuration))
                    for vm_name, vm_data in vm_info.iteritems():
                        vm_client = rem.SSHClient(vm_data['ip'], vm_username, vm_password)
                        vm_client.file_create('/mnt/data/{0}.raw'.format(vm_data['create_msg']))
                        vm_data['client'] = vm_client
                    cls._set_mds_safety(source_storagedriver.vpool, 1, checkup=True)  # Set the safety to trigger the mds
                    safety_set = True
                    io_thread_pairs, monitoring_data, io_r_semaphore = ThreadingHandler.start_io_polling_threads(volume_bundle=vdisk_info)
                    threads['evented']['io']['pairs'] = io_thread_pairs
                    threads['evented']['io']['r_semaphore'] = io_r_semaphore
                    # @todo snapshot every minute
                    threads['evented']['snapshots']['pairs'] = ThreadingHandler.start_snapshotting_threads(volume_bundle=vdisk_info, kwargs={'interval': 15})
                    for vm_name, vm_data in vm_info.iteritems():  # Write data
                        screen_names, output_files = DataWriter.write_data_fio(client=vm_data['client'],
                                                                               fio_configuration={
                                                                                   'io_size': cls.AMOUNT_TO_WRITE,
                                                                                   'configuration': configuration},
                                                                               file_locations=['/mnt/data/{0}.raw'.format(vm_data['create_msg'])])
                        vm_data['screen_names'] = screen_names
                    logger.info('Doing IO for {0}s before bringing down the node.'.format(cls.IO_TIME))
                    ThreadingHandler.keep_threads_running(r_semaphore=io_r_semaphore,
                                                          threads=io_thread_pairs,
                                                          shared_resource=monitoring_data,
                                                          duration=cls.IO_TIME / 2)
                    ThreadHelper.stop_evented_threads(threads['evented']['snapshots']['pairs'],
                                                      threads['evented']['snapshots']['r_semaphore'])  # Stop snapshotting
                    cls._delete_snapshots(volume_bundle=vdisk_info)
                    # Start scrubbing thread
                    async_scrubbing = cls.start_scrubbing(volume_bundle=vdisk_info)  # Starting to scrub
                    cls._trigger_mds_issue(cluster_info['vpool'], vdisk_info, destination_storagedriver.storagerouter.guid)  # Trigger mds failover while scrubber is busy
                    # Do some monitoring further for 60s
                    ThreadingHandler.keep_threads_running(r_semaphore=io_r_semaphore,
                                                          threads=io_thread_pairs,
                                                          shared_resource=monitoring_data,
                                                          duration=cls.IO_TIME / 2)
                    time.sleep(cls.IO_REFRESH_RATE * 2)
                    downed_time = time.time()
                    # Start IO polling to verify nothing went down
                    ThreadingHandler.poll_io(r_semaphore=io_r_semaphore,
                                             required_thread_amount=len(io_thread_pairs),
                                             shared_resource=monitoring_data,
                                             downed_time=downed_time,
                                             timeout=timeout,
                                             output_files=output_files,
                                             client=compute_client,
                                             disk_amount=disk_amount)
                    possible_scrub_errors = async_scrubbing.get()  # Wait until scrubbing calls have given a result
                    assert len(possible_scrub_errors) == 0, 'Scrubbing has encountered some errors: {0}'.format(', '.join(possible_scrub_errors))
                    cls._validate(values_to_check, monitoring_data)
                except Exception as ex:
                    logger.error('Running the test for configuration {0} has failed because {1}'.format(configuration, str(ex)))
                    failed_configurations.append({'configuration': configuration, 'reason': str(ex)})
                    raise
                finally:
                    for thread_category, thread_collection in threads['evented'].iteritems():
                        ThreadHelper.stop_evented_threads(thread_collection['pairs'], thread_collection['r_semaphore'])
                    for vm_name, vm_data in vm_info.iteritems():
                        for screen_name in vm_data.get('screen_names', []):
                            logger.debug('Stopping screen {0} on {1}.'.format(screen_name, vm_data['client'].ip))
                            vm_data['client'].run(['screen', '-S', screen_name, '-X', 'quit'])
                        vm_data['screen_names'] = []
                    if safety_set is True:
                        cls._set_mds_safety(source_storagedriver.vpool, len(StorageRouterList.get_masters()), checkup=True)
        finally:
            cls._adjust_automatic_scrubbing(disable=False)
        assert len(failed_configurations) == 0, 'Certain configurations failed: {0}'.format(' '.join(str(failed) for failed in failed_configurations))
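
For reference, the thread bookkeeping dict used above: one entry per evented category ('io' and 'snapshots'), each holding its (thread, event) pairs and the Waiter pacing them, so the finally block can tear every category down with the same loop.

threads = {
    'evented': {
        'io': {'pairs': [], 'r_semaphore': None},
        'snapshots': {'pairs': [], 'r_semaphore': None}
    }
}
for thread_category, thread_collection in threads['evented'].items():
    pairs, r_semaphore = thread_collection['pairs'], thread_collection['r_semaphore']
    # the tests hand these to ThreadHelper.stop_evented_threads(pairs, r_semaphore)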
Example #7
    def test_reroute_fio(cls, fio_bin_path, cluster_info, disk_amount=1, timeout=CIConstants.HA_TIMEOUT, is_ee=False, logger=LOGGER):
        """
        Uses a modified fio to work with the openvstorage protocol
        :param fio_bin_path: path of the fio binary
        :type fio_bin_path: str
        :param cluster_info: information about the cluster, contains all dal objects
        :type cluster_info: dict
        :param disk_amount: amount of disks to test fail over with
        :type disk_amount: int
        :param timeout: timeout in seconds
        :type timeout: int
        :param is_ee: is it the enterprise edition
        :type is_ee: bool
        :param logger: logger instance
        :type logger: ovs.log.log_handler.LogHandler
        :return: None
        :rtype: NoneType
        """
        compute_client = SSHClient(cluster_info['storagerouters']['compute'], username='******')

        destination_std = cluster_info['storagedrivers']['destination']
        source_std = cluster_info['storagedrivers']['source']  # will be downed
        vpool = source_std.vpool

        values_to_check = {
            'source_std': source_std.serialize(),
            'target_std': destination_std.serialize(),
            'vdisks': []
        }
        # Create vdisks
        protocol = source_std.cluster_node_config['network_server_uri'].split(':')[0]
        edge_configuration = {'fio_bin_location': fio_bin_path, 'hostname': source_std.storage_ip,
                              'port': source_std.ports['edge'],
                              'protocol': protocol,
                              'volumenames': []}
        vdisk_info = {}
        failed_configurations = []

        if is_ee is True:
            edge_configuration.update(cls.get_shell_user())

        for index in xrange(0, disk_amount):
            try:
                vdisk_name = '{0}_vdisk{1}'.format(EdgeTester.TEST_NAME, str(index).zfill(4))
                data_vdisk = VDiskHelper.get_vdisk_by_guid(VDiskSetup.create_vdisk(vdisk_name, vpool.name, EdgeTester.AMOUNT_TO_WRITE * 2, source_std.storage_ip))
                vdisk_info[vdisk_name] = data_vdisk
                edge_configuration['volumenames'].append(data_vdisk.devicename.rsplit('.', 1)[0].split('/', 1)[1])
                values_to_check['vdisks'].append(data_vdisk.serialize())
            except RuntimeError as ex:
                logger.error('Could not create the vdisk. Got {0}'.format(str(ex)))
                raise
        configuration = random.choice(cls.DATA_TEST_CASES)
        threads = {'evented': {'io': {'pairs': [], 'r_semaphore': None}}}
        screen_names = []
        adjusted = False
        try:
            io_thread_pairs, monitoring_data, io_r_semaphore = ThreadingHandler.start_io_polling_threads(volume_bundle=vdisk_info)
            threads['evented']['io']['pairs'] = io_thread_pairs
            threads['evented']['io']['r_semaphore'] = io_r_semaphore
            screen_names, output_files = DataWriter.write_data_fio(client=compute_client,
                                                                   fio_configuration={'io_size': cls.AMOUNT_TO_WRITE,
                                                                                      'configuration': configuration},
                                                                   edge_configuration=edge_configuration)
            logger.info('Doing IO for {0}s before bringing down the node.'.format(cls.IO_TIME))
            ThreadingHandler.keep_threads_running(r_semaphore=io_r_semaphore,
                                                  threads=io_thread_pairs,
                                                  shared_resource=monitoring_data,
                                                  duration=cls.IO_TIME)
            # Threads ready for monitoring at this point, they are waiting to resume
            EdgeTester.adjust_for_reroute(source_std.storagerouter, trigger_rerout=True, ip_to_block=compute_client.ip, additional_ports=[edge_configuration['port']])
            adjusted = True
            downed_time = time.time()
            logger.info('Now waiting two refreshrate intervals to avoid caching. In total {}s'.format(EdgeTester.IO_REFRESH_RATE * 2))
            time.sleep(cls.IO_REFRESH_RATE * 2)
            ThreadingHandler.poll_io(r_semaphore=io_r_semaphore,
                                     required_thread_amount=len(io_thread_pairs),
                                     shared_resource=monitoring_data,
                                     downed_time=downed_time,
                                     timeout=timeout,
                                     output_files=output_files,
                                     client=compute_client,
                                     disk_amount=disk_amount)
            EdgeTester._validate_dal(values_to_check)  # Validate
        except Exception as ex:
            logger.error('Got an exception while running configuration {0}. Namely: {1}'.format(configuration, str(ex)))
            failed_configurations.append({'configuration': configuration, 'reason': str(ex)})
        finally:
            if adjusted is True:
                EdgeTester.adjust_for_reroute(source_std.storagerouter, trigger_rerout=False, ip_to_block=compute_client.ip, additional_ports=[edge_configuration['port']])
            for screen_name in screen_names:
                compute_client.run(['screen', '-S', screen_name, '-X', 'quit'])
            for thread_category, thread_collection in threads['evented'].iteritems():
                ThreadHelper.stop_evented_threads(thread_collection['pairs'], thread_collection['r_semaphore'])
            for vdisk in vdisk_info.values():
                VDiskRemover.remove_vdisk(vdisk.guid)
        assert len(failed_configurations) == 0, 'Certain configurations failed: {0}'.format(failed_configurations)
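
An illustrative sketch of the edge_configuration dict handed to DataWriter.write_data_fio, inferred from the assignments above; all values are placeholders, and on the enterprise edition cls.get_shell_user() adds the edge credentials on top.

edge_configuration = {
    'fio_bin_location': '/tmp/fio.bin',           # fio_bin_path argument
    'hostname': '10.100.1.21',                    # storage_ip of the source storagedriver
    'port': 26203,                                # edge port of the source storagedriver
    'protocol': 'tcp',                            # parsed from network_server_uri
    'volumenames': ['edge_tester_vdisk0000']      # devicenames stripped of path and extension
}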
Example #8
    def live_migrate(cls,
                     vm_info,
                     cluster_info,
                     disk_amount,
                     hypervisor_info,
                     logger=LOGGER):
        """
        Execute the live migration test
        Migrates the VM away using the libvirt migrate call
        Expects the DAL to be updated due to the IO causing the volumedriver to move the volume
        :param vm_info: info about the vms
        :param cluster_info: information about the cluster, contains all dal objects
        :param disk_amount: amount of disks to monitor IO on
        :param hypervisor_info: credentials and type of the hypervisor hosting the vms
        :param logger: logging instance
        :return: None
        :rtype: NoneType
        """
        failed_configurations = []

        destination_storagedriver = cluster_info['storagedrivers'][
            'destination']
        source_storagedriver = cluster_info['storagedrivers']['source']

        hv_credentials = HypervisorCredentials(
            ip=source_storagedriver.storage_ip,
            user=hypervisor_info['user'],
            password=hypervisor_info['password'],
            type=hypervisor_info['type'])
        source_hypervisor = HypervisorFactory().get(
            hv_credentials=hv_credentials)
        client = SSHClient(source_storagedriver.storagerouter)
        # Cache to validate properties
        values_to_check = {
            'source_std': source_storagedriver.serialize(),
            'target_std': destination_storagedriver.serialize()
        }

        # Extract vdisk info from vm_info
        vdisk_info = {}
        for vm_name, vm_object in vm_info.iteritems():
            for vdisk in vm_object['vdisks']:
                vdisk_info.update({vdisk.name: vdisk})

        with remote(source_storagedriver.storage_ip, [SSHClient]) as rem:
            test_run_nr = 0
            configuration = random.choice(cls.DATA_TEST_CASES)
            threads = {'evented': {'io': {'pairs': [], 'r_semaphore': None}}}
            output_files = []
            try:
                logger.info('Starting the following configuration: {0}'.format(
                    configuration))
                if test_run_nr == 0:  # Build reusable ssh clients
                    for vm_name, vm_data in vm_info.iteritems():
                        vm_client = rem.SSHClient(vm_data['ip'],
                                                  cls.VM_USERNAME,
                                                  cls.VM_PASSWORD)
                        vm_client.file_create('/mnt/data/{0}.raw'.format(
                            vm_data['create_msg']))
                        vm_data['client'] = vm_client
                else:
                    for vm_name, vm_data in vm_info.iteritems():
                        vm_data['client'].run([
                            'rm',
                            '/mnt/data/{0}.raw'.format(vm_data['create_msg'])
                        ])
                io_thread_pairs, monitoring_data, io_r_semaphore = ThreadingHandler.start_io_polling_threads(
                    volume_bundle=vdisk_info)
                threads['evented']['io']['pairs'] = io_thread_pairs
                threads['evented']['io']['r_semaphore'] = io_r_semaphore
                for vm_name, vm_data in vm_info.iteritems():  # Write data
                    screen_names, output_files = DataWriter.write_data_fio(
                        client=vm_data['client'],
                        fio_configuration={
                            'io_size': cls.AMOUNT_TO_WRITE,
                            'configuration': configuration
                        },
                        file_locations=[
                            '/mnt/data/{0}.raw'.format(vm_data['create_msg'])
                        ])
                    vm_data['screen_names'] = screen_names
                logger.info(
                    'Doing IO for {0}s before bringing down the node.'.format(
                        cls.IO_TIME))
                ThreadingHandler.keep_threads_running(
                    r_semaphore=io_r_semaphore,
                    threads=io_thread_pairs,
                    shared_resource=monitoring_data,
                    duration=cls.IO_TIME)
                # Threads ready for monitoring at this point
                #########################
                # Migrate the VMs
                #########################
                try:
                    logger.info('Migrating the VM.')
                    for vm_name in vm_info:
                        source_hypervisor.sdk.migrate(
                            vm_name, destination_storagedriver.storage_ip,
                            hypervisor_info['user'])
                except Exception as ex:
                    logger.error('Failed to migrate. Got {0}'.format(str(ex)))
                    raise
                downed_time = time.time()
                time.sleep(cls.IO_REFRESH_RATE * 2)
                # Start IO polling to verify nothing went down
                ThreadingHandler.poll_io(
                    r_semaphore=io_r_semaphore,
                    required_thread_amount=len(io_thread_pairs),
                    shared_resource=monitoring_data,
                    downed_time=downed_time,
                    timeout=cls.FAILOVER_TIMEOUT,
                    output_files=output_files,
                    client=client,
                    disk_amount=disk_amount)
                # Do some more IO to trigger ownership migration
                ThreadingHandler.keep_threads_running(
                    r_semaphore=io_r_semaphore,
                    threads=io_thread_pairs,
                    shared_resource=monitoring_data,
                    duration=cls.IO_TIME)
                cls._validate_move(values_to_check)
            except Exception as ex:
                logger.error(
                    'Running the test for configuration {0} has failed because {1}'
                    .format(configuration, str(ex)))
                failed_configurations.append({
                    'configuration': configuration,
                    'reason': str(ex)
                })
            finally:
                for thread_category, thread_collection in threads[
                        'evented'].iteritems():
                    ThreadHelper.stop_evented_threads(
                        thread_collection['pairs'],
                        thread_collection['r_semaphore'])
                for vm_name, vm_data in vm_info.iteritems():
                    for screen_name in vm_data.get('screen_names', []):
                        logger.debug('Stopping screen {0} on {1}.'.format(
                            screen_name, vm_data['client'].ip))
                        vm_data['client'].run(
                            ['screen', '-S', screen_name, '-X', 'quit'])
                    vm_data['screen_names'] = []
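
An illustrative sketch of the hypervisor_info shape consumed when building the HypervisorCredentials above; the values are placeholders.

hypervisor_info = {
    'type': 'KVM',            # hypervisor type known to the HypervisorFactory
    'user': 'root',
    'password': '<password>'
}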