def migrate_live(source, destination, vm, domain):
    """Live-migrates a VM via libvirt.

    Runs the actual migration in a background worker thread (via
    ``parallel``) while the main thread polls libvirt job statistics to
    log progress once per second.  On Ctrl-C the libvirt job is aborted
    and the worker's exception is re-raised; on success the VM is
    re-pinned on the destination hypervisor.

    :param source: source Hypervisor object (has ``dataset_obj``)
    :param destination: destination Hypervisor object
    :param vm: the VM being migrated
    :param domain: libvirt domain handle of the running VM on the source
    """
    # Reduce CPU pinning to minimum number of available cores on both
    # hypervisors to avoid "invalid cpuset" errors.
    props = DomainProperties.from_running(source, vm, domain)
    _live_repin_cpus(
        domain,
        props,
        min(source.dataset_obj['num_cpu'], destination.dataset_obj['num_cpu']),
    )
    migrate_flags = (
        VIR_MIGRATE_LIVE |              # Do it live
        VIR_MIGRATE_PERSIST_DEST |      # Define the VM on the new host
        VIR_MIGRATE_CHANGE_PROTECTION |  # Protect source VM
        VIR_MIGRATE_NON_SHARED_DISK |   # Copy non-shared storage
        VIR_MIGRATE_AUTO_CONVERGE |     # Slow down VM if can't migrate memory
        VIR_MIGRATE_ABORT_ON_ERROR      # Don't tolerate soft errors
    )
    migrate_params = {}
    # Append OS-specific migration commands. They might not exist for some
    # combinations but this should have already been checked by the caller.
    # NOTE(review): if the (source OS, destination OS) pair is missing,
    # .get() returns None and the ['flags'] subscript raises TypeError —
    # relies on the caller's pre-check, confirm that invariant holds.
    migrate_flags |= MIGRATE_CONFIG.get(
        (source.dataset_obj['os'], destination.dataset_obj['os']))['flags']
    log.info('Starting online migration of vm {} from {} to {}'.format(
        vm, source, destination,
    ))
    # Kick off the real migration in a single background worker; we keep
    # the future so we can poll completion and re-raise its exception.
    future = parallel(
        migrate_background,
        args=[[
            domain, source, destination, migrate_params, migrate_flags,
        ]],
        workers=1,
        return_results=False,
    )[0]
    try:
        while future.running():
            try:
                js = domain.jobStats()
            except libvirtError:
                # When migration is finished, jobStats will fail
                break
            if 'memory_total' in js and 'disk_total' in js:
                # "+ 1" in the divisors guards against division by zero
                # while barely affecting the reported percentage.
                log.info(
                    ('Migration progress: '
                     'disk {:.0f}% {:.0f}/{:.0f}MiB, '
                     'memory {:.0f}% {:.0f}/{:.0f}MiB, ').format(
                        js['disk_processed'] / (js['disk_total'] + 1) * 100,
                        js['disk_processed'] / 1024 / 1024,
                        js['disk_total'] / 1024 / 1024,
                        js['memory_processed'] / (js['memory_total'] + 1) * 100,
                        js['memory_processed'] / 1024 / 1024,
                        js['memory_total'] / 1024 / 1024,
                    ))
            else:
                log.info('Waiting for migration stats to show up')
            time.sleep(1)
    except KeyboardInterrupt:
        # Operator requested abort: cancel the libvirt job, then wait for
        # the worker thread to unwind.
        domain.abortJob()
        log.info('Awaiting migration to abort')
        future.result()
        # Nothing to log, the function above raised an exception
    else:
        log.info('Awaiting migration to finish')
        future.result()  # Exception from slave thread will re-raise here
        log.info('Migration finished')
    # And pin again, in case we migrated to a host with more physical cores
    domain = destination._get_domain(vm)
    _live_repin_cpus(domain, props, destination.dataset_obj['num_cpu'])
def _get_best_hypervisor(
    vm,
    hypervisor_states,
    offline=False,
    enforce_vm_env=False,
    soft_preferences=False,
):
    """Find and yield a hypervisor suitable for *vm*, locked for use.

    Generator (presumably wrapped in ``contextlib.contextmanager`` by a
    decorator outside this view — confirm at the definition site): yields
    exactly one locked Hypervisor and releases its lock when the caller's
    ``with`` block exits.

    NOTE(review): a second ``_get_best_hypervisor`` defined later in this
    file shadows this one at import time — confirm which definition is
    meant to survive.

    :param vm: VM to place (provides ``route_network`` and ``dataset_obj``)
    :param hypervisor_states: admissible Serveradmin states for candidates
    :param offline: passed through to ``_check_vm``
    :param enforce_vm_env: restrict candidates to the VM's environment
        (ignored when the IGVM_MODE environment variable is set)
    :param soft_preferences: passed through to ``sort_by_preference``
    :raises IGVMError: when no candidate hypervisor passes the checks
    """
    hv_filter = {
        'servertype': 'hypervisor',
        'vlan_networks': vm.route_network,
        'state': Any(*hypervisor_states),
    }
    # Enforce IGVM_MODE used for tests
    if 'IGVM_MODE' in environ:
        hv_filter['environment'] = environ.get('IGVM_MODE')
    else:
        if enforce_vm_env:
            hv_filter['environment'] = vm.dataset_obj['environment']
    # Get all (theoretically) possible HVs sorted by HV preferences
    hypervisors = (
        Hypervisor(o) for o in Query(hv_filter, HYPERVISOR_ATTRIBUTES)
    )
    hypervisors = sort_by_preference(
        vm,
        HYPERVISOR_PREFERENCES,
        hypervisors,
        soft_preferences,
    )
    # Keyed by str(hv) so parallel() results can be matched back;
    # insertion order preserves the preference ranking.
    possible_hvs = OrderedDict()
    for possible_hv in hypervisors:
        possible_hvs[str(possible_hv)] = possible_hv
    # Check all HVs in parallel. This will check live data on those HVs
    # but without locking them. This allows us to do a real quick first
    # filtering round. Below follows another one on the filtered HVs only.
    chunk_size = 10
    iterations = math.ceil(len(possible_hvs) / chunk_size)
    found_hv = None
    # We are checking HVs in chunks. This will enable us to select HVs early
    # without looping through all of them if unnecessary.
    for i in range(iterations):
        start_idx = i * chunk_size
        end_idx = start_idx + chunk_size
        hv_chunk = dict(list(possible_hvs.items())[start_idx:end_idx])
        results = parallel(
            _check_vm,
            identifiers=list(hv_chunk.keys()),
            args=[
                [possible_hv, vm, offline]
                for possible_hv in hv_chunk.values()
            ],
            workers=chunk_size,
        )
        # Remove unsupported HVs from the list
        for checked_hv, success in results.items():
            if not success:
                hv_chunk.pop(checked_hv)
        # Do another checking iteration, this time with HV locking
        for possible_hv in hv_chunk.values():
            try:
                possible_hv.acquire_lock()
            except InvalidStateError as e:
                # Someone else holds the lock; try the next candidate.
                log.warning(e)
                continue
            if not _check_vm(possible_hv, vm, offline):
                # Live state changed since the unlocked check; back off.
                possible_hv.release_lock()
                continue
            # HV found
            found_hv = possible_hv
            break
        if found_hv:
            break
    if not found_hv:
        # No supported HV was found
        raise IGVMError(
            'Automatically finding the best Hypervisor failed! '
            'Can not find a suitable hypervisor with the preferences and '
            'the Query: {}'.format(hv_filter))
    # Yield the hypervisor locked for working on it
    try:
        log.info('Picked {} as destination Hypervisor'.format(str(found_hv)))
        yield found_hv
    finally:
        found_hv.release_lock()
def _get_best_hypervisor(vm, hypervisor_states, offline=False):
    """Yield one suitable, lock-held hypervisor for *vm*.

    Candidates are queried from Serveradmin, ordered by preference,
    pre-filtered with an unlocked parallel check, and then re-checked
    one by one under lock.  The first candidate that passes the locked
    check is yielded; its lock is released once the consumer is done.

    :raises IGVMError: if no hypervisor passes the checks
    """
    env = environ.get('IGVM_MODE', 'production')

    # Query every (theoretically) matching hypervisor and order the
    # results by our placement preferences.
    candidates = sorted_hypervisors(
        HYPERVISOR_PREFERENCES,
        vm,
        (Hypervisor(o) for o in Query(
            {
                'servertype': 'hypervisor',
                'environment': env,
                'vlan_networks': vm.route_network,
                'state': Any(*hypervisor_states),
            },
            HYPERVISOR_ATTRIBUTES,
        )),
    )

    # Preference-ordered mapping keyed by str(hv) so that the parallel
    # check results can be matched back to their hypervisors.
    by_name = OrderedDict((str(hv), hv) for hv in candidates)

    # First pass: check live data on every candidate concurrently,
    # without taking any locks, to cheaply narrow the field.
    check_results = parallel(
        _check_vm,
        identifiers=list(by_name.keys()),
        args=[[hv, vm, offline] for hv in by_name.values()],
    )
    for name, passed in check_results.items():
        if not passed:
            by_name.pop(name)

    # Prepared up front so both failure paths raise the same error.
    no_match = IGVMError(
        'Cannot find hypervisor matching environment: {}, '
        'states: {}, vlan_network: {}, offline: {}'.format(
            env,
            ', '.join(hypervisor_states),
            vm.route_network,
            offline,
        ))

    if not by_name:
        raise no_match

    # Second pass: take each surviving candidate's lock and re-check it;
    # hand out the first one that still qualifies.
    for hv in by_name.values():
        try:
            hv.acquire_lock()
        except InvalidStateError as err:
            log.warning(err)
            continue

        if not _check_vm(hv, vm, offline):
            hv.release_lock()
            continue

        try:
            yield hv
            break
        finally:
            hv.release_lock()
    else:
        raise no_match