Example 1
from vdsm.common.compat import pickle
from vdsm.common.define import NORMAL
from vdsm.common.network.address import normalize_literal_addr
from vdsm.common.units import MiB
from vdsm.config import config
from vdsm.virt.utils import DynamicBoundedSemaphore

from vdsm.virt import virdomain
from vdsm.virt import vmexitreason
from vdsm.virt import vmstatus

MODE_REMOTE = 'remote'
MODE_FILE = 'file'

METHOD_ONLINE = 'online'

# Throttle concurrent incoming migrations on this host; the bound comes from
# the 'max_incoming_migrations' config option and, being a
# DynamicBoundedSemaphore, can be adjusted at runtime.
incomingMigrations = DynamicBoundedSemaphore(
    max(1, config.getint('vars', 'max_incoming_migrations')))

CONVERGENCE_SCHEDULE_SET_DOWNTIME = "setDowntime"
CONVERGENCE_SCHEDULE_POST_COPY = "postcopy"
CONVERGENCE_SCHEDULE_SET_ABORT = "abort"

ADDRESS = '0'
PORT = 54321


class MigrationDestinationSetupError(RuntimeError):
    """
    Failed to create migration destination VM.
    """

Example 2
class SourceThread(object):
    """
    A thread that takes care of migration on the source vdsm.
    """
    _RECOVERY_LOOP_PAUSE = 10  # seconds between recovery job_stats() polls

    ongoingMigrations = DynamicBoundedSemaphore(1)

    def __init__(self,
                 vm,
                 dst='',
                 dstparams='',
                 mode=MODE_REMOTE,
                 method=METHOD_ONLINE,
                 tunneled=False,
                 dstqemu='',
                 abortOnError=False,
                 consoleAddress=None,
                 compressed=False,
                 autoConverge=False,
                 recovery=False,
                 encrypted=False,
                 **kwargs):
        self.log = vm.log
        self._vm = vm
        self._dst = dst
        self._mode = mode
        self._dstparams = dstparams
        self._enableGuestEvents = kwargs.get('enableGuestEvents', False)
        # TODO: conv.tobool shouldn't be used in this constructor, the
        # conversions should be handled properly in the API layer
        self._consoleAddress = consoleAddress
        self._dstqemu = dstqemu
        self._encrypted = encrypted
        self._maxBandwidth = int(
            kwargs.get('maxBandwidth')
            or config.getint('vars', 'migration_max_bandwidth'))
        self._incomingLimit = kwargs.get('incomingLimit')
        self._outgoingLimit = kwargs.get('outgoingLimit')
        self.status = {
            'status': {
                'code': 0,
                'message': 'Migration in progress'
            }
        }
        # we need to guard against concurrent updates only
        self._lock = threading.Lock()
        self._progress = 0
        self._thread = concurrent.thread(self.run,
                                         name='migsrc/' + self._vm.id[:8])
        self._preparingMigrationEvt = True
        self._migrationCanceledEvt = threading.Event()
        self._monitorThread = None
        self._destServer = None
        self._legacy_payload_path = None
        if 'convergenceSchedule' in kwargs:
            self._convergence_schedule = kwargs['convergenceSchedule']
        else:
            # Needed for Engine < 4.3 or when legacy migration is used
            # as a supposedly rare fallback in Engine >= 4.3.
            self._convergence_schedule = \
                self._legacy_convergence_schedule(kwargs.get('downtime'))
            self.log.info(
                'using a computed convergence schedule for '
                'a legacy migration: %s', self._convergence_schedule)
        self.log.debug('convergence schedule set to: %s',
                       str(self._convergence_schedule))
        self._started = False
        self._failed = False
        self._recovery = recovery
        tunneled = conv.tobool(tunneled)
        abortOnError = conv.tobool(abortOnError)
        compressed = conv.tobool(compressed)
        autoConverge = conv.tobool(autoConverge)
        self._migration_flags = self._calculate_migration_flags(
            tunneled, abortOnError, compressed, autoConverge, encrypted)

    def start(self):
        self._thread.start()

    def is_alive(self):
        return self._thread.is_alive()

    def migrating(self):
        """
        Return whether the thread currently manages a migration.

        That can be a migration directly supervised by the source thread and
        other threads (such as the downtime thread) or just an indirectly
        managed migration (detected on Vdsm recovery) without the threads
        actually running.
        """
        return ((self.is_alive() and not self._failed)
                or (self._recovery
                    and self._vm.lastStatus == vmstatus.MIGRATION_SOURCE))

    @property
    def started(self):
        return self._started

    @property
    def hibernating(self):
        return self._mode == MODE_FILE

    def _update_progress(self):
        if self._monitorThread is None:
            return

        # fetch migration status from the monitor thread
        if self._monitorThread.progress is not None:
            progress = self._monitorThread.progress.percentage
        else:
            progress = 0

        with self._lock:
            old_progress = self._progress
            if progress >= old_progress:
                self._progress = progress

        if progress < old_progress:
            self.log.info(
                'new computed progress %d is lower than the old value %d, '
                'discarded', progress, old_progress)

    def getStat(self):
        """
        Get the status of the migration.
        """
        self._update_progress()
        self.status['progress'] = self._progress
        return self.status

    def _createClient(self, port):
        sslctx = sslutils.create_ssl_context()

        def is_ipv6_address(a):
            # A bracketed literal such as '[fd00::1]' denotes an IPv6 address.
            return (':' in a) and a.startswith('[') and a.endswith(']')

        if is_ipv6_address(self.remoteHost):
            host = self.remoteHost[1:-1]
        else:
            host = self.remoteHost

        client_socket = utils.create_connected_socket(host, int(port), sslctx)
        return self._vm.cif.createStompClient(client_socket)

    def _setupVdsConnection(self):
        if self.hibernating:
            return

        hostPort = _cannonize_host_port(
            self._dst, config.getint('addresses', 'management_port'))
        self.remoteHost, port = hostPort.rsplit(':', 1)

        client = self._createClient(port)
        requestQueues = config.get('addresses', 'request_queues')
        requestQueue = requestQueues.split(",")[0]
        self._destServer = jsonrpcvdscli.connect(requestQueue, client)
        self.log.debug('Initiating connection with destination')
        self._destServer.ping()

        self.log.debug('Destination server is: ' + hostPort)

    def _setupRemoteMachineParams(self):
        machineParams = self._vm.migration_parameters()
        machineParams['enableGuestEvents'] = self._enableGuestEvents
        if not self.hibernating:
            machineParams['migrationDest'] = 'libvirt'
        return machineParams

    def _prepareGuest(self):
        if self.hibernating:
            self.log.debug("Save State begins")
            if self._vm.guestAgent.isResponsive():
                lockTimeout = 30
            else:
                lockTimeout = 0
            self._vm.guestAgent.desktopLock()
            # wait for lock or timeout
            while lockTimeout:
                if self._vm.getStats()['session'] in ["Locked", "LoggedOff"]:
                    break
                time.sleep(1)
                lockTimeout -= 1
                if lockTimeout == 0:
                    self.log.warning('Agent ' + self._vm.id +
                                     ' unresponsive. Hibernating without '
                                     'desktopLock.')
                    break
            self._vm.pause(vmstatus.SAVING_STATE)
        else:
            self.log.debug("Migration started")
            self._vm.lastStatus = vmstatus.MIGRATION_SOURCE

    def _recover(self, message):
        if not response.is_error(self.status):
            self.status = response.error('migrateErr')
        self.log.error(message)
        if not self.hibernating and self._destServer is not None:
            if self._vm.post_copy == PostCopyPhase.RUNNING:
                # We can't recover a VM after a failed post-copy migration.
                # And the destination takes care of the situation itself.
                self._vm.handle_failed_post_copy(clean_vm=True)
                return
            try:
                self._destServer.destroy(self._vm.id)
            except Exception:
                self.log.exception("Failed to destroy remote VM")
        # if the guest was paused before hibernation, we need to resume it
        if self.hibernating:
            self._vm.cont(ignoreStatus=True)
            if self._enableGuestEvents:
                self._vm.guestAgent.events.after_hibernation_failure()
        elif self._enableGuestEvents:
            self._vm.guestAgent.events.after_migration_failure()
        # either way, migration has finished
        self._failed = True
        if self._recovery:
            self._vm.set_last_status(vmstatus.UP, vmstatus.MIGRATION_SOURCE)
            self._recovery = False
        else:
            self._vm.lastStatus = vmstatus.UP
        self._started = False
        self._vm.send_status_event()

    def _finishSuccessfully(self, machineParams):
        with self._lock:
            self._progress = 100
        if not self.hibernating:
            # TODO: We could use a timeout on the wait to be more robust
            # against "impossible" failures. But we don't have a good value to
            # use here now.
            self._vm.stopped_migrated_event_processed.wait()
            self._vm.setDownStatus(NORMAL, vmexitreason.MIGRATION_SUCCEEDED)
            self.status['status']['message'] = 'Migration done'
            if self._vm.post_copy == PostCopyPhase.RUNNING:
                self._vm.destroy()
        else:
            # don't pickle transient params
            for ignoreParam in ('displayIp', 'display', 'pid'):
                if ignoreParam in machineParams:
                    del machineParams[ignoreParam]

            fname = self._vm.cif.prepareVolumePath(self._dstparams)
            try:
                # Use r+ to avoid truncating the file, see BZ#1282239
                with io.open(fname, "r+b") as f:
                    # protocol=2 is needed for clusters < 4.4
                    # (for Python 2 host compatibility)
                    pickle.dump(machineParams, f, protocol=2)
            finally:
                self._vm.cif.teardownVolumePath(self._dstparams)

            self._vm.setDownStatus(NORMAL, vmexitreason.SAVE_STATE_SUCCEEDED)
            self.status['status']['message'] = 'SaveState done'

    @staticmethod
    def _raiseAbortError():
        e = libvirt.libvirtError(defmsg='')
        # we have to override the value to get what we want
        # err might be None
        e.err = (
            libvirt.VIR_ERR_OPERATION_ABORTED,  # error code
            libvirt.VIR_FROM_QEMU,  # error domain
            'operation aborted',  # error message
            libvirt.VIR_ERR_WARNING,  # error level
            '',
            '',
            '',  # str1, str2, str3,
            -1,
            -1)  # int1, int2
        raise e

    def _update_outgoing_limit(self):
        if self._outgoingLimit:
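            # ongoingMigrations is a class attribute, so changing its bound
            # here affects all outgoing migrations on this host.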
            self.log.debug('Setting outgoing migration limit to %s',
                           self._outgoingLimit)
            SourceThread.ongoingMigrations.bound = self._outgoingLimit

    @property
    def recovery(self):
        """
        Return whether the source thread handles a recovered migration.

        This is when we detect the VM is migrating in Vdsm recovery and the
        source thread is not actually running.

        This serves to handle a possible already running migration detected
        during Vdsm recovery, for which no regular source thread exists.  We
        don't try to touch such a migration, but we still must ensure at least
        basic sanity:

        - Indication that the migration is running.
        - Canceling the migration.
        - Putting the VM into proper status after migration failure (in case
          the migration succeeds, we rely on the fact that the VM disappears
          and Vdsm detects that sooner or later).

        .. note::

           Just setting this flag doesn't mean that any migration is actually
           running; it just means that if a migration is running, then the
           migration was started by another Vdsm instance.  When this flag is
           set then the VM may be actually migrating only if its status is
           `vmstatus.MIGRATION_SOURCE` or `vmstatus.WAIT_FOR_LAUNCH` (the
           latter is mostly irrelevant since we prevent most actions in that
           status).
        """
        return self._recovery

    def run(self):
        if self.recovery:
            self._recovery_run()
        else:
            self._regular_run()

    def _regular_run(self):
        self.log.debug("Starting migration source thread")
        self._recovery = False
        self._update_outgoing_limit()
        try:
            startTime = time.time()
            # Guest agent API version must be updated before _srcDomXML
            # is created to have the version in _srcDomXML metadata.
            self._vm.update_guest_agent_api_version()
            machineParams = self._setupRemoteMachineParams()
            self._setupVdsConnection()
            self._prepareGuest()

            while not self._started:
                try:
                    self.log.info("Migration semaphore: acquiring")
                    with SourceThread.ongoingMigrations:
                        self.log.info("Migration semaphore: acquired")
                        timeout = config.getint(
                            'vars', 'guest_lifecycle_event_reply_timeout')
                        if self.hibernating:
                            self._vm.guestAgent.events.before_hibernation(
                                wait_timeout=timeout)
                        elif self._enableGuestEvents:
                            self._vm.guestAgent.events.before_migration(
                                wait_timeout=timeout)
                        if self._migrationCanceledEvt.is_set():
                            self._raiseAbortError()
                        self.log.debug(
                            "migration semaphore acquired "
                            "after %d seconds",
                            time.time() - startTime)
                        self._startUnderlyingMigration(time.time(),
                                                       machineParams)
                        self._finishSuccessfully(machineParams)
                except libvirt.libvirtError as e:
                    if e.get_error_code() == libvirt.VIR_ERR_OPERATION_ABORTED:
                        self.status = response.error(
                            'migCancelErr', message='Migration canceled')
                    raise
                except MigrationLimitExceeded:
                    retry_timeout = config.getint('vars',
                                                  'migration_retry_timeout')
                    self.log.debug(
                        "Migration destination busy. Initiating "
                        "retry in %d seconds.", retry_timeout)
                    self._migrationCanceledEvt.wait(retry_timeout)
        except MigrationDestinationSetupError as e:
            self._recover(str(e))
            # we know what happened, no need to dump hollow stack trace
        except Exception as e:
            self._recover(str(e))
            self.log.exception("Failed to migrate")

    def _startUnderlyingMigration(self, startTime, machineParams):
        if self.hibernating:
            self._started = True
            self._vm.hibernate(self._dst)
        else:
            self._vm.prepare_migration()

            # Do not include the time spent creating the VM on the destination
            # in the measured migration time; otherwise expensive operations
            # there could cause the migration to be cancelled right after the
            # transfer starts.
            destCreateStartTime = time.time()
            result = self._destServer.migrationCreate(machineParams,
                                                      self._incomingLimit)
            destCreationTime = time.time() - destCreateStartTime
            startTime += destCreationTime
            self.log.info('Creation of destination VM took: %d seconds',
                          destCreationTime)

            if response.is_error(result):
                self.status = result
                if response.is_error(result, 'migrateLimit'):
                    raise MigrationLimitExceeded()
                else:
                    raise MigrationDestinationSetupError(
                        'migration destination error: ' +
                        result['status']['message'])

            self._started = True

            # REQUIRED_FOR: destination Vdsm < 4.3
            if not self._vm.min_cluster_version(4, 3):
                payload_drives = self._vm.payload_drives()
                if payload_drives:
                    # Currently, only a single payload device may be present
                    payload_alias = payload_drives[0].alias
                    result = self._destServer.fullList(vmList=(self._vm.id, ))
                    vm_list = result.get('items')
                    remote_devices = vm_list[0].get('devices')
                    if remote_devices is not None:
                        payload_path = next(
                            (d['path'] for d in remote_devices
                             if d.get('alias') == payload_alias), None)
                        if payload_path is not None:
                            self._legacy_payload_path = \
                                (payload_alias, payload_path)

            if config.getboolean('vars', 'ssl'):
                transport = 'tls'
            else:
                transport = 'tcp'
            duri = 'qemu+{}://{}/system'.format(
                transport, normalize_literal_addr(self.remoteHost))
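            # e.g. duri == 'qemu+tls://dst.example.com/system' (illustrative
            # host name); normalize_literal_addr() brackets IPv6 literals,
            # e.g. 'qemu+tls://[fd00::1]/system'.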

            if self._encrypted:
                # TODO: Stop using host names here and set the host
                # name based certificate verification parameter once
                # the corresponding functionality is available in
                # libvirt, see https://bugzilla.redhat.com/1754533
                #
                # When an encrypted migration is requested, we must
                # use the host name (stored in 'dst') rather than the
                # IP address (stored in 'dstqemu') in order to match
                # the target certificate.  That means that encrypted
                # migrations are incompatible with setups that require
                # an IP address to identify the host properly, such as
                # when a separate migration network should be used or
                # when using IPv4/IPv6 dual stack configurations.
                dstqemu = self.remoteHost
            else:
                dstqemu = self._dstqemu
            if dstqemu:
                muri = 'tcp://{}'.format(normalize_literal_addr(dstqemu))
            else:
                muri = 'tcp://{}'.format(
                    normalize_literal_addr(self.remoteHost))

            self._vm.log.info('starting migration to %s '
                              'with miguri %s', duri, muri)
            self._monitorThread = MonitorThread(self._vm, startTime,
                                                self._convergence_schedule)
            self._perform_with_conv_schedule(duri, muri)
            self.log.info("migration took %d seconds to complete",
                          (time.time() - startTime) + destCreationTime)

    def _perform_migration(self, duri, muri):
        if self._vm.hasSpice and self._vm.conf.get('clientIp'):
            SPICE_MIGRATION_HANDOVER_TIME = 120
            self._vm._reviveTicket(SPICE_MIGRATION_HANDOVER_TIME)

        # FIXME: there is still a race here with libvirt:
        # if we call stop() before libvirt's migrateToURI3() has started,
        # we may report the migration as stopped while it actually starts
        # on the libvirt side
        self._preparingMigrationEvt = False
        if not self._migrationCanceledEvt.is_set():
            self._vm._dom.migrateToURI3(duri, self._migration_params(muri),
                                        self._migration_flags)
        else:
            self._raiseAbortError()

    def _migration_params(self, muri):
        params = {libvirt.VIR_MIGRATE_PARAM_BANDWIDTH: self._maxBandwidth}
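        # VIR_MIGRATE_PARAM_BANDWIDTH is expressed in MiB/s, the same unit
        # used by migrateSetMaxSpeed() in set_max_bandwidth() below.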
        if not self.tunneled:
            params[libvirt.VIR_MIGRATE_PARAM_URI] = str(muri)
        if self._consoleAddress:
            graphics = 'spice' if self._vm.hasSpice else 'vnc'
            params[libvirt.VIR_MIGRATE_PARAM_GRAPHICS_URI] = str(
                '%s://%s' % (graphics, self._consoleAddress))
        # REQUIRED_FOR: destination Vdsm < 4.3
        if self._legacy_payload_path is not None:
            alias, path = self._legacy_payload_path
            dom = xmlutils.fromstring(self._vm.migratable_domain_xml())
            source = dom.find(".//alias[@name='%s']/../source" % (alias, ))
            source.set('file', path)
            xml = xmlutils.tostring(dom)
            self._vm.log.debug("Migrating domain XML: %s", xml)
            params[libvirt.VIR_MIGRATE_PARAM_DEST_XML] = xml
        return params

    @property
    def tunneled(self):
        return self.migration_flags & libvirt.VIR_MIGRATE_TUNNELLED

    @property
    def migration_flags(self):
        return self._migration_flags

    def _calculate_migration_flags(self, tunneled, abort_on_error, compressed,
                                   auto_converge, encrypted):
        flags = libvirt.VIR_MIGRATE_LIVE | libvirt.VIR_MIGRATE_PEER2PEER
        if tunneled:
            flags |= libvirt.VIR_MIGRATE_TUNNELLED
        if abort_on_error:
            flags |= libvirt.VIR_MIGRATE_ABORT_ON_ERROR
        if compressed:
            flags |= libvirt.VIR_MIGRATE_COMPRESSED
        if auto_converge:
            flags |= libvirt.VIR_MIGRATE_AUTO_CONVERGE
        if encrypted:
            flags |= libvirt.VIR_MIGRATE_TLS
        if self._vm.min_cluster_version(4, 2):
            flags |= libvirt.VIR_MIGRATE_PERSIST_DEST
        # Migration may fail immediately when VIR_MIGRATE_POSTCOPY flag is
        # present in the following situations:
        # - The transport is not capable of full bidirectional
        #   connectivity: RDMA, tunnelled, pipe.
        # - Huge pages are used (doesn't apply to transparent huge pages).
        # - QEMU uses a file as a backing for memory.
        # - Perhaps non-shared block storage may cause some trouble.
        for stalling in self._convergence_schedule.get('stalling', []):
            action = stalling.get('action', {}).get('name')
            if action == CONVERGENCE_SCHEDULE_POST_COPY:
                flags |= libvirt.VIR_MIGRATE_POSTCOPY
                break
        return flags

    def _perform_with_conv_schedule(self, duri, muri):
        self._vm.log.debug('performing migration with conv schedule')
        with utils.running(self._monitorThread):
            self._perform_migration(duri, muri)
        self._monitorThread.join()

    def _legacy_convergence_schedule(self, max_downtime):
        # Simplified emulation of legacy non-scheduled migrations.
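        # The returned schedule has roughly this shape (illustrative values;
        # the actual downtimes come from exponential_downtime()):
        #   {'init': [{'name': 'setDowntime', 'params': ['100']}],
        #    'stalling': [
        #        {'limit': 1, 'action': {'name': 'setDowntime',
        #                                'params': ['150']}},
        #        ...
        #        {'limit': 42, 'action': {'name': 'setDowntime',
        #                                 'params': ['500']}},
        #        {'limit': -1, 'action': {'name': 'abort', 'params': []}}]}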
        if max_downtime is None:
            max_downtime = config.get('vars', 'migration_downtime')
        max_downtime = int(max_downtime)
        max_steps = config.getint('vars', 'migration_downtime_steps')
        downtimes = exponential_downtime(max_downtime, max_steps)

        def downtime_action(downtime):
            return {'params': [str(downtime)], 'name': 'setDowntime'}

        init = [downtime_action(next(downtimes))]
        stalling = []
        limit = 1
        for d in downtimes:
            stalling.append({'action': downtime_action(d), 'limit': limit})
            limit += 1
        stalling.append({'action': downtime_action(d), 'limit': 42})
        stalling.append({
            'action': {
                'params': [],
                'name': 'abort'
            },
            'limit': -1
        })
        return {'init': init, 'stalling': stalling}

    def set_max_bandwidth(self, bandwidth):
        self._vm.log.debug('setting migration max bandwidth to %d', bandwidth)
        self._maxBandwidth = bandwidth
        self._vm._dom.migrateSetMaxSpeed(bandwidth)

    def stop(self):
        # If we are still before the migrateToURI3() call (e.g. blocked on
        # the migration semaphore), there is no libvirt job to abort, so
        # abortJob() failures are expected and ignored below.
        try:
            self._migrationCanceledEvt.set()
            self._vm._dom.abortJob()
        except libvirt.libvirtError:
            if not self._preparingMigrationEvt:
                raise
        if self._recovery:
            self._recover("Migration stopped")

    def _recovery_run(self):
        self.log.debug("Starting migration recovery thread")
        while True:
            job_stats = self._vm.job_stats()
            if not ongoing(job_stats):
                break
            time.sleep(self._RECOVERY_LOOP_PAUSE)
        self.log.debug("Recovered migration finished")
        # Successful migration is handled in VM.onJobCompleted, here we need
        # just to ensure that migration failures are detected and handled.
        if self._vm._dom.state(0)[0] == libvirt.VIR_DOMAIN_RUNNING:
            self.recovery_cleanup()

    def recovery_cleanup(self):
        """
        Finish and cleanup recovery migration if necessary.

        This is to handle the situation when we detect a failed migration
        outside the source thread.  The source thread usually handles failed
        migrations itself.  But the thread is not running after recovery so in
        such a case the source thread must be notified about the failed
        migration.  This is what this method serves for.
        """
        if self._recovery and \
           self._vm.lastStatus == vmstatus.MIGRATION_SOURCE:
            self._recover("Migration failed")
Example 3
class SourceThread(object):
    """
    A thread that takes care of migration on the source vdsm.
    """
    ongoingMigrations = DynamicBoundedSemaphore(1)

    def __init__(self,
                 vm,
                 dst='',
                 dstparams='',
                 mode=MODE_REMOTE,
                 method=METHOD_ONLINE,
                 tunneled=False,
                 dstqemu='',
                 abortOnError=False,
                 consoleAddress=None,
                 compressed=False,
                 autoConverge=False,
                 **kwargs):
        self.log = vm.log
        self._vm = vm
        self._dst = dst
        self._mode = mode
        if method != METHOD_ONLINE:
            self.log.warning(
                'migration method %s is deprecated, forced to "online"',
                method)
        self._dstparams = dstparams
        self._enableGuestEvents = kwargs.get('enableGuestEvents', False)
        self._machineParams = {}
        # TODO: utils.tobool shouldn't be used in this constructor, the
        # conversions should be handled properly in the API layer
        self._tunneled = utils.tobool(tunneled)
        self._abortOnError = utils.tobool(abortOnError)
        self._consoleAddress = consoleAddress
        self._dstqemu = dstqemu
        self._downtime = kwargs.get('downtime') or \
            config.get('vars', 'migration_downtime')
        self._maxBandwidth = int(
            kwargs.get('maxBandwidth')
            or config.getint('vars', 'migration_max_bandwidth'))
        self._autoConverge = utils.tobool(autoConverge)
        self._compressed = utils.tobool(compressed)
        self._incomingLimit = kwargs.get('incomingLimit')
        self._outgoingLimit = kwargs.get('outgoingLimit')
        self.status = {
            'status': {
                'code': 0,
                'message': 'Migration in progress'
            }
        }
        self._progress = 0
        self._thread = concurrent.thread(self.run)
        self._preparingMigrationEvt = True
        self._migrationCanceledEvt = threading.Event()
        self._monitorThread = None
        self._destServer = None
        self._convergence_schedule = {'init': [], 'stalling': []}
        self._use_convergence_schedule = False
        if 'convergenceSchedule' in kwargs:
            self._convergence_schedule = kwargs.get('convergenceSchedule')
            self._use_convergence_schedule = True
            self.log.debug('convergence schedule set to: %s',
                           str(self._convergence_schedule))

    def start(self):
        self._thread.start()

    def is_alive(self):
        return self._thread.is_alive()

    @property
    def hibernating(self):
        return self._mode == MODE_FILE

    def getStat(self):
        """
        Get the status of the migration.
        """
        if self._monitorThread is not None:
            # fetch migration status from the monitor thread
            if self._monitorThread.progress is not None:
                self._progress = self._monitorThread.progress.percentage
            else:
                self._progress = 0
        self.status['progress'] = self._progress

        stat = self._vm._dom.jobStats(libvirt.VIR_DOMAIN_JOB_STATS_COMPLETED)
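        # With VIR_DOMAIN_JOB_STATS_COMPLETED, jobStats() reports statistics
        # of the most recently completed job; when it includes 'downtime_net',
        # that value is exposed to the caller as 'downtime'.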
        if 'downtime_net' in stat:
            self.status['downtime'] = stat['downtime_net']

        return self.status

    def _createClient(self, port):
        sslctx = sslutils.create_ssl_context()

        def is_ipv6_address(a):
            return (':' in a) and a.startswith('[') and a.endswith(']')

        if is_ipv6_address(self.remoteHost):
            host = self.remoteHost[1:-1]
        else:
            host = self.remoteHost

        client_socket = utils.create_connected_socket(host, int(port), sslctx)
        return self._vm.cif.createStompClient(client_socket)

    def _setupVdsConnection(self):
        if self.hibernating:
            return

        hostPort = vdscli.cannonizeHostPort(
            self._dst, config.getint('addresses', 'management_port'))
        self.remoteHost, port = hostPort.rsplit(':', 1)

        try:
            client = self._createClient(port)
            requestQueues = config.get('addresses', 'request_queues')
            requestQueue = requestQueues.split(",")[0]
            self._destServer = jsonrpcvdscli.connect(requestQueue, client)
            self.log.debug('Initiating connection with destination')
            self._destServer.ping()

        except (JsonRpcBindingsError, JsonRpcNoResponseError):
            if config.getboolean('vars', 'ssl'):
                self._destServer = vdscli.connect(
                    hostPort,
                    useSSL=True,
                    TransportClass=kaxmlrpclib.TcpkeepSafeTransport)
            else:
                self._destServer = kaxmlrpclib.Server('http://' + hostPort)

        self.log.debug('Destination server is: ' + hostPort)

    def _setupRemoteMachineParams(self):
        self._machineParams.update(self._vm.status())
        # patch VM config for targets < 3.1
        self._patchConfigForLegacy()
        self._machineParams['elapsedTimeOffset'] = \
            time.time() - self._vm._startTime
        vmStats = self._vm.getStats()
        if 'username' in vmStats:
            self._machineParams['username'] = vmStats['username']
        if 'guestIPs' in vmStats:
            self._machineParams['guestIPs'] = vmStats['guestIPs']
        if 'guestFQDN' in vmStats:
            self._machineParams['guestFQDN'] = vmStats['guestFQDN']
        self._machineParams['guestAgentAPIVersion'] = \
            self._vm.guestAgent.effectiveApiVersion
        for k in ('_migrationParams', 'pid'):
            if k in self._machineParams:
                del self._machineParams[k]
        if not self.hibernating:
            self._machineParams['migrationDest'] = 'libvirt'
        self._machineParams['_srcDomXML'] = self._vm._dom.XMLDesc(0)
        self._machineParams['enableGuestEvents'] = self._enableGuestEvents

    def _prepareGuest(self):
        if self.hibernating:
            self.log.debug("Save State begins")
            if self._vm.guestAgent.isResponsive():
                lockTimeout = 30
            else:
                lockTimeout = 0
            self._vm.guestAgent.desktopLock()
            # wait for lock or timeout
            while lockTimeout:
                if self._vm.getStats()['session'] in ["Locked", "LoggedOff"]:
                    break
                time.sleep(1)
                lockTimeout -= 1
                if lockTimeout == 0:
                    self.log.warning('Agent ' + self._vm.id +
                                     ' unresponsive. Hibernating without '
                                     'desktopLock.')
                    break
            self._vm.pause(vmstatus.SAVING_STATE)
        else:
            self.log.debug("Migration started")
            self._vm.lastStatus = vmstatus.MIGRATION_SOURCE

    def _recover(self, message):
        if not response.is_error(self.status):
            self.status = response.error('migrateErr')
        self.log.error(message)
        if not self.hibernating and self._destServer is not None:
            try:
                self._destServer.destroy(self._vm.id)
            except Exception:
                self.log.exception("Failed to destroy remote VM")
        # if the guest was paused before hibernation, we need to resume it
        if self.hibernating:
            self._vm.cont(ignoreStatus=True)
            if self._enableGuestEvents:
                self._vm.guestAgent.events.after_hibernation_failure()
        elif self._enableGuestEvents:
            self._vm.guestAgent.events.after_migration_failure()
        # either way, migration has finished
        self._vm.lastStatus = vmstatus.UP
        self._vm.send_status_event()

    def _finishSuccessfully(self):
        self._progress = 100
        if not self.hibernating:
            self._vm.setDownStatus(NORMAL, vmexitreason.MIGRATION_SUCCEEDED)
            self.status['status']['message'] = 'Migration done'
        else:
            # don't pickle transient params
            for ignoreParam in ('displayIp', 'display', 'pid'):
                if ignoreParam in self._machineParams:
                    del self._machineParams[ignoreParam]

            fname = self._vm.cif.prepareVolumePath(self._dstparams)
            try:
                # Use r+ to avoid truncating the file, see BZ#1282239
                with open(fname, "r+") as f:
                    pickle.dump(self._machineParams, f)
            finally:
                self._vm.cif.teardownVolumePath(self._dstparams)

            self._vm.setDownStatus(NORMAL, vmexitreason.SAVE_STATE_SUCCEEDED)
            self.status['status']['message'] = 'SaveState done'

    def _patchConfigForLegacy(self):
        """
        Remove "cdrom" and "floppy" items from the "drives" list in the VM
        config and expose each of them as a separate full-path entry.
        """
        # care only about "drives" list, since
        # "devices" doesn't cause errors
        if 'drives' in self._machineParams:
            for item in ("cdrom", "floppy"):
                new_drives = []
                for drive in self._machineParams['drives']:
                    if drive['device'] == item:
                        self._machineParams[item] = drive['path']
                    else:
                        new_drives.append(drive)
                self._machineParams['drives'] = new_drives

        # vdsm < 4.13 expects this to exist
        self._machineParams['afterMigrationStatus'] = ''

    @staticmethod
    def _raiseAbortError():
        e = libvirt.libvirtError(defmsg='')
        # we have to override the value to get what we want
        # err might be None
        e.err = (
            libvirt.VIR_ERR_OPERATION_ABORTED,  # error code
            libvirt.VIR_FROM_QEMU,  # error domain
            'operation aborted',  # error message
            libvirt.VIR_ERR_WARNING,  # error level
            '',
            '',
            '',  # str1, str2, str3,
            -1,
            -1)  # int1, int2
        raise e

    def _update_outgoing_limit(self):
        if self._outgoingLimit:
            self.log.debug('Setting outgoing migration limit to %s',
                           self._outgoingLimit)
            SourceThread.ongoingMigrations.bound = self._outgoingLimit

    def run(self):
        self._update_outgoing_limit()
        try:
            startTime = time.time()
            self._setupVdsConnection()
            self._setupRemoteMachineParams()
            self._prepareGuest()

            while self._progress < 100:
                try:
                    with SourceThread.ongoingMigrations:
                        timeout = config.getint(
                            'vars', 'guest_lifecycle_event_reply_timeout')
                        if self.hibernating:
                            self._vm.guestAgent.events.before_hibernation(
                                wait_timeout=timeout)
                        elif self._enableGuestEvents:
                            self._vm.guestAgent.events.before_migration(
                                wait_timeout=timeout)
                        if self._migrationCanceledEvt.is_set():
                            self._raiseAbortError()
                        self.log.debug(
                            "migration semaphore acquired "
                            "after %d seconds",
                            time.time() - startTime)
                        params = {
                            'dst': self._dst,
                            'mode': self._mode,
                            'method': METHOD_ONLINE,
                            'dstparams': self._dstparams,
                            'dstqemu': self._dstqemu,
                        }
                        with self._vm.migration_parameters(params):
                            self._vm.saveState()
                            self._startUnderlyingMigration(time.time())
                            self._finishSuccessfully()
                except libvirt.libvirtError as e:
                    if e.get_error_code() == libvirt.VIR_ERR_OPERATION_ABORTED:
                        self.status = response.error(
                            'migCancelErr', message='Migration canceled')
                    raise
                except MigrationLimitExceeded:
                    retry_timeout = config.getint('vars',
                                                  'migration_retry_timeout')
                    self.log.debug(
                        "Migration destination busy. Initiating "
                        "retry in %d seconds.", retry_timeout)
                    self._migrationCanceledEvt.wait(retry_timeout)
        except MigrationDestinationSetupError as e:
            self._recover(str(e))
            # we know what happened, no need to dump hollow stack trace
        except Exception as e:
            self._recover(str(e))
            self.log.exception("Failed to migrate")

    def _startUnderlyingMigration(self, startTime):
        if self.hibernating:
            hooks.before_vm_hibernate(self._vm._dom.XMLDesc(0), self._vm.conf)
            fname = self._vm.cif.prepareVolumePath(self._dst)
            try:
                self._vm._dom.save(fname)
            finally:
                self._vm.cif.teardownVolumePath(self._dst)
        else:
            for dev in self._vm._customDevices():
                hooks.before_device_migrate_source(dev._deviceXML,
                                                   self._vm.conf, dev.custom)
            hooks.before_vm_migrate_source(self._vm._dom.XMLDesc(0),
                                           self._vm.conf)

            # Do not include the time spent creating the VM on the destination
            # in the measured migration time; otherwise expensive operations
            # there could cause the migration to be cancelled right after the
            # transfer starts.
            destCreateStartTime = time.time()
            result = self._destServer.migrationCreate(self._machineParams,
                                                      self._incomingLimit)
            destCreationTime = time.time() - destCreateStartTime
            startTime += destCreationTime
            self.log.info('Creation of destination VM took: %d seconds',
                          destCreationTime)

            if response.is_error(result):
                self.status = result
                if response.is_error(result, 'migrateLimit'):
                    raise MigrationLimitExceeded()
                else:
                    raise MigrationDestinationSetupError(
                        'migration destination error: ' +
                        result['status']['message'])
            if config.getboolean('vars', 'ssl'):
                transport = 'tls'
            else:
                transport = 'tcp'
            duri = 'qemu+%s://%s/system' % (transport, self.remoteHost)
            if self._vm.conf['_migrationParams']['dstqemu']:
                muri = 'tcp://%s' % \
                       self._vm.conf['_migrationParams']['dstqemu']
            else:
                muri = 'tcp://%s' % self.remoteHost

            self._vm.log.info('starting migration to %s '
                              'with miguri %s', duri, muri)

            self._monitorThread = MonitorThread(self._vm, startTime,
                                                self._convergence_schedule,
                                                self._use_convergence_schedule)

            if self._use_convergence_schedule:
                self._perform_with_conv_schedule(duri, muri)
            else:
                self._perform_with_downtime_thread(duri, muri)

            self.log.info("migration took %d seconds to complete",
                          (time.time() - startTime) + destCreationTime)

    def _perform_migration(self, duri, muri):
        if self._vm.hasSpice and self._vm.conf.get('clientIp'):
            SPICE_MIGRATION_HANDOVER_TIME = 120
            self._vm._reviveTicket(SPICE_MIGRATION_HANDOVER_TIME)

        # FIXME: there is still a race here with libvirt:
        # if we call stop() before libvirt's migrateToURI3() has started,
        # we may report the migration as stopped while it actually starts
        # on the libvirt side
        self._preparingMigrationEvt = False
        if not self._migrationCanceledEvt.is_set():
            # TODO: use libvirt constants when bz#1222795 is fixed
            params = {
                VIR_MIGRATE_PARAM_URI: str(muri),
                VIR_MIGRATE_PARAM_BANDWIDTH: self._maxBandwidth
            }
            if self._consoleAddress:
                if self._vm.hasSpice:
                    graphics = 'spice'
                else:
                    graphics = 'vnc'
                params[VIR_MIGRATE_PARAM_GRAPHICS_URI] = str(
                    '%s://%s' % (graphics, self._consoleAddress))

            flags = (
                libvirt.VIR_MIGRATE_LIVE | libvirt.VIR_MIGRATE_PEER2PEER |
                (libvirt.VIR_MIGRATE_TUNNELLED if self._tunneled else 0) |
                (libvirt.VIR_MIGRATE_ABORT_ON_ERROR
                 if self._abortOnError else 0) |
                (libvirt.VIR_MIGRATE_COMPRESSED if self._compressed else 0) |
                (libvirt.VIR_MIGRATE_AUTO_CONVERGE
                 if self._autoConverge else 0))

            self._vm._dom.migrateToURI3(duri, params, flags)
        else:
            self._raiseAbortError()

    def _perform_with_downtime_thread(self, duri, muri):
        self._vm.log.debug('performing migration with downtime thread')
        self._monitorThread.downtime_thread = DowntimeThread(
            self._vm, int(self._downtime),
            config.getint('vars', 'migration_downtime_steps'))

        with utils.running(self._monitorThread):
            self._perform_migration(duri, muri)

        self._monitorThread.join()

    def _perform_with_conv_schedule(self, duri, muri):
        self._vm.log.debug('performing migration with conv schedule')
        with utils.running(self._monitorThread):
            self._perform_migration(duri, muri)
        self._monitorThread.join()

    def set_max_bandwidth(self, bandwidth):
        self._vm.log.debug('setting migration max bandwidth to %d', bandwidth)
        self._maxBandwidth = bandwidth
        self._vm._dom.migrateSetMaxSpeed(bandwidth)

    def stop(self):
        # If we are still before the migrateToURI3() call (e.g. blocked on
        # the migration semaphore), there is no libvirt job to abort, so
        # abortJob() failures are expected and ignored below.
        try:
            self._migrationCanceledEvt.set()
            self._vm._dom.abortJob()
        except libvirt.libvirtError:
            if not self._preparingMigrationEvt:
                raise