Exemple #1
0
	def __start_live_migration(self):
		"""
		Run a migration in live mode

		Sets the fs work dir, validates cpu and criu version, then moves
		memory and fs to the target host in rounds for as long as pre-dumps
		are in use and the progress check asks for another round. After that
		the htype is dumped on the source host, fs and images are synced and
		the target host is asked to restore.

		A failure between the final dump and the restore is reported through
		htype.migration_fail() and re-raised; exceptions raised after the
		restore are only logged as warnings.
		"""

		self.fs.set_work_dir(self.img.work_dir())
		self.__validate_cpu()
		self.__validate_criu_version()
		predump_enabled = self.__check_use_pre_dumps()
		pid = self.htype.root_task_pid()

		stats = mstats.live_stats()
		stats.handle_start()

		# The preliminary FS migration precedes any memory round
		logging.info("Preliminary FS migration")
		fs_stats = self.fs.start_migration()
		stats.handle_preliminary(fs_stats)

		round_no = 0
		last_dump_stats = None
		keep_going = predump_enabled

		while keep_going:

			# One predump round, bracketed by start_iter/end_iter on target
			logging.info("* Iteration %d", round_no)
			self.target_host.start_iter(True)
			self.img.new_image_dir()
			criu_cr.criu_predump(
				self.htype, pid, self.img, self.criu_connection, self.fs)
			self.target_host.end_iter()

			# The FS migration round that goes with this predump
			fs_stats = self.fs.next_iteration()

			dump_stats = criu_api.criu_get_dstats(self.img)
			stats.handle_iteration(dump_stats, fs_stats)

			# Either schedule another round or fall through to the final dump
			keep_going = self.__check_live_iter_progress(
				round_no, dump_stats, last_dump_stats)
			if keep_going:
				round_no += 1
				last_dump_stats = dump_stats

		# Dump htype on source and leave its tasks in frozen state
		logging.info("Final dump and restore")
		need_page_server = self.htype.dump_need_page_server()
		self.target_host.start_iter(need_page_server)
		self.img.new_image_dir()
		self.htype.final_dump(pid, self.img, self.criu_connection, self.fs)
		self.target_host.end_iter()

		try:
			# Final FS and images sync while the htype is frozen
			logging.info("Final FS and images sync")
			fs_stats = self.fs.stop_migration()
			self.img.sync_imgs_to_target(
				self.target_host, self.htype, self.connection.mem_sk)

			# Restore htype on target
			logging.info("Asking target host to restore")
			self.target_host.restore_from_images()

		except:
			# Catches every exception type; the failure hook runs first and
			# the exception is then passed on to the caller unchanged
			self.htype.migration_fail(self.fs)
			raise

		# Restored on target, can't fail starting from this point
		try:
			# Ack previous dump request to terminate all frozen tasks
			reply = self.criu_connection.ack_notify()
			if not reply.success:
				logging.warning("Bad notification from target host")

			dump_stats = criu_api.criu_get_dstats(self.img)
			stats.handle_iteration(dump_stats, fs_stats)

			logging.info("Migration succeeded")
			self.htype.migration_complete(self.fs, self.target_host)
			stats.handle_stop(self)
			self.img.close()
			self.criu_connection.close()

		except Exception as err:
			logging.warning("Exception during final cleanup: %s", err)
Exemple #2
0
    def start_migration(self):
        """Migrate the process tree to the target host.

        Validates the CPU (unless the force flag is set), starts the FS
        migration, decides whether pre-dumps are used, runs pre-dump
        iterations while they are enabled and keep making progress, then
        performs the final dump, syncs FS and images to the target and asks
        it to restore.

        In auto-detect mode ``self.pre_dump`` is overwritten with the result
        of ``self.pre_dump_check()``, or with ``PRE_DUMP_DISABLE`` when that
        check raises an ordinary exception.

        Raises:
            Exception: when a pre-dump request is not successful, when the
                dump answers with anything but a notification, or when the
                final acknowledgement is not successful.
        """

        migration_stats = mstats.migration_stats()
        prev_dstats = None
        iter_index = 0

        migration_stats.start()

        if not self.__force:
            self.validate_cpu()

        logging.info("Preliminary FS migration")
        self.fs.set_work_dir(self.img.work_dir())
        self.fs.start_migration()

        logging.info("Checking for Dirty Tracking")
        if self.pre_dump == PRE_DUMP_AUTO_DETECT:
            # pre-dump auto-detection
            try:
                self.pre_dump = self.pre_dump_check()
                if self.pre_dump:
                    logging.info("\t`- Auto Enabled")
                else:
                    logging.info("\t`- Auto Disabled")

            except Exception:
                # The available criu seems to not
                # support memory tracking auto detection.
                # Only ordinary errors are treated that way:
                # KeyboardInterrupt / SystemExit must still abort
                # the migration instead of silently disabling pre-dumps.
                self.pre_dump = PRE_DUMP_DISABLE
                logging.info("\t`- Auto detection not possible - Disabled")

        elif self.pre_dump == PRE_DUMP_DISABLE:
            logging.info("\t`- Command-line disabled")
        else:
            logging.info("\t`- Command-line enabled")

        if self.pre_dump:
            logging.info("Starting iterations")
        else:
            self.criu_connection.memory_tracking(False)

        while self.pre_dump:
            logging.info("* Iteration %d", iter_index)

            self.target_host.start_iter()
            self.img.new_image_dir()

            logging.info("\tIssuing pre-dump command to service")

            req = criu_req.make_predump_req(self.pid, self.htype, self.img,
                                            self.criu_connection, self.fs)
            resp = self.criu_connection.send_req(req)
            if not resp.success:
                raise Exception("Pre-dump failed")

            logging.info("\tPre-dump succeeded")

            self.target_host.end_iter()

            dstats = criu_api.criu_get_dstats(self.img)
            migration_stats.iteration(dstats)

            #
            # Need to decide whether we do next iteration
            # or stop on the existing and go do full dump
            # and restore
            #

            logging.info("Checking iteration progress:")

            if dstats.pages_written <= phaul_iter_min_size:
                logging.info("\t> Small dump")
                break

            if prev_dstats:
                # Growth of this iteration relative to the previous
                # one, in percent
                w_add = dstats.pages_written - prev_dstats.pages_written
                w_add = w_add * 100 / prev_dstats.pages_written
                if w_add > phaul_iter_grow_max:
                    logging.info("\t> Iteration grows")
                    break

            if iter_index >= phaul_iter_max:
                logging.info("\t> Too many iterations")
                break

            iter_index += 1
            prev_dstats = dstats
            logging.info("\t> Proceed to next iteration")

            self.fs.next_iteration()

        #
        # Finish with iterations -- do full dump, send images
        # to target host and restore from them there
        #

        logging.info("Final dump and restore")

        self.target_host.start_iter()
        self.img.new_image_dir()

        logging.info("\tIssuing dump command to service")

        req = criu_req.make_dump_req(self.pid, self.htype, self.img,
                                     self.criu_connection, self.fs)
        resp = self.criu_connection.send_req(req)
        # Serve notifications one by one; every message before
        # "post-dump" has to be a notification and gets acknowledged.
        while True:
            if resp.type != pycriu.rpc.NOTIFY:
                raise Exception("Dump failed")

            if resp.notify.script == "post-dump":
                #
                # Dump is effectively over. Now CRIU
                # waits for us to do whatever we want
                # and keeps the tasks frozen.
                #
                break

            elif resp.notify.script == "network-lock":
                self.htype.net_lock()
            elif resp.notify.script == "network-unlock":
                self.htype.net_unlock()

            logging.info("\t\tNotify (%s)", resp.notify.script)
            resp = self.criu_connection.ack_notify()

        logging.info("Dump complete")
        self.target_host.end_iter()

        #
        # Dump is complete -- go to target node,
        # restore them there and kill (if required)
        # tasks on source node
        #

        logging.info("Final FS and images sync")
        self.fs.stop_migration()
        self.img.sync_imgs_to_target(self.target_host, self.htype,
                                     self.connection.mem_sk)

        logging.info("Asking target host to restore")
        self.target_host.restore_from_images()

        #
        # Ack the notify after restore -- CRIU would
        # then terminate all tasks and send us back
        # DUMP/success message
        #

        resp = self.criu_connection.ack_notify()
        if not resp.success:
            raise Exception("Dump screwed up")

        self.htype.umount()

        # Account the final dump as one more iteration before
        # closing the statistics and the connections
        dstats = criu_api.criu_get_dstats(self.img)
        migration_stats.iteration(dstats)
        migration_stats.stop(self)
        self.img.close()
        self.criu_connection.close()
Exemple #3
0
	def start_migration(self):
		"""Migrate the process tree to the target host.

		Validates the CPU, starts the FS migration, decides whether
		pre-dumps are used, runs pre-dump iterations while they are enabled
		and keep making progress, then lets the htype perform the final
		dump, syncs FS and images to the target and asks it to restore.

		In auto-detect mode ``self.pre_dump`` is overwritten with the
		combined result of ``self.pre_dump_check()`` and
		``self.htype.can_pre_dump()``, or with ``PRE_DUMP_DISABLE`` when
		the detection raises an ordinary exception.

		Raises:
			Exception: when a pre-dump request is not successful or when
				the final acknowledgement is not successful.
		"""

		migration_stats = mstats.migration_stats()
		prev_dstats = None
		iter_index = 0

		migration_stats.start()

		self.validate_cpu()

		logging.info("Preliminary FS migration")
		self.fs.set_work_dir(self.img.work_dir())
		self.fs.start_migration()

		logging.info("Checking for Dirty Tracking")
		if self.pre_dump == PRE_DUMP_AUTO_DETECT:
			# pre-dump auto-detection
			try:
				self.pre_dump = (self.pre_dump_check() and self.htype.can_pre_dump())
			except Exception:
				# The available criu seems to not
				# support memory tracking auto detection.
				# Only ordinary errors are treated that way:
				# KeyboardInterrupt / SystemExit must still abort
				# the migration instead of silently disabling pre-dumps.
				self.pre_dump = PRE_DUMP_DISABLE
				logging.info("\t`- Auto detection not possible - Disabled")
			else:
				logging.info("\t`- Auto %s",
						'enabled' if self.pre_dump else 'disabled')
		else:
			logging.info("\t`- Command-line %s",
					'enabled' if self.pre_dump else 'disabled')

		if self.pre_dump:
			logging.info("Starting iterations")
		else:
			self.criu_connection.memory_tracking(False)

		while self.pre_dump:
			logging.info("* Iteration %d", iter_index)

			self.target_host.start_iter(True)
			self.img.new_image_dir()

			logging.info("\tIssuing pre-dump command to service")

			req = criu_req.make_predump_req(
				self.pid, self.img, self.criu_connection, self.fs)
			resp = self.criu_connection.send_req(req)
			if not resp.success:
				raise Exception("Pre-dump failed")

			logging.info("\tPre-dump succeeded")

			self.target_host.end_iter()

			dstats = criu_api.criu_get_dstats(self.img)
			migration_stats.iteration(dstats)

			#
			# Need to decide whether we do next iteration
			# or stop on the existing and go do full dump
			# and restore
			#

			logging.info("Checking iteration progress:")

			if dstats.pages_written <= phaul_iter_min_size:
				logging.info("\t> Small dump")
				break

			if prev_dstats:
				# Growth of this iteration relative to the previous
				# one, in percent
				w_add = dstats.pages_written - prev_dstats.pages_written
				w_add = w_add * 100 / prev_dstats.pages_written
				if w_add > phaul_iter_grow_max:
					logging.info("\t> Iteration grows")
					break

			if iter_index >= phaul_iter_max:
				logging.info("\t> Too many iterations")
				break

			iter_index += 1
			prev_dstats = dstats
			logging.info("\t> Proceed to next iteration")

			self.fs.next_iteration()

		#
		# Finish with iterations -- do full dump, send images
		# to target host and restore from them there
		#

		logging.info("Final dump and restore")

		self.target_host.start_iter(self.htype.dump_need_ps())
		self.img.new_image_dir()

		logging.info("\tIssuing dump command to service")
		self.htype.final_dump(self.pid, self.img, self.criu_connection, self.fs)

		logging.info("Dump complete")
		self.target_host.end_iter()

		#
		# Dump is complete -- go to target node,
		# restore them there and kill (if required)
		# tasks on source node
		#

		logging.info("Final FS and images sync")
		self.fs.stop_migration()
		self.img.sync_imgs_to_target(self.target_host, self.htype,
			self.connection.mem_sk)

		logging.info("Asking target host to restore")
		self.target_host.restore_from_images()

		#
		# Ack the notify after restore -- CRIU would
		# then terminate all tasks and send us back
		# DUMP/success message
		#

		resp = self.criu_connection.ack_notify()
		if not resp.success:
			raise Exception("Dump screwed up")

		self.htype.umount()

		# Account the final dump as one more iteration before
		# closing the statistics and the connections
		dstats = criu_api.criu_get_dstats(self.img)
		migration_stats.iteration(dstats)
		migration_stats.stop(self)
		self.img.close()
		self.criu_connection.close()
	def __start_live_migration(self):
		"""Run a migration in live mode

		Sets the fs work dir, validates cpu and criu version, then moves
		memory and fs to the target host in rounds for as long as pre-dumps
		are in use and the progress check asks for another round. After that
		the htype is dumped on the source host, fs and images are synced and
		the target host is asked to restore.

		An Exception raised between the final dump and the restore is
		reported through htype.migration_fail() and re-raised; exceptions
		raised after the restore are only logged as warnings.
		"""

		self.fs.set_work_dir(self.img.work_dir())
		self.__validate_cpu()
		self.__validate_criu_version()
		want_predumps = self.__check_use_pre_dumps()
		task_pid = self.htype.root_task_pid()

		live_stats = mstats.live_stats()
		live_stats.handle_start()

		# The preliminary FS migration precedes any memory round
		logging.info("Preliminary FS migration")
		fs_report = self.fs.start_migration()
		live_stats.handle_preliminary(fs_report)

		step = 0
		earlier_report = None
		another_round = want_predumps

		while another_round:

			# One predump round, bracketed by start_iter/end_iter on target
			logging.info("* Iteration %d", step)
			self.target_host.start_iter(True)
			self.img.new_image_dir()
			criu_cr.criu_predump(
				self.htype, task_pid, self.img, self.criu_connection, self.fs)
			self.target_host.end_iter()

			# The FS migration round that goes with this predump
			fs_report = self.fs.next_iteration()

			dump_report = criu_api.criu_get_dstats(self.img)
			live_stats.handle_iteration(dump_report, fs_report)

			# Either schedule another round or fall through to the final dump
			another_round = self.__check_live_iter_progress(
				step, dump_report, earlier_report)
			if another_round:
				step += 1
				earlier_report = dump_report

		# Dump htype on source and leave its tasks in frozen state
		logging.info("Final dump and restore")
		with_page_server = self.htype.dump_need_page_server()
		self.target_host.start_iter(with_page_server)
		self.img.new_image_dir()
		self.htype.final_dump(
			task_pid, self.img, self.criu_connection, self.fs)
		self.target_host.end_iter()

		try:
			# Final FS and images sync while the htype is frozen
			logging.info("Final FS and images sync")
			fs_report = self.fs.stop_migration()
			self.img.sync_imgs_to_target(
				self.target_host, self.htype, self.connection.mem_sk)

			# Restore htype on target
			logging.info("Asking target host to restore")
			self.target_host.restore_from_images()

		except Exception:
			# The failure hook runs first, then the exception is passed on
			self.htype.migration_fail(self.fs)
			raise

		# Restored on target, can't fail starting from this point
		try:
			# Ack previous dump request to terminate all frozen tasks
			answer = self.criu_connection.ack_notify()
			if not answer.success:
				logging.warning("Bad notification from target host")

			dump_report = criu_api.criu_get_dstats(self.img)
			live_stats.handle_iteration(dump_report, fs_report)

			logging.info("Migration succeeded")
			self.htype.migration_complete(self.fs, self.target_host)
			live_stats.handle_stop(self)
			self.img.close()
			self.criu_connection.close()

		except Exception as exc:
			logging.warning("Exception during final cleanup: %s", exc)
Exemple #5
0
	def start_migration(self):
		"""Migrate the process tree to the target host.

		Validates the CPU (unless the force flag is set), starts the FS
		migration, then always runs at least one pre-dump iteration and
		keeps iterating until a dump is small, grows too much compared
		with the previous one, or the iteration limit is reached. After
		that the final dump is issued, FS and images are synced to the
		target and the target is asked to restore.

		The iteration counter and the previous statistics are read from
		and written to ``self.iteration`` and ``self.prev_stats``.

		Progress is reported with single-argument ``print(...)`` calls,
		which print the same text on Python 2 and Python 3.

		Raises:
			Exception: when a pre-dump request is not successful, when
				the dump answers with anything but a notification, or
				when the final acknowledgement is not successful.
		"""
		self._mstat.start()

		if not self.__force:
			self.validate_cpu()

		print("Preliminary FS migration")
		self.fs.set_work_dir(self.img.work_dir())
		self.fs.start_migration()

		print("Starting iterations")
		cc = self.criu

		while True:
			print("* Iteration %d" % self.iteration)

			self.th.start_iter()
			self.img.new_image_dir()

			print("\tIssuing pre-dump command to service")

			req = self.make_dump_req(cr_rpc.PRE_DUMP)
			resp = cc.send_req(req)
			if not resp.success:
				raise Exception("Pre-dump failed")

			print("\tPre-dump succeeded")

			self.th.end_iter()

			stats = criu_api.criu_get_dstats(self.img)
			self._mstat.iteration(stats)

			#
			# Need to decide whether we do next iteration
			# or stop on the existing and go do full dump
			# and restore
			#

			print("Checking iteration progress:")

			if stats.pages_written <= phaul_iter_min_size:
				print("\t> Small dump")
				break

			if self.prev_stats:
				# Growth of this iteration relative to the previous
				# one, in percent
				w_add = stats.pages_written - self.prev_stats.pages_written
				w_add = w_add * 100 / self.prev_stats.pages_written
				if w_add > phaul_iter_grow_max:
					print("\t> Iteration grows")
					break

			if self.iteration >= phaul_iter_max:
				print("\t> Too many iterations")
				break

			self.iteration += 1
			self.prev_stats = stats
			print("\t> Proceed to next iteration")

			self.fs.next_iteration()

		#
		# Finish with iterations -- do full dump, send images
		# to target host and restore from them there
		#

		print("Final dump and restore")

		self.th.start_iter()
		self.img.new_image_dir()

		print("\tIssuing dump command to service")
		req = self.make_dump_req(cr_rpc.DUMP)
		req.opts.notify_scripts = True
		req.opts.file_locks = True
		req.opts.evasive_devices = True
		req.opts.link_remap = True
		if self.htype.can_migrate_tcp():
			req.opts.tcp_established = True

		resp = cc.send_req(req)
		# Serve notifications one by one; every message before
		# "post-dump" has to be a notification and gets acknowledged.
		while True:
			if resp.type != cr_rpc.NOTIFY:
				raise Exception("Dump failed")

			if resp.notify.script == "post-dump":
				#
				# Dump is effectively over. Now CRIU
				# waits for us to do whatever we want
				# and keeps the tasks frozen.
				#
				break

			elif resp.notify.script == "network-lock":
				self.htype.net_lock()
			elif resp.notify.script == "network-unlock":
				self.htype.net_unlock()

			print("\t\tNotify (%s)" % resp.notify.script)
			resp = cc.ack_notify()

		print("Dump complete")
		self.th.end_iter()

		#
		# Dump is complete -- go to target node,
		# restore them there and kill (if required)
		# tasks on source node
		#

		print("Final FS and images sync")
		self.fs.stop_migration()
		self.img.sync_imgs_to_target(self.th, self.htype, self.data_sk)

		print("Asking target host to restore")
		self.th.restore_from_images()

		#
		# Ack the notify after restore -- CRIU would
		# then terminate all tasks and send us back
		# DUMP/success message
		#

		resp = cc.ack_notify()
		if not resp.success:
			raise Exception("Dump screwed up")

		self.htype.umount()

		# Account the final dump as one more iteration before
		# closing the statistics and the connections
		stats = criu_api.criu_get_dstats(self.img)
		self._mstat.iteration(stats)
		self._mstat.stop(self)
		self.img.close()
		cc.close()
Exemple #6
0
    def start_migration(self):
        """Migrate the process tree to the target host.

        Validates the CPU (unless the force flag is set), starts the FS
        migration, then always runs at least one pre-dump iteration and
        keeps iterating until a dump is small, grows too much compared
        with the previous one, or the iteration limit is reached. After
        that the final dump is issued, FS and images are synced to the
        target and the target is asked to restore.

        The iteration counter and the previous statistics are read from
        and written to ``self.iteration`` and ``self.prev_stats``.

        Progress is reported with single-argument ``print(...)`` calls,
        which print the same text on Python 2 and Python 3.

        Raises:
            Exception: when a pre-dump request is not successful, when
                the dump answers with anything but a notification, or
                when the final acknowledgement is not successful.
        """
        self._mstat.start()

        if not self.__force:
            self.validate_cpu()

        print("Preliminary FS migration")
        self.fs.set_work_dir(self.img.work_dir())
        self.fs.start_migration()

        print("Starting iterations")
        cc = self.criu

        while True:
            print("* Iteration %d" % self.iteration)

            self.th.start_iter()
            self.img.new_image_dir()

            print("\tIssuing pre-dump command to service")

            req = self.make_dump_req(cr_rpc.PRE_DUMP)
            resp = cc.send_req(req)
            if not resp.success:
                raise Exception("Pre-dump failed")

            print("\tPre-dump succeeded")

            self.th.end_iter()

            stats = criu_api.criu_get_dstats(self.img)
            self._mstat.iteration(stats)

            #
            # Need to decide whether we do next iteration
            # or stop on the existing and go do full dump
            # and restore
            #

            print("Checking iteration progress:")

            if stats.pages_written <= phaul_iter_min_size:
                print("\t> Small dump")
                break

            if self.prev_stats:
                # Growth of this iteration relative to the previous
                # one, in percent
                w_add = stats.pages_written - self.prev_stats.pages_written
                w_add = w_add * 100 / self.prev_stats.pages_written
                if w_add > phaul_iter_grow_max:
                    print("\t> Iteration grows")
                    break

            if self.iteration >= phaul_iter_max:
                print("\t> Too many iterations")
                break

            self.iteration += 1
            self.prev_stats = stats
            print("\t> Proceed to next iteration")

            self.fs.next_iteration()

        #
        # Finish with iterations -- do full dump, send images
        # to target host and restore from them there
        #

        print("Final dump and restore")

        self.th.start_iter()
        self.img.new_image_dir()

        print("\tIssuing dump command to service")
        req = self.make_dump_req(cr_rpc.DUMP)
        req.opts.notify_scripts = True
        req.opts.file_locks = True
        req.opts.evasive_devices = True
        req.opts.link_remap = True
        if self.htype.can_migrate_tcp():
            req.opts.tcp_established = True

        resp = cc.send_req(req)
        # Serve notifications one by one; every message before
        # "post-dump" has to be a notification and gets acknowledged.
        while True:
            if resp.type != cr_rpc.NOTIFY:
                raise Exception("Dump failed")

            if resp.notify.script == "post-dump":
                #
                # Dump is effectively over. Now CRIU
                # waits for us to do whatever we want
                # and keeps the tasks frozen.
                #
                break

            elif resp.notify.script == "network-lock":
                self.htype.net_lock()
            elif resp.notify.script == "network-unlock":
                self.htype.net_unlock()

            print("\t\tNotify (%s)" % resp.notify.script)
            resp = cc.ack_notify()

        print("Dump complete")
        self.th.end_iter()

        #
        # Dump is complete -- go to target node,
        # restore them there and kill (if required)
        # tasks on source node
        #

        print("Final FS and images sync")
        self.fs.stop_migration()
        self.img.sync_imgs_to_target(self.th, self.htype, self.data_sk)

        print("Asking target host to restore")
        self.th.restore_from_images()

        #
        # Ack the notify after restore -- CRIU would
        # then terminate all tasks and send us back
        # DUMP/success message
        #

        resp = cc.ack_notify()
        if not resp.success:
            raise Exception("Dump screwed up")

        self.htype.umount()

        # Account the final dump as one more iteration before
        # closing the statistics and the connections
        stats = criu_api.criu_get_dstats(self.img)
        self._mstat.iteration(stats)
        self._mstat.stop(self)
        self.img.close()
        cc.close()