Example 1
def makeListHost(self, l):
    h = Host()
    for e in range(0, len(self.hostOrder)):
        h.__dict__[self.hostOrder[e]] = l[e]
    h.up = boolean(h.up)
    h.decayed = boolean(h.decayed)
    h.state = int(h.state)
    if h.reserved is not None:
        h.reserved = eval(h.reserved)
    else:
        h.reserved = []
    return h
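
Every example on this page passes configuration strings (values such as "True"/"False" read from a config file or a database row) through a boolean() helper before using them. The helper's own source is not shown here; a minimal sketch of the assumed behaviour (not the project's actual implementation) is:

def boolean(value):
    # Pass real booleans through; map common string spellings of
    # true/false to bool and reject anything unrecognised. (Assumed
    # behaviour for illustration only.)
    if isinstance(value, bool):
        return value
    lowered = str(value).strip().lower()
    if lowered in ("true", "yes", "1"):
        return True
    if lowered in ("false", "no", "0"):
        return False
    raise ValueError("not a boolean value: %r" % (value,))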
Example 2
	def __init__(self, config, client):
		self.config = config
		self.client = client
		self.hooks = []
		self.log = logging.getLogger(__file__)
		self.scheduleDelay = float(self.config.get("Primitive", "scheduleDelay"))
		self.densePack = boolean(self.config.get("Primitive", "densePack"))
		self.hosts = {}

		#  Zoni
		self.minServersOn = 3
		self.shutdownDelay = 300
		self.pcm = zoni.services.rpycservices.client("zoni", 12345).createConn()
		self.zoniStateFile = "/var/tmp/zoniStateFile"
		if os.path.exists(self.zoniStateFile):
			self.zoniState = self.__loadZoniState(self.zoniStateFile)
		else:
			self.zoniState = {}
			self.__initState()

		items = self.config.items("Primitive")
		items.sort()
		for item in items:
			(name, value) = item
			name = name.lower()
			if (name.startswith("hook")):
				try:
					self.hooks.append(instantiateImplementation(value, config, client, False))
				except:
					self.log.exception("Failed to load hook %s" % (value))
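
Examples 2 and 4 create their pluggable components with instantiateImplementation(), passing a dotted class name taken from the configuration plus the constructor arguments. Its real definition lives elsewhere in the project; the assumed idea, sketched roughly, is:

def instantiateImplementation(className, *args):
    # Split "package.module.Class" into module path and class name,
    # import the module, then call the class with the remaining
    # arguments. (A sketch of the assumed behaviour, not the project's code.)
    moduleName, _, shortName = className.rpartition(".")
    module = __import__(moduleName, fromlist=[shortName])
    return getattr(module, shortName)(*args)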
Example 3
def makeListInstance(self, l):
    i = Instance()
    for e in range(0, len(self.instanceOrder)):
        i.__dict__[self.instanceOrder[e]] = l[e]
    i.state = int(i.state)
    i.decayed = boolean(i.decayed)
    i.disks = map(lambda x: DiskConfiguration(d=x), eval(i.disks))
    i.nics = map(lambda x: NetworkConfiguration(d=x), eval(i.nics))
    i.hints = eval(i.hints)
    return i
Example 4
def startClusterManager(config):
    global service, data

    dfs = instantiateImplementation(config.get("ClusterManager", "dfs"),
                                    config)
    data = instantiateImplementation(config.get("ClusterManager", "data"),
                                     config)
    service = instantiateImplementation(
        config.get("ClusterManager", "service"), config, data, dfs)

    if boolean(config.get("Security", "authAndEncrypt")):
        users = {}
        userDatabase = data.getUsers()
        for user in userDatabase.values():
            if user.passwd != None:
                users[user.name] = user.passwd
        users[config.get('AllowedUsers', 'nodeManagerUser')] = config.get(
            'AllowedUsers', 'nodeManagerPassword')
        users[config.get('AllowedUsers',
                         'agentUser')] = config.get('AllowedUsers',
                                                    'agentPassword')
        authenticator = TlsliteVdbAuthenticator.from_dict(users)

        # XXXstroucki ThreadedServer is liable to have exceptions
        # occur within if an endpoint is lost.
        t = ThreadedServer(service=rpycservices.ManagerService,
                           hostname='0.0.0.0',
                           port=int(config.get('ClusterManagerService',
                                               'port')),
                           auto_register=False,
                           authenticator=authenticator)
    else:
        t = ThreadedServer(service=rpycservices.ManagerService,
                           hostname='0.0.0.0',
                           port=int(config.get('ClusterManagerService',
                                               'port')),
                           auto_register=False)
    t.logger.setLevel(logging.ERROR)
    t.service.service = service
    t.service._type = 'ClusterManagerService'

    debugConsole(globals())

    t.start()
    # shouldn't exit by itself
    return
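
startClusterManager() pulls every setting from a ConfigParser-style object. A minimal configuration that satisfies the keys read above could be built as follows; the class paths and port number are placeholders for illustration, not values taken from the project:

from ConfigParser import ConfigParser   # Python 2, matching the examples

config = ConfigParser()
config.add_section("ClusterManager")
config.set("ClusterManager", "dfs", "mypkg.MyDfs")          # placeholder class path
config.set("ClusterManager", "data", "mypkg.MyData")        # placeholder class path
config.set("ClusterManager", "service", "mypkg.MyService")  # placeholder class path
config.add_section("Security")
config.set("Security", "authAndEncrypt", "False")           # skip the TLS branch
config.add_section("ClusterManagerService")
config.set("ClusterManagerService", "port", "9882")         # placeholder port

startClusterManager(config)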
Example 5
	def start(self):
		oldInstances = {}
		muffle = {}
		while True:
			try:
				# Generate a list of VMs/host
				hosts = {}
				load = {}
				for h in self.client.getHosts():
					hosts[h.id] = h
					load[h.id] = []
				load[None] = []
				_instances = self.client.getInstances()
				instances = {}
				for i in _instances:
					instances[i.id] = i
				for i in instances.itervalues():
					if (i.hostId or i.state == InstanceState.Pending):
						load[i.hostId] = load[i.hostId] + [i.id]
				# Check for VMs that have exited


				for i in oldInstances:
					if (i not in instances and oldInstances[i].state != InstanceState.Pending):
						for hook in self.hooks:
							hook.postDestroy(oldInstances[i])
				# Schedule new VMs
				oldInstances = instances
				if (len(load.get(None, [])) > 0):
					load[None].sort()
					for i in load[None]:
						inst = instances[i]
						try:
							minMax = None
							minMaxHost = None
							targetHost = inst.hints.get("targetHost", None)
							try:
								allowElsewhere = boolean(inst.hints.get("allowElsewhere", "False"))
							except Exception, e:
								allowElsewhere = False
							#  TargetHost specified
							if (targetHost != None):
								for h in hosts.values():
									if ((str(h.id) == targetHost or h.name == targetHost)):
										#  make sure that host is up, in a normal state and is not reserved
										if (h.up == True and h.state == HostState.Normal and len(h.reserved) == 0):
											memUsage = reduce(lambda x, y: x + instances[y].memory, load[h.id], inst.memory)
											coreUsage = reduce(lambda x, y: x + instances[y].cores, load[h.id], inst.cores)
											if (memUsage <= h.memory and coreUsage <= h.cores):
												minMax = len(load[h.id])
												minMaxHost = h
								
										#  If a host machine is reserved, only allow if userid is in reserved list
										if ((len(h.reserved) > 0) and inst.userId in h.reserved):
											memUsage = reduce(lambda x, y: x + instances[y].memory, load[h.id], inst.memory)
											coreUsage = reduce(lambda x, y: x + instances[y].cores, load[h.id], inst.cores)
											if (memUsage <= h.memory and coreUsage <= h.cores):
												minMax = len(load[h.id])
												minMaxHost = h


							if ((targetHost == None or allowElsewhere) and minMaxHost == None):
								for h in hosts.values():
									if (h.up == True and h.state == HostState.Normal and len(h.reserved) == 0):
										if (minMax is None or (self.densePack and len(load[h.id]) > minMax) or (not self.densePack and len(load[h.id]) < minMax)):

											memUsage = reduce(lambda x, y: x + instances[y].memory, load[h.id], inst.memory)
											coreUsage = reduce(lambda x, y: x + instances[y].cores, load[h.id], inst.cores)

											if (memUsage <= h.memory and coreUsage <= h.cores):
												minMax = len(load[h.id])
												minMaxHost = h
							if (minMaxHost):
								if (not inst.hints.get("__resume_source", None)):
									for hook in self.hooks:
										hook.preCreate(inst)
								self.log.info("Scheduling instance %s (%d mem, %d cores, %d uid) on host %s" % (inst.name, inst.memory, inst.cores, inst.userId, minMaxHost.name))	
								self.client.activateVm(i, minMaxHost)
								load[minMaxHost.id] = load[minMaxHost.id] + [i]
								muffle.clear()
							else:
								if (inst.name not in muffle):
									self.log.info("Failed to find a suitable place to schedule %s" % (inst.name))
									muffle[inst.name] = True
						except Exception, e:
							if (inst.name not in muffle):
								self.log.exception("Failed to schedule or activate %s" % (inst.name))
								muffle[inst.name] = True
			except Exception, e:
				# assumed catch-all: the listing breaks off here, and the outer
				# try above needs a handler so the scheduling loop keeps running
				self.log.exception("Scheduler loop failed")
			time.sleep(self.scheduleDelay)
Example 6
    def __init__(self, config, dfs, nm):
        VmControlInterface.__init__(self, config, dfs, nm)
        self.QEMU_BIN = self.config.get("Qemu",
                                        "qemuBin",
                                        default="/usr/bin/kvm")
        self.INFO_DIR = self.config.get("Qemu",
                                        "infoDir",
                                        default="/var/tmp/VmControlQemu/")
        self.POLL_DELAY = float(self.config.get("Qemu", "pollDelay",
                                                default=1))
        self.migrationRetries = int(
            self.config.get("Qemu", "migrationRetries", default=10))
        self.monitorTimeout = float(
            self.config.get("Qemu", "monitorTimeout", default=60))
        self.migrateTimeout = float(
            self.config.get("Qemu", "migrateTimeout", default=300))
        self.useMigrateArgument = boolean(
            self.config.get("Qemu", "useMigrateArgument", default=False))
        self.statsInterval = float(
            self.config.get("Qemu", "statsInterval", default=0))
        reservedMem = self.config.get("Qemu", "reservedMem", default=512)
        reservedMem = int(reservedMem)

        self.reservedMem = reservedMem

        self.log = logging.getLogger(__file__)
        self.ifPrefix = "tashi"
        # keep a handle to my NM service
        self.service = None
        self.controlledVMs = {}
        self.hostname = socket.gethostname()
        self.usedPorts = []
        self.usedPortsLock = threading.Lock()
        self.vncPorts = []
        self.vncPortLock = threading.Lock()
        self.consolePort = 10000
        self.consolePortLock = threading.Lock()
        maxParallelMigrations = self.config.get("Qemu",
                                                "maxParallelMigrations")
        maxParallelMigrations = int(maxParallelMigrations)
        if maxParallelMigrations < 1:
            maxParallelMigrations = 1

        self.migrationSemaphore = threading.Semaphore(maxParallelMigrations)
        self.stats = {}

        self.suspendHandler = self.config.get("Qemu",
                                              "suspendHandler",
                                              default="gzip")
        self.resumeHandler = self.config.get("Qemu",
                                             "resumeHandler",
                                             default="zcat")

        self.scratchVg = self.config.get("Qemu", "scratchVg")

        self.scratchDir = self.config.get("Qemu", "scratchDir", default="/tmp")

        try:
            os.mkdir(self.INFO_DIR)
        except:
            pass

        self.__scanInfoDir()

        threading.Thread(target=self.__pollVMsLoop).start()
        if (self.statsInterval > 0):
            threading.Thread(target=self.statsThread).start()
Example 7
    def __startVm(self, instance, source):
        """Universal function to start a VM -- used by instantiateVM, resumeVM, and prepReceiveVM"""

        #  Capture __startVm Hints
        #  CPU hints
        cpuModel = instance.hints.get("cpumodel")

        cpuString = ""
        if cpuModel:
            # clean off whitespace
            cpuModel = self.__stripSpace(cpuModel)
            cpuString = "-cpu " + cpuModel

        #  Clock hints
        clockString = instance.hints.get("clock", "dynticks")
        # clean off whitespace
        clockString = self.__stripSpace(clockString)

        #  Disk hints
        # XXXstroucki: insert commentary on jcipar's performance
        # measurements
        # virtio is recommended, but linux will name devices
        # vdX instead of sdX. This adds a trap for someone who
        # converts a physical machine or other virtualization
        # layer's image to run under Tashi.
        diskInterface = instance.hints.get("diskInterface", "ide")
        # clean off whitespace
        diskInterface = self.__stripSpace(diskInterface)
        cachePolicy = instance.hints.get("cache", "off")
        cachePolicy = self.__stripSpace(cachePolicy)

        diskString = ""

        for index in range(0, len(instance.disks)):
            disk = instance.disks[index]
            uri = scrubString(disk.uri)
            imageLocal = self.dfs.getLocalHandle("images/" + uri)
            imageLocal = self.__dereferenceLink(imageLocal)
            thisDiskList = ["file=%s" % imageLocal]
            thisDiskList.append("if=%s" % diskInterface)
            thisDiskList.append("index=%d" % index)

            if (index == 0 and diskInterface == "virtio"):
                thisDiskList.append("boot=on")

            if (disk.persistent):
                snapshot = "off"
                migrate = "off"
            else:
                snapshot = "on"
                migrate = "on"

            thisDiskList.append("cache=%s" % cachePolicy)

            thisDiskList.append("snapshot=%s" % snapshot)

            if (self.useMigrateArgument):
                thisDiskList.append("migrate=%s" % migrate)

            diskString = diskString + "-drive " + ",".join(thisDiskList) + " "

        # scratch disk
        scratchSize = instance.hints.get("scratchSpace", "0")
        scratchSize = int(scratchSize)
        scratchName = None

        try:
            if scratchSize > 0:
                if self.scratchVg is None:
                    raise Exception, "No scratch volume group defined"
                # create scratch disk
                # XXXstroucki: needs to be cleaned somewhere
                # XXXstroucki: clean user provided instance name
                scratchName = "lv%s" % instance.name
                # XXXstroucki hold lock
                # XXXstroucki check for capacity
                cmd = "/sbin/lvcreate --quiet -n%s -L %dG %s" % (
                    scratchName, scratchSize, self.scratchVg)
                # XXXstroucki check result
                __result = subprocess.Popen(cmd.split(),
                                            executable=cmd.split()[0],
                                            stdout=subprocess.PIPE).wait()
                index += 1

                thisDiskList = [
                    "file=/dev/%s/%s" % (self.scratchVg, scratchName)
                ]
                thisDiskList.append("if=%s" % diskInterface)
                thisDiskList.append("index=%d" % index)
                thisDiskList.append("cache=%s" % cachePolicy)

                # XXXstroucki force scratch disk to be
                # persistent
                if (True or disk.persistent):
                    snapshot = "off"
                    migrate = "off"
                else:
                    snapshot = "on"
                    migrate = "on"

                thisDiskList.append("snapshot=%s" % snapshot)

                if (self.useMigrateArgument):
                    thisDiskList.append("migrate=%s" % migrate)

                diskString = "%s-drive %s " % (diskString,
                                               ",".join(thisDiskList))

        except:
            self.log.exception('caught exception in scratch disk formation')
            raise

        #  Nic hints
        nicModel = instance.hints.get("nicModel", "virtio")
        # clean off whitespace
        nicModel = self.__stripSpace(nicModel)

        nicString = ""
        nicNetworks = {}
        for i in range(0, len(instance.nics)):
            # Don't allow more than one interface per vlan
            nic = instance.nics[i]
            if nicNetworks.has_key(nic.network):
                continue
            nicNetworks[nic.network] = True

            nicString = nicString + "-net nic,macaddr=%s,model=%s,vlan=%d -net tap,ifname=%s%d.%d,vlan=%d,script=/etc/qemu-ifup.%d " % (
                nic.mac, nicModel, nic.network, self.ifPrefix, instance.id, i,
                nic.network, nic.network)

        #  ACPI
        if (boolean(instance.hints.get("noAcpi", False))):
            noAcpiString = "-no-acpi"
        else:
            noAcpiString = ""

        #  Construct the qemu command
        strCmd = "%s %s %s -clock %s %s %s -m %d -smp %d -serial null -vnc none -monitor pty -balloon virtio" % (
            self.QEMU_BIN, noAcpiString, cpuString, clockString, diskString,
            nicString, instance.memory, instance.cores)
        if (source):
            strCmd = '%s -incoming "%s"' % (strCmd, source)
        # XXXstroucki perhaps we're doing it backwards
        cmd = shlex.split(strCmd)

        self.log.info("Executing command: %s" % (strCmd))
        (pipe_r, pipe_w) = os.pipe()
        pid = os.fork()
        if (pid == 0):
            # child process
            pid = os.getpid()
            os.setpgid(pid, pid)
            os.close(pipe_r)
            os.dup2(pipe_w, sys.stderr.fileno())
            for i in [sys.stdin.fileno(), sys.stdout.fileno()]:
                try:
                    os.close(i)
                except:
                    pass
            for i in xrange(3, os.sysconf("SC_OPEN_MAX")):
                try:
                    os.close(i)
                except:
                    pass

            # XXXstroucki unfortunately no kvm option yet
            # to direct COW differences elsewhere, so change
            # this process' TMPDIR, which kvm will honour
            os.environ['TMPDIR'] = self.scratchDir
            os.execl(self.QEMU_BIN, *cmd)
            sys.exit(-1)

        # parent process
        os.close(pipe_w)

        # enforce the new instance to have our hostId!
        # otherwise, a migrated VM will have its previous hostId.
        instance.hostId = self.service.id
        child = self.anonClass(pid=pid,
                               instance=instance,
                               stderr=os.fdopen(pipe_r, 'r'),
                               migratingOut=False,
                               monitorHistory=[],
                               errorBit=True,
                               OSchild=True)
        child.ptyFile = None
        child.vncPort = -1
        child.instance.vmId = child.pid
        # XXXstroucki what about our hostId?
        # we need to make sure we don't report up a VM
        # with an inaccurate hostId.

        # Add a token to this new child object so that
        # we don't mistakenly clean up when matchHostPids
        # runs and the child process hasn't exec'ed yet.
        child.startTime = time.time()

        self.__saveChildInfo(child)
        self.log.info("Adding vmId %d" % (child.pid))
        self.controlledVMs[child.pid] = child
        return (child.pid, cmd)