def StopInstance(self, instance, force=False, retry=False, name=None,
                 timeout=None):
    """Stop an LXC container via C{lxc-stop}.

    Nothing is done when the container is not reported as alive.

    @param instance: the instance object; only its C{name} attribute is
        read, and only when C{name} is not given
    @type force: boolean
    @param force: if true, kill the container (C{--kill}) and raise on
        failure; otherwise request a shutdown with C{--nokill} and only log
        a failure
    @param retry: not used by this implementation
    @type name: string or None
    @param name: container name to stop; defaults to C{instance.name}
    @param timeout: passed to C{utils.RunCmd} as its C{timeout} argument
    @raise HypervisorError: if the forced stop command fails

    """
    assert (timeout is None or force is not None)

    if name is None:
        name = instance.name

    # Check liveness using the resolved name, so that the call also works
    # when only a name (and no usable instance object) was supplied.
    if not self._IsInstanceAlive(name):
        return

    lxc_stop_cmd = ["lxc-stop", "-n", name]

    if force:
        lxc_stop_cmd.append("--kill")
        result = utils.RunCmd(lxc_stop_cmd, timeout=timeout)
        if result.failed:
            raise HypervisorError("Failed to kill instance %s: %s" %
                                  (name, result.output))
    else:
        # The --timeout=-1 option is needed to prevent lxc-stop from
        # performing a hard stop (kill) of the container once its default
        # timeout expires.
        lxc_stop_cmd.extend(["--nokill", "--timeout", "-1"])
        result = utils.RunCmd(lxc_stop_cmd, timeout=timeout)
        if result.failed:
            logging.error("Failed to stop instance %s: %s", name,
                          result.output)
def RebootInstance(self, instance):
    """Reject a reboot request.

    Rebooting is not (yet) implemented for the chroot manager, so this
    method always fails.

    @param instance: the instance to reboot (not used)
    @raise HypervisorError: always

    """
    reason = ("The chroot manager doesn't implement the"
              " reboot functionality")
    raise HypervisorError(reason)
def StartInstance(self, instance, block_devices, startup_paused):
    """Start an instance.

    For the chroot manager, we try to mount the block device and execute
    '/ganeti-chroot start'.

    NOTE(review): the code visible here only makes sure the instance root
    directory exists and is a directory; the mount and start steps are not
    part of this snippet.

    @param instance: the instance object; its C{name} selects the root dir
    @param block_devices: not used by the visible code
    @param startup_paused: not used by the visible code
    @raise HypervisorError: if the root directory can't be created or the
        path exists but is not a directory

    """
    root_dir = self._InstanceDir(instance.name)
    if not os.path.exists(root_dir):
        try:
            os.mkdir(root_dir)
        # os.mkdir reports failures as OSError; EnvironmentError covers
        # both OSError and IOError.
        except EnvironmentError as err:
            raise HypervisorError("Failed to start instance %s: %s" %
                                  (instance.name, err))
    if not os.path.isdir(root_dir):
        raise HypervisorError("Needed path %s is not a directory" % root_dir)
def StopInstance(self, instance, force=False, retry=False, name=None):
    """Stop an instance and unmount its root directory.

    The cleanup has several stages:
      - ask the instance to power off (first, non-forced attempt only) and
        run C{lxc-stop}
      - unmount any additional sub-mountpoints
      - finally unmount the instance dir

    @param instance: the instance object; only C{name} is read, and only
        when C{name} is not given
    @param force: skip the 'poweroff' step and raise if the final unmount
        fails
    @param retry: skip the 'poweroff' step
    @param name: instance name to use instead of C{instance.name}
    @raise HypervisorError: if 'poweroff' fails, or if the final unmount
        fails while C{force} is set

    """
    if name is None:
        name = instance.name

    root_dir = self._InstanceDir(name)
    if not os.path.exists(root_dir):
        return

    if name in self.ListInstances():
        # Signal init to shutdown; this is a hack
        first_soft_attempt = not (retry or force)
        if first_soft_attempt:
            poweroff = utils.RunCmd(["chroot", root_dir, "poweroff"])
            if poweroff.failed:
                raise HypervisorError("Running 'poweroff' on the instance"
                                      " failed: %s" % poweroff.output)
        time.sleep(2)
        lxc_stop = utils.RunCmd(["lxc-stop", "-n", name])
        if lxc_stop.failed:
            logging.warning("Error while doing lxc-stop for %s: %s",
                            name, lxc_stop.output)

    if not os.path.ismount(root_dir):
        return

    for mpath in self._GetMountSubdirs(root_dir):
        sub_umount = utils.RunCmd(["umount", mpath])
        if sub_umount.failed:
            logging.warning(
                "Error while umounting subpath %s for instance %s: %s",
                mpath, name, sub_umount.output)

    root_umount = utils.RunCmd(["umount", root_dir])
    if not (root_umount.failed and force):
        return

    # Report which processes keep the mount busy
    fuser_output = utils.RunCmd("fuser -vm %s" % root_dir).output
    msg = "Processes still alive in the chroot: %s" % fuser_output
    logging.error(msg)
    raise HypervisorError("Unmounting the chroot dir failed: %s (%s)" %
                          (root_umount.output, msg))
def _GetCgroupEnabledKernelSubsystems(cls):
    """Return cgroup subsystems list that are enabled in current kernel.

    NOTE(review): the code visible here only reads C{cls._PROC_CGROUPS_FILE}
    and translates read errors; the parsing of the table into a list is not
    part of this snippet.

    @raise HypervisorError: if the cgroup info file can't be read

    """
    try:
        subsys_table = utils.ReadFile(cls._PROC_CGROUPS_FILE)
    except EnvironmentError as err:
        raise HypervisorError("Failed to read cgroup info from %s: %s" %
                              (cls._PROC_CGROUPS_FILE, err))
def RebootInstance(self, instance):
    """Reject a reboot request.

    Rebooting is not (yet) implemented (in Ganeti) for the LXC hypervisor,
    so this method always fails.

    @param instance: the instance to reboot (not used)
    @raise HypervisorError: always

    """
    # TODO: implement reboot
    reason = ("The LXC hypervisor doesn't implement the"
              " reboot functionality")
    raise HypervisorError(reason)
def _GetCgroupCpuUsage(cls, instance_name):
    """Return the CPU usage of an instance.

    NOTE(review): the code visible here only fetches the C{cpuacct.usage}
    cgroup value and translates read errors; the conversion and return of
    the value are not part of this snippet.

    @type instance_name: string
    @param instance_name: name of the instance to query
    @raise HypervisorError: if the cgroup value can't be read

    """
    try:
        cputime_ns = cls._GetCgroupInstanceValue(instance_name,
                                                 "cpuacct.usage")
    except EnvironmentError as err:
        raise HypervisorError("Failed to get the cpu usage of %s: %s" %
                              (instance_name, err))
def StartInstance(self, instance, block_devices, startup_paused):
    """Start an instance.

    For LXC, we try to mount the block device and execute 'lxc-start'.
    We use volatile containers.

    On any failure after the log file has been prepared, the instance is
    cleaned up using the collected stash and the original error is
    re-raised. On success the stash is saved for later use.

    @param instance: the instance object to start
    @param block_devices: list of block device entries; the path of the
        root device is read from C{block_devices[0][1]}
    @param startup_paused: not used by this implementation
    @raise HypervisorError: if the instance directory can't be created

    """
    LXCHypervisor._VerifyDiskRequirements(block_devices)

    stash = {}

    # Since LXC version >= 1.0.0, the LXC strictly requires all cgroup
    # subsystems mounted before starting a container.
    # Try to mount all cgroup subsystems needed to start a LXC container.
    self._EnsureCgroupMounts(instance.hvparams)

    root_dir = self._InstanceDir(instance.name)
    try:
        utils.EnsureDirs([(root_dir, self._DIR_MODE)])
    except errors.GenericError as err:
        # Format the message here; passing the value as a second argument
        # would leave the "%s" placeholder unexpanded.
        raise HypervisorError("Creating instance directory failed: %s" %
                              str(err))

    log_file = self._InstanceLogFilePath(instance)
    if not os.path.exists(log_file):
        _CreateBlankFile(log_file, constants.SECURE_FILE_MODE)

    try:
        sda_dev_path = block_devices[0][1]
        # LXC needs to use partition mapping devices to access each
        # partition of the storage
        sda_dev_path = self._PrepareInstanceRootFsBdev(sda_dev_path, stash)
        conf_file = self._InstanceConfFilePath(instance.name)
        conf = self._CreateConfigFile(instance, sda_dev_path)
        utils.WriteFile(conf_file, data=conf)

        logging.info("Starting LXC container")
        try:
            self._SpawnLXC(instance, log_file, conf_file)
        except:  # deliberately broad: log a hint, then re-raise unchanged
            logging.error(
                "Failed to start instance %s. Please take a look at %s to"
                " see LXC errors.", instance.name, log_file)
            raise
    except:  # deliberately broad: cleanup must run for any failure
        # Save the original error
        exc_info = sys.exc_info()
        try:
            self._CleanupInstance(instance.name, stash)
        except HypervisorError as err:
            logging.warning("Cleanup for instance %s incomplete: %s",
                            instance.name, err)
        # Re-raise the original exception object itself rather than
        # constructing a new exception around it.
        raise exc_info[1].with_traceback(exc_info[2])

    self._SaveInstanceStash(instance.name, stash)
def StopInstance(self, instance, force=False, retry=False, name=None,
                 timeout=None):
    """Stop an instance.

    The steps performed are:
      - run the chroot stop script (first, non-forced attempt only)
      - signal all processes using the chroot (TERM, or KILL when forced)
      - when forced, fail if the chroot is still in use afterwards

    Nothing is done if the root directory does not exist or is not live.

    @param instance: the instance object; only C{name} is read, and only
        when C{name} is not given
    @param force: use KILL instead of TERM and raise if processes survive
    @param retry: if true, the stop script is not run again
    @param name: instance name to use instead of C{instance.name}
    @param timeout: if given, the stop script is run under
        C{timeout <value>}
    @raise HypervisorError: if the stop script fails, or if the chroot is
        still in use after a forced stop

    """
    assert (timeout is None or force is not None)

    if name is None:
        name = instance.name

    root_dir = self._InstanceDir(name)
    if not os.path.exists(root_dir) or not self._IsDirLive(root_dir):
        return

    timeout_cmd = []
    if timeout is not None:
        timeout_cmd.extend(["timeout", str(timeout)])

    # Run the chroot stop script only once
    if not retry and not force:
        # Build a new list: list.extend() returns None, so its result must
        # not be used as the command.
        stop_cmd = timeout_cmd + ["chroot", root_dir, "/ganeti-chroot",
                                  "stop"]
        result = utils.RunCmd(stop_cmd)
        if result.failed:
            raise HypervisorError("Can't run the chroot stop script: %s" %
                                  result.output)

    if not force:
        utils.RunCmd(["fuser", "-k", "-TERM", "-m", root_dir])
    else:
        utils.RunCmd(["fuser", "-k", "-KILL", "-m", root_dir])
        # 2 seconds at most should be enough for KILL to take action
        time.sleep(2)

    if self._IsDirLive(root_dir):
        if force:
            raise HypervisorError("Can't stop the processes using the"
                                  " chroot")
        return
def _GetCgroupMemoryLimit(cls, instance_name):
    """Return the memory limit for an instance.

    The value of the C{memory.limit_in_bytes} cgroup parameter is read and
    converted with C{int()}.

    @type instance_name: string
    @param instance_name: name of the instance to query
    @rtype: int
    @raise HypervisorError: if the cgroup value can't be read

    """
    try:
        raw_limit = cls._GetCgroupInstanceValue(instance_name,
                                                "memory.limit_in_bytes")
    except EnvironmentError as err:
        raise HypervisorError("Can't get instance memory limit of %s: %s" %
                              (instance_name, err))
    return int(raw_limit)
def _IsInstanceAlive(cls, instance_name):
    """Return True if instance is alive.

    An instance counts as alive when its name appears in the output of
    C{lxc-ls --running}.

    @type instance_name: string
    @param instance_name: name of the instance to look for
    @rtype: boolean
    @raise HypervisorError: if the C{lxc-ls} command fails

    """
    listing = utils.RunCmd(["lxc-ls", "--running"])
    if not listing.failed:
        running = listing.stdout.split()
        return instance_name in running
    raise HypervisorError(
        "Failed to get running LXC containers list: %s" % listing.output)
def _ListAliveInstances(cls):
    """Return list of alive instances.

    The list is the whitespace-split output of C{lxc-ls --running}.

    @rtype: list of strings
    @raise HypervisorError: if the C{lxc-ls} command fails

    """
    listing = utils.RunCmd(["lxc-ls", "--running"])
    if not listing.failed:
        return listing.stdout.split()
    raise HypervisorError(
        "Failed to get running LXC containers list: %s" % listing.output)
def RebootInstance(self, instance):
    """Reboot an instance.

    The reboot is requested with C{lxc-stop --reboot} and is refused up
    front when the C{sys_boot} capability is dropped for the instance.

    @param instance: the instance object; C{name} and C{hvparams} are read
    @raise HypervisorError: if C{sys_boot} is dropped or the command fails

    """
    dropped_caps = self._GetInstanceDropCapabilities(instance.hvparams)
    if "sys_boot" in dropped_caps:
        raise HypervisorError(
            "The LXC container can't perform a reboot with the"
            " SYS_BOOT capability dropped.")

    # The --timeout=-1 approach used by StopInstance can't be applied here
    # because of the following patch, applied in lxc-1.0.5, while we are
    # supporting LXC >= 1.0.0.
    # http://lists.linuxcontainers.org/pipermail/lxc-devel/2014-July/009742.html
    reboot_cmd = ["lxc-stop", "-n", instance.name, "--reboot"]
    reboot_cmd += ["--timeout", str(self._REBOOT_TIMEOUT)]
    outcome = utils.RunCmd(reboot_cmd)
    if outcome.failed:
        raise HypervisorError("Failed to reboot instance %s: %s" %
                              (instance.name, outcome.output))
def _CreateBlankFile(path, mode):
    """Create blank file.

    Write an empty file at C{path} with the given mode. An existing file
    will be overwritten.

    @param path: path of the file to create
    @param mode: file mode passed to C{utils.WriteFile}
    @raise HypervisorError: if writing the file fails

    """
    empty_content = ""
    try:
        utils.WriteFile(path, data=empty_content, mode=mode)
    except EnvironmentError as exc:
        failure = "Failed to create file %s: %s" % (path, exc)
        raise HypervisorError(failure)
def GetMigrationStatus(self, instance):
    """Get the migration status.

    Migration is not supported by the chroot hypervisor, so this method
    always fails.

    @type instance: L{objects.Instance}
    @param instance: the instance that is being migrated (not used)
    @raise HypervisorError: always

    """
    reason = "Migration not supported by the chroot hypervisor"
    raise HypervisorError(reason)
def _LoadInstanceStash(self, instance_name):
    """Load information stashed in file which was created by
    L{_SaveInstanceStash}.

    @type instance_name: string
    @param instance_name: name of the instance whose stash is loaded
    @return: the deserialized content of the stash file
    @raise HypervisorError: if the file can't be read or deserialized

    """
    stash_path = self._InstanceStashFilePath(instance_name)
    try:
        raw = utils.ReadFile(stash_path)
        return serializer.Load(raw)
    except (EnvironmentError, ValueError) as exc:
        raise HypervisorError(
            "Failed to load instance stash file %s : %s" % (stash_path, exc))
def StartInstance(self, instance, block_devices, startup_paused):
    """Start an instance.

    For LXC, we try to mount the block device and execute 'lxc-start'.
    We use volatile containers.

    NOTE(review): the code visible here only ensures that the instance
    directory exists; the mount and start steps are not part of this
    snippet.

    @param instance: the instance object; its C{name} selects the directory
    @param block_devices: not used by the visible code
    @param startup_paused: not used by the visible code
    @raise HypervisorError: if the instance directory can't be created

    """
    root_dir = self._InstanceDir(instance.name)
    try:
        utils.EnsureDirs([(root_dir, self._DIR_MODE)])
    except errors.GenericError as err:
        # Format the message here; passing the value as a second argument
        # would leave the "%s" placeholder unexpanded.
        raise HypervisorError("Creating instance directory failed: %s" %
                              str(err))
def CleanupInstance(self, instance_name):
    """Cleanup after a stopped instance.

    Unmounts the sub-mountpoints of the instance root directory and then
    the root directory itself. Nothing is done if the directory is missing.

    @type instance_name: string
    @param instance_name: name of the instance to clean up
    @raise HypervisorError: if the chroot is still in use or the root
        directory can't be unmounted

    """
    root_dir = self._InstanceDir(instance_name)

    if not os.path.exists(root_dir):
        return

    if self._IsDirLive(root_dir):
        raise HypervisorError("Processes are still using the chroot")

    # Failures on the sub-mountpoints are ignored
    for mpath in self._GetMountSubdirs(root_dir):
        utils.RunCmd(["umount", mpath])

    root_umount = utils.RunCmd(["umount", root_dir])
    if not root_umount.failed:
        return

    # Report which processes keep the mount busy
    fuser_output = utils.RunCmd("fuser -vm %s" % root_dir).output
    msg = "Processes still alive in the chroot: %s" % fuser_output
    logging.error(msg)
    raise HypervisorError("Can't umount the chroot dir: %s (%s)" %
                          (root_umount.output, msg))
def _GetCgroupEnabledKernelSubsystems(cls):
    """Return cgroup subsystems list that are enabled in current kernel.

    The list holds the first whitespace-separated field of every line of
    C{cls._PROC_CGROUPS_FILE} that is neither empty nor starts with "#".

    @rtype: list of strings
    @raise HypervisorError: if the cgroup info file can't be read

    """
    try:
        table = utils.ReadFile(cls._PROC_CGROUPS_FILE)
    except EnvironmentError as err:
        raise HypervisorError("Failed to read cgroup info from %s: %s" %
                              (cls._PROC_CGROUPS_FILE, err))

    subsystems = []
    for line in table.split("\n"):
        # Skip blank lines and the commented header
        if not line or line.startswith("#"):
            continue
        subsystems.append(line.split(None, 1)[0])
    return subsystems
def _GetCurrentCgroupSubsysGroups(cls):
    """Return the dict of cgroup subsystem hierarchies this process belongs
    to.

    The dictionary has the cgroup subsystem as a key and its hierarchy as a
    value. Information is read from /proc/self/cgroup.

    NOTE(review): the code visible here only reads
    C{cls._PROC_SELF_CGROUP_FILE} and translates read errors; building the
    dictionary is not part of this snippet.

    @raise HypervisorError: if the file can't be read

    """
    try:
        cgroup_list = utils.ReadFile(cls._PROC_SELF_CGROUP_FILE)
    except EnvironmentError as err:
        raise HypervisorError("Failed to read %s : %s" %
                              (cls._PROC_SELF_CGROUP_FILE, err))
def _GetLXCVersionFromCmd(cls, from_cmd):
    """Return the LXC version currently used in the system.

    Version information is retrieved by running C{<from_cmd> --version}.

    @param from_cmd: the lxc command used to retrieve version information
    @type from_cmd: string
    @rtype: L{LXCVersion}
    @return: a version object which represents the version retrieved from
        the command
    @raise HypervisorError: if the command fails or its output can't be
        parsed as a version

    """
    outcome = utils.RunCmd([from_cmd, "--version"])
    if outcome.failed:
        raise HypervisorError(
            "Failed to get version info from command %s: %s" %
            (from_cmd, outcome.output))

    version_text = outcome.stdout.strip()
    try:
        version = LXCVersion(version_text)
    except ValueError as err:
        raise HypervisorError("Can't parse LXC version from %s: %s" %
                              (from_cmd, err))
    return version
def BalloonInstanceMemory(self, instance, mem):
    """Balloon an instance memory to a certain value.

    The cgroup memory parameters of the instance are set to the new value.
    If setting one of them fails, the parameters already changed are set
    back to the previous limit before the error is reported.

    @type instance: L{objects.Instance}
    @param instance: instance to be accepted
    @type mem: int
    @param mem: actual memory size to use for instance runtime; it is
        multiplied by 1024**2 to obtain the value in bytes
    @raise HypervisorError: if a cgroup parameter can't be set

    """
    mem_in_bytes = mem * 1024**2
    # This is the currently configured limit, used both to decide the
    # direction of the change and as the value to restore on failure.
    current_mem_usage = self._GetCgroupMemoryLimit(instance.name)
    shrinking = mem_in_bytes <= current_mem_usage

    # The memsw.limit_in_bytes parameter might be present depending on
    # kernel parameters.
    # If present, it has to be modified at the same time as limit_in_bytes.
    if LXCHypervisor._IsCgroupParameterPresent(self._MEMORY_SWAP_PARAMETER,
                                               instance.hvparams):
        # memory.memsw.limit_in_bytes is the superlimit of
        # memory.limit_in_bytes so the order of setting these parameters is
        # quite important.
        cgparams = [self._MEMORY_SWAP_PARAMETER, self._MEMORY_PARAMETER]
    else:
        cgparams = [self._MEMORY_PARAMETER]

    if shrinking:
        cgparams.reverse()

    for i, cgparam in enumerate(cgparams):
        try:
            self._SetCgroupInstanceValue(instance.name, cgparam,
                                         str(mem_in_bytes))
        except EnvironmentError as err:
            if shrinking and err.errno == errno.EBUSY:
                logging.warning(
                    "Unable to reclaim memory or swap usage from instance"
                    " %s", instance.name)
            # Restore changed parameters for an atomicity
            for restore_param in cgparams[0:i]:
                try:
                    self._SetCgroupInstanceValue(instance.name,
                                                 restore_param,
                                                 str(current_mem_usage))
                except EnvironmentError as restore_err:
                    logging.warning(
                        "Can't restore the cgroup parameter %s of %s: %s",
                        restore_param, instance.name, restore_err)

            raise HypervisorError(
                "Failed to balloon the memory of %s, can't set"
                " cgroup parameter %s: %s" % (instance.name, cgparam, err))
def MigrateInstance(self, cluster_name, instance, target, live):
    """Migrate an instance.

    Migration is not supported by the chroot hypervisor, so this method
    always fails; none of the arguments are used.

    @type cluster_name: string
    @param cluster_name: name of the cluster
    @type instance: L{objects.Instance}
    @param instance: the instance to be migrated
    @type target: string
    @param target: hostname (usually ip) of the target node
    @type live: boolean
    @param live: whether to do a live or non-live migration
    @raise HypervisorError: always

    """
    reason = "Migration not supported by the chroot hypervisor"
    raise HypervisorError(reason)
def _SaveInstanceStash(self, instance_name, data):
    """Save data to the instance stash file in serialized format.

    @type instance_name: string
    @param instance_name: name of the instance the stash belongs to
    @param data: the data to serialize with C{serializer.Dump}
    @raise HypervisorError: if the stash file can't be written

    """
    stash_path = self._InstanceStashFilePath(instance_name)
    payload = serializer.Dump(data)
    write_args = {
        "data": payload,
        "mode": constants.SECURE_FILE_MODE,
    }
    try:
        utils.WriteFile(stash_path, **write_args)
    except EnvironmentError as exc:
        raise HypervisorError(
            "Failed to save instance stash file %s : %s" % (stash_path, exc))
def _PrepareInstanceRootFsBdev(cls, storage_path, stash):
    """Return mountable path for storage_path.

    This function creates a partition mapping for storage_path and returns
    the first partition device path as a rootfs partition, and stashes the
    loopback device path.
    If storage_path is not a multi-partition block device, just return
    storage_path.

    NOTE(review): the code visible here only creates the partition mapping
    and translates its errors; the handling of the result (C{ret}) and of
    C{stash} is not part of this snippet.

    @param storage_path: path of the storage to map
    @param stash: not used by the visible code
    @raise HypervisorError: if the partition mapping can't be created

    """
    try:
        ret = utils.CreateBdevPartitionMapping(storage_path)
    except errors.CommandError as err:
        raise HypervisorError("Failed to create partition mapping for %s"
                              ": %s" % (storage_path, err))
def GetInstanceInfo(self, instance_name, hvparams=None):
    """Get instance properties.

    Only the name and the running state carry information; all numeric
    fields are reported as 0.

    @type instance_name: string
    @param instance_name: the instance name
    @type hvparams: dict of strings
    @param hvparams: hvparams to be used with this instance (not used here)
    @return: (name, id, memory, vcpus, stat, times)
    @raise HypervisorError: if the instance directory is not live

    """
    if self._IsDirLive(self._InstanceDir(instance_name)):
        state = hv_base.HvInstanceState.RUNNING
        return (instance_name, 0, 0, 0, state, 0)
    raise HypervisorError("Instance %s is not running" % instance_name)
def _CleanupInstance(self, instance_name, stash):
    """Actual implementation of the instance cleanup procedure.

    If the stash records an allocated loop device, its partition mapping
    is released.

    @type instance_name: string
    @param instance_name: instance name (not used by the visible code)
    @type stash: dict(string:any)
    @param stash: dict that contains desired information for instance
        cleanup
    @raise HypervisorError: if releasing the partition mapping fails

    """
    try:
        if self._STASH_KEY_ALLOCATED_LOOP_DEV in stash:
            loop_dev_path = stash[self._STASH_KEY_ALLOCATED_LOOP_DEV]
            utils.ReleaseBdevPartitionMapping(loop_dev_path)
    except errors.CommandError as err:
        raise HypervisorError("Failed to cleanup partition mapping : %s" %
                              err)
def _WaitForInstanceState(cls, instance_name, state, timeout):
    """Wait for an instance state transition within timeout.

    Runs C{lxc-wait} for the given instance and state.

    @param instance_name: name of the instance to wait for
    @param state: the desired state, passed to C{lxc-wait -s}
    @param timeout: timeout passed to C{utils.RunCmd}
    @rtype: boolean
    @return: True if the instance state changed to the desired state within
        timeout secs, False if the command timed out
    @raise HypervisorError: if the command fails for another reason

    """
    wait_cmd = ["lxc-wait", "-n", instance_name, "-s", state]
    outcome = utils.RunCmd(wait_cmd, timeout=timeout)

    if outcome.failed_by_timeout:
        return False
    if outcome.failed:
        raise HypervisorError("Failure while waiting for instance state"
                              " transition: %s" % outcome.output)
    return True
def _MountCgroupSubsystem(cls, subsystem):
    """Mount the cgroup subsystem fs under the cgroup root dir.

    NOTE(review): the code visible here only creates the subsystem
    directory below the cgroup mount point; the mount itself and the return
    value are not part of this snippet.

    @type subsystem: string
    @param subsystem: cgroup subsystem name to mount
    @rtype: string
    @return: path of subsystem mount point
    @raise HypervisorError: if the subsystem directory can't be created

    """
    subsys_dir = utils.PathJoin(cls._GetCgroupMountPoint(), subsystem)
    if not os.path.isdir(subsys_dir):
        try:
            os.makedirs(subsys_dir)
        except EnvironmentError as err:
            raise HypervisorError("Failed to create directory %s: %s" %
                                  (subsys_dir, err))
def GetInstanceConsole(cls, instance, primary_node,  # pylint: disable=W0221
                       node_group, hvparams, beparams, root_dir=None):
    """Return information for connecting to the console of an instance.

    The console is an SSH connection to the primary node that runs
    C{chroot} on the instance root directory.

    @param instance: the instance object; its C{name} is read
    @param primary_node: the node object; its C{name} is used as host
    @param node_group: used to obtain the node parameters via C{FillND}
    @param hvparams: not used by this implementation
    @param beparams: not used by this implementation
    @param root_dir: root directory to use; defaults to the instance dir
    @return: an L{objects.InstanceConsole} object
    @raise HypervisorError: if the root directory is not a mount point

    """
    if root_dir is None:
        root_dir = cls._InstanceDir(instance.name)

    mounted = os.path.ismount(root_dir)
    if not mounted:
        raise HypervisorError("Instance %s is not running" % instance.name)

    ndparams = node_group.FillND(primary_node)
    ssh_port = ndparams.get(constants.ND_SSH_PORT)
    chroot_cmd = ["chroot", root_dir]

    return objects.InstanceConsole(instance=instance.name,
                                   kind=constants.CONS_SSH,
                                   host=primary_node.name,
                                   port=ssh_port,
                                   user=constants.SSH_CONSOLE_USER,
                                   command=chroot_cmd)