Code example #1
File: iallocator.py Project: sajalcody/ganeti
    def _GetAttributeFromHypervisorNodeData(hv_info, node_name, attr):
        """Extract an attribute from the hypervisor's node information.

    This is a helper function to extract data from the hypervisor's information
    about the node, as part of the result of a node_info query.

    @type hv_info: dict of strings
    @param hv_info: dictionary of node information from the hypervisor
    @type node_name: string
    @param node_name: name of the node
    @type attr: string
    @param attr: key of the attribute in the hv_info dictionary
    @rtype: integer
    @return: the value of the attribute
    @raises errors.OpExecError: if key not in dictionary or value not
      integer

    """
        if attr not in hv_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (node_name, attr))
        value = hv_info[attr]
        if not isinstance(value, int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (node_name, attr, value))
        return value
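For reference, here is a minimal standalone sketch of the same lookup-and-type-check pattern, using a plain ValueError instead of ganeti's errors.OpExecError; the function name and the dictionary below are illustrative, not part of ganeti:

    def get_int_attribute(hv_info, node_name, attr):
        # Mirror the checks above: the key must exist and its value must be an int.
        if attr not in hv_info:
            raise ValueError("Node '%s' didn't return attribute '%s'" %
                             (node_name, attr))
        value = hv_info[attr]
        if not isinstance(value, int):
            raise ValueError("Node '%s' returned invalid value for '%s': %s" %
                             (node_name, attr, value))
        return value

    # Hypothetical node_info-style data:
    print(get_int_attribute({"memory_total": 4096}, "node1.example.com",
                            "memory_total"))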
Code example #2
File: iallocator.py Project: sajalcody/ganeti
    def _ValidateResult(self):
        """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
        try:
            rdict = serializer.Load(self.out_text)
        except Exception as err:
            raise errors.OpExecError("Can't parse iallocator results: %s" %
                                     str(err))

        if not isinstance(rdict, dict):
            raise errors.OpExecError(
                "Can't parse iallocator results: not a dict")

        # TODO: remove backwards compatibility in later versions
        if "nodes" in rdict and "result" not in rdict:
            rdict["result"] = rdict["nodes"]
            del rdict["nodes"]

        for key in "success", "info", "result":
            if key not in rdict:
                raise errors.OpExecError("Can't parse iallocator results:"
                                         " missing key '%s'" % key)
            setattr(self, key, rdict[key])

        self.req.ValidateResult(self, self.result)
        self.out_data = rdict
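To make the expected allocator output concrete, here is a small sketch of the result dictionary _ValidateResult accepts, including the legacy "nodes" key that is renamed to "result"; the payload values are made up for illustration and serializer.Load is replaced with the standard json module:

    import json

    legacy_output = json.dumps({
        "success": True,
        "info": "allocation computed",
        "nodes": ["node1.example.com", "node2.example.com"],  # legacy key
    })

    rdict = json.loads(legacy_output)
    # Backwards compatibility: old allocators returned "nodes" instead of "result".
    if "nodes" in rdict and "result" not in rdict:
        rdict["result"] = rdict.pop("nodes")

    assert all(key in rdict for key in ("success", "info", "result"))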
Code example #3
File: network.py Project: volans-/ganeti
    def Exec(self, feedback_fn):
        """Add the ip pool to the cluster.

    """
        nobj = objects.Network(name=self.op.network_name,
                               network=self.op.network,
                               gateway=self.op.gateway,
                               network6=self.op.network6,
                               gateway6=self.op.gateway6,
                               mac_prefix=self.op.mac_prefix,
                               uuid=self.network_uuid)
        # Initialize the associated address pool
        try:
            pool = network.AddressPool.InitializeNetwork(nobj)
        except errors.AddressPoolError as err:
            raise errors.OpExecError(
                "Cannot create IP address pool for network"
                " '%s': %s" % (self.op.network_name, err))

        # Check if we need to reserve the nodes and the cluster master IP
        # These may not be allocated to any instances in routed mode, as
        # they wouldn't function anyway.
        if self.op.conflicts_check:
            for node in self.cfg.GetAllNodesInfo().values():
                for ip in [node.primary_ip, node.secondary_ip]:
                    try:
                        if pool.Contains(ip):
                            pool.Reserve(ip, external=True)
                            self.LogInfo(
                                "Reserved IP address of node '%s' (%s)",
                                node.name, ip)
                    except errors.AddressPoolError as err:
                        self.LogWarning(
                            "Cannot reserve IP address '%s' of node '%s': %s",
                            ip, node.name, err)

            master_ip = self.cfg.GetClusterInfo().master_ip
            try:
                if pool.Contains(master_ip):
                    pool.Reserve(master_ip, external=True)
                    self.LogInfo("Reserved cluster master IP address (%s)",
                                 master_ip)
            except errors.AddressPoolError as err:
                self.LogWarning(
                    "Cannot reserve cluster master IP address (%s): %s",
                    master_ip, err)

        if self.op.add_reserved_ips:
            for ip in self.op.add_reserved_ips:
                try:
                    pool.Reserve(ip, external=True)
                except errors.AddressPoolError as err:
                    raise errors.OpExecError(
                        "Cannot reserve IP address '%s': %s" % (ip, err))

        if self.op.tags:
            for tag in self.op.tags:
                nobj.AddTag(tag)

        self.cfg.AddNetwork(nobj, self.proc.GetECId(), check_uuid=False)
Code example #4
def _TestJobSubmission(opts):
    """Tests submitting jobs.

  """
    ToStdout("Testing job submission")

    testdata = [
        (0, 0, constants.OP_PRIO_LOWEST),
        (0, 0, constants.OP_PRIO_HIGHEST),
    ]

    for priority in (constants.OP_PRIO_SUBMIT_VALID | frozenset(
        [constants.OP_PRIO_LOWEST, constants.OP_PRIO_HIGHEST])):
        for offset in [-1, +1]:
            testdata.extend([
                (0, 0, priority + offset),
                (3, 0, priority + offset),
                (0, 3, priority + offset),
                (4, 2, priority + offset),
            ])

    for before, after, failpriority in testdata:
        ops = []
        ops.extend([opcodes.OpTestDelay(duration=0) for _ in range(before)])
        ops.append(opcodes.OpTestDelay(duration=0, priority=failpriority))
        ops.extend([opcodes.OpTestDelay(duration=0) for _ in range(after)])

        try:
            cl = cli.GetClient()
            cl.SubmitJob(ops)
        except errors.GenericError as err:
            if opts.debug:
                ToStdout("Ignoring error for 'wrong priority' test: %s", err)
        else:
            raise errors.OpExecError(
                "Submitting opcode with priority %s did not"
                " fail when it should (allowed are %s)" %
                (failpriority, constants.OP_PRIO_SUBMIT_VALID))

        jobs = [
            [
                opcodes.OpTestDelay(duration=0),
                opcodes.OpTestDelay(duration=0, dry_run=False),
                opcodes.OpTestDelay(duration=0, dry_run=True)
            ],
            ops,
        ]
        try:
            cl = cli.GetClient()
            cl.SubmitManyJobs(jobs)
        except errors.GenericError as err:
            if opts.debug:
                ToStdout("Ignoring error for 'wrong priority' test: %s", err)
        else:
            raise errors.OpExecError(
                "Submitting manyjobs with an incorrect one"
                " did not fail when it should.")
    ToStdout("Job submission tests were successful")
Code example #5
File: group.py Project: sajalcody/ganeti
    def CheckPrereq(self):
        """Check prerequisites.

    """
        assert self.needed_locks[locking.LEVEL_NODEGROUP]
        assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) == frozenset(
            self.op.node_uuids))

        expected_locks = (set([self.group_uuid]) |
                          self.cfg.GetNodeGroupsFromNodes(self.op.node_uuids))
        actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
        if actual_locks != expected_locks:
            raise errors.OpExecError(
                "Nodes changed groups since locks were acquired,"
                " current groups are '%s', used to be '%s'" %
                (utils.CommaJoin(expected_locks),
                 utils.CommaJoin(actual_locks)))

        self.node_data = self.cfg.GetAllNodesInfo()
        self.group = self.cfg.GetNodeGroup(self.group_uuid)
        instance_data = self.cfg.GetAllInstancesInfo()

        if self.group is None:
            raise errors.OpExecError(
                "Could not retrieve group '%s' (UUID: %s)" %
                (self.op.group_name, self.group_uuid))

        (new_splits, previous_splits) = \
          self.CheckAssignmentForSplitInstances([(uuid, self.group_uuid)
                                                 for uuid in self.op.node_uuids],
                                                self.node_data, instance_data)

        if new_splits:
            fmt_new_splits = utils.CommaJoin(
                utils.NiceSort(self.cfg.GetInstanceNames(new_splits)))

            if not self.op.force:
                raise errors.OpExecError(
                    "The following instances get split by this"
                    " change and --force was not given: %s" % fmt_new_splits)
            else:
                self.LogWarning(
                    "This operation will split the following instances: %s",
                    fmt_new_splits)

                if previous_splits:
                    self.LogWarning(
                        "In addition, these already-split instances continue"
                        " to be split across groups: %s",
                        utils.CommaJoin(
                            utils.NiceSort(
                                self.cfg.GetInstanceNames(previous_splits))))
Code example #6
File: ssh.py Project: sajalcody/ganeti
def GetUserFiles(user,
                 mkdir=False,
                 dircheck=True,
                 kind=constants.SSHK_DSA,
                 _homedir_fn=None):
    """Return the paths of a user's SSH files.

  @type user: string
  @param user: Username
  @type mkdir: bool
  @param mkdir: Whether to create ".ssh" directory if it doesn't exist
  @type dircheck: bool
  @param dircheck: Whether to check if ".ssh" directory exists
  @type kind: string
  @param kind: One of L{constants.SSHK_ALL}
  @rtype: tuple; (string, string, string)
  @return: Tuple containing three file system paths; the private SSH key file,
    the public SSH key file and the user's C{authorized_keys} file
  @raise errors.OpExecError: When the home directory of the user cannot be
    determined
  @raise errors.OpExecError: Regardless of the C{mkdir} parameter, this
    exception is raised if C{~$user/.ssh} is not a directory and C{dircheck}
    is set to C{True}

  """
    if _homedir_fn is None:
        _homedir_fn = utils.GetHomeDir

    user_dir = _homedir_fn(user)
    if not user_dir:
        raise errors.OpExecError("Cannot resolve home of user '%s'" % user)

    if kind == constants.SSHK_DSA:
        suffix = "dsa"
    elif kind == constants.SSHK_RSA:
        suffix = "rsa"
    elif kind == constants.SSHK_ECDSA:
        suffix = "ecdsa"
    else:
        raise errors.ProgrammerError("Unknown SSH key kind '%s'" % kind)

    ssh_dir = utils.PathJoin(user_dir, ".ssh")
    if mkdir:
        utils.EnsureDirs([(ssh_dir, constants.SECURE_DIR_MODE)])
    elif dircheck and not os.path.isdir(ssh_dir):
        raise errors.OpExecError("Path %s is not a directory" % ssh_dir)

    return [utils.PathJoin(ssh_dir, base)
            for base in ["id_%s" % suffix, "id_%s.pub" % suffix,
                         "authorized_keys"]]
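As a usage-oriented sketch, the three returned paths can be rebuilt from a home directory and a key suffix without ganeti's utils helpers; the directory and suffix below are illustrative:

    import os.path

    def user_ssh_paths(home_dir, suffix="rsa"):
        # Same layout as GetUserFiles: private key, public key, authorized_keys.
        ssh_dir = os.path.join(home_dir, ".ssh")
        return (os.path.join(ssh_dir, "id_%s" % suffix),
                os.path.join(ssh_dir, "id_%s.pub" % suffix),
                os.path.join(ssh_dir, "authorized_keys"))

    priv, pub, auth = user_ssh_paths("/root", suffix="rsa")
    print(priv, pub, auth)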
Code example #7
File: base.py Project: volans-/ganeti
    def _GetNames(self, lu, all_names, lock_level):
        """Helper function to determine names asked for in the query.

    """
        if self.do_locking:
            names = lu.owned_locks(lock_level)
        else:
            names = all_names

        if self.wanted == locking.ALL_SET:
            assert not self.names
            # caller didn't specify names, so ordering is not important
            return utils.NiceSort(names)

        # caller specified names and we must keep the same order
        assert self.names

        missing = set(self.wanted).difference(names)
        if missing:
            raise errors.OpExecError(
                "Some items were removed before retrieving"
                " their data: %s" % missing)

        # Return expanded names
        return self.wanted
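The ordering rule can be isolated in a short standalone sketch: caller-specified names keep their order, and anything that disappeared between locking and the query shows up in the set difference (the names below are hypothetical):

    def select_names(wanted, available):
        # Missing items are detected with a set difference; the order of
        # 'wanted' is preserved in the return value.
        missing = set(wanted).difference(available)
        if missing:
            raise LookupError("Some items were removed before retrieving"
                              " their data: %s" % missing)
        return wanted

    print(select_names(["node2", "node1"], {"node1", "node2", "node3"}))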
Code example #8
    def _LockAndExecLU(self, lu, level, calc_timeout):
        """Execute a Logical Unit, with the needed locks.

    This is a recursive function that starts locking the given level, and
    proceeds up, till there are no more locks to acquire. Then it executes the
    given LU and its opcodes.

    """
        adding_locks = level in lu.add_locks
        acquiring_locks = level in lu.needed_locks

        if level not in locking.LEVELS:
            _VerifyLocks(lu)

            if self._cbs:
                self._cbs.NotifyStart()

            try:
                result = self._ExecLU(lu)
            except AssertionError as err:
                # this is a bit ugly, as we don't know from which phase
                # (prereq, exec) this comes; but it's better than an exception
                # with no information
                (_, _, tb) = sys.exc_info()
                err_info = traceback.format_tb(tb)
                del tb
                logging.exception("Detected AssertionError")
                raise errors.OpExecError(
                    "Internal assertion error: please report"
                    " this as a bug.\nError message: '%s';"
                    " location:\n%s" % (str(err), err_info[-1]))
Code example #9
File: masterd.py Project: ribag/ganeti-experiments
def _SetWatcherPause(context, ec_id, until):
    """Creates or removes the watcher pause file.

  @type context: L{GanetiContext}
  @param context: Global Ganeti context
  @type until: None or int
  @param until: Unix timestamp saying until when the watcher shouldn't run

  """
    node_names = context.GetConfig(ec_id).GetNodeList()

    if until is None:
        logging.info("Received request to no longer pause watcher")
    else:
        if not ht.TNumber(until):
            raise TypeError("Duration must be numeric")

        if until < time.time():
            raise errors.GenericError(
                "Unable to set pause end time in the past")

        logging.info("Received request to pause watcher until %s", until)

    result = context.rpc.call_set_watcher_pause(node_names, until)

    errmsg = utils.CommaJoin("%s (%s)" % (node_name, nres.fail_msg)
                             for (node_name, nres) in result.items()
                             if nres.fail_msg and not nres.offline)
    if errmsg:
        raise errors.OpExecError(
            "Watcher pause was set where possible, but failed"
            " on the following node(s): %s" % errmsg)

    return until
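The validation of the until argument can be summarized in a standalone sketch: None clears the pause, anything else must be a numeric Unix timestamp that is not in the past (ht.TNumber is replaced with a plain isinstance check here):

    import time

    def check_pause_until(until):
        # None means "unpause the watcher"; otherwise expect a future timestamp.
        if until is None:
            return None
        if not isinstance(until, (int, float)):
            raise TypeError("Duration must be numeric")
        if until < time.time():
            raise ValueError("Unable to set pause end time in the past")
        return until

    print(check_pause_until(time.time() + 3600))  # pause for one hour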
Code example #10
    def Exec(self, feedback_fn):
        """Connect to the console of an instance

    """
        node_uuid = self.instance.primary_node

        cluster_hvparams = self.cfg.GetClusterInfo().hvparams
        node_insts = self.rpc.call_instance_list([node_uuid],
                                                 [self.instance.hypervisor],
                                                 cluster_hvparams)[node_uuid]
        node_insts.Raise("Can't get node information from %s" %
                         self.cfg.GetNodeName(node_uuid))

        if self.instance.name not in node_insts.payload:
            if self.instance.admin_state == constants.ADMINST_UP:
                state = constants.INSTST_ERRORDOWN
            elif self.instance.admin_state == constants.ADMINST_DOWN:
                state = constants.INSTST_ADMINDOWN
            else:
                state = constants.INSTST_ADMINOFFLINE
            raise errors.OpExecError("Instance %s is not running (state %s)" %
                                     (self.instance.name, state))

        logging.debug("Connecting to console of %s on %s", self.instance.name,
                      self.cfg.GetNodeName(node_uuid))

        node = self.cfg.GetNodeInfo(self.instance.primary_node)
        group = self.cfg.GetNodeGroup(node.group)
        return GetInstanceConsole(self.cfg.GetClusterInfo(), self.instance,
                                  node, group)
Code example #11
File: gnt_debug.py Project: vladimir-ipatov/ganeti-1
  def ReportLogMessage(self, job_id, serial, timestamp, log_type, log_msg):
    """Handles a log message.

    """
    if self._job_id is None:
      self._job_id = job_id
    elif self._job_id != job_id:
      raise errors.ProgrammerError("The same reporter instance was used for"
                                   " more than one job")

    if log_type == constants.ELOG_JQUEUE_TEST:
      (sockname, test, arg) = log_msg
      return self._ProcessTestMessage(job_id, sockname, test, arg)

    elif (log_type == constants.ELOG_MESSAGE and
          log_msg.startswith(constants.JQT_MSGPREFIX)):
      if self._testmsgs is None:
        raise errors.OpExecError("Received test message without a preceding"
                                 " start message")
      testmsg = log_msg[len(constants.JQT_MSGPREFIX):]
      self._testmsgs.append(testmsg)
      self._all_testmsgs.append(testmsg)
      return

    return cli.StdioJobPollReportCb.ReportLogMessage(self, job_id, serial,
                                                     timestamp, log_type,
                                                     log_msg)
Code example #12
  def RunCommand(self, cluster_name, node, base_cmd, port, data,
                 debug=False, verbose=False, use_cluster_key=False,
                 ask_key=False, strict_host_check=False,
                 ensure_version=False):
    """This emulates ssh.RunSshCmdWithStdin calling ssh_update.

    While in real SSH operations ssh.RunSshCmdWithStdin is called
    with the ssh_update command to manipulate a remote node's SSH
    key files (authorized_keys and ganeti_pub_key), this method
    emulates the operation by manipulating only its internal dictionaries
    of SSH keys. No actual key files of any node are touched.

    """
    if node in self._max_retries:
      if node not in self._retries:
        self._retries[node] = 0
      self._retries[node] += 1
      if self._retries[node] < self._max_retries[node]:
        raise errors.OpExecError("(Fake) SSH connection to node '%s' failed."
                                 % node)

    assert base_cmd == pathutils.SSH_UPDATE

    if constants.SSHS_SSH_AUTHORIZED_KEYS in data:
      instructions_auth = data[constants.SSHS_SSH_AUTHORIZED_KEYS]
      self._HandleAuthorizedKeys(instructions_auth, node)
    if constants.SSHS_SSH_PUBLIC_KEYS in data:
      instructions_pub = data[constants.SSHS_SSH_PUBLIC_KEYS]
      self._HandlePublicKeys(instructions_pub, node)
    if constants.SSHS_GENERATE in data:
      instructions_generate = data[constants.SSHS_GENERATE]
      self._GenerateNewKey(instructions_generate, node)
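The retry bookkeeping of this fake can be reduced to a small standalone class: a node configured with N max retries fails the first N-1 calls and succeeds afterwards; the class and node name below are purely illustrative:

    class FlakyNodeEmulator:
        def __init__(self, max_retries):
            self._max_retries = max_retries  # e.g. {"node1": 3}
            self._retries = {}

        def run(self, node):
            # Fail until the node has been attempted max_retries[node] times.
            if node in self._max_retries:
                self._retries[node] = self._retries.get(node, 0) + 1
                if self._retries[node] < self._max_retries[node]:
                    raise RuntimeError("(Fake) SSH connection to node '%s'"
                                       " failed." % node)
            return "ok"

    emulator = FlakyNodeEmulator({"node1": 3})
    for _ in range(3):
        try:
            print(emulator.run("node1"))
        except RuntimeError as err:
            print(err)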
Code example #13
    def Exec(self, feedback_fn):
        if self.op.osparams_secret:
            msg = ("Secret OS parameters: %s" %
                   self.op.osparams_secret.Unprivate())
            feedback_fn(msg)
        else:
            raise errors.OpExecError("Opcode needs secret parameters")
Code example #14
  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
      constants.OOB_POWER_ON,
      constants.OOB_POWER_OFF,
      constants.OOB_POWER_CYCLE,
      ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))
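As a rough illustration of the payload shapes this check enforces, the expected return values per out-of-band command look roughly like the following; the concrete item names and statuses are hypothetical, and real statuses must come from constants.OOB_STATUSES:

    # "health" should return a list of (item, status) pairs, e.g.:
    health_payload = [("disk0", "OK"), ("psu1", "CRITICAL")]  # hypothetical statuses

    # "power-status" should return a dict, e.g.:
    power_status_payload = {"powered": True}  # hypothetical key

    # power-on, power-off and power-cycle should return no payload at all:
    power_action_payload = None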
Code example #15
    def _ProcessTestMessage(self, job_id, sockname, test, arg):
        """Handles a job queue test message.

    """
        if test not in constants.JQT_ALL:
            raise errors.OpExecError("Received invalid test message %s" % test)

        sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
        try:
            sock.settimeout(30.0)

            logging.debug("Connecting to %s", sockname)
            sock.connect(sockname)

            logging.debug("Checking status")
            jobdetails = cli.GetClient().QueryJobs([job_id], ["status"])[0]
            if not jobdetails:
                raise errors.OpExecError("Can't find job %s" % job_id)

            status = jobdetails[0]

            logging.debug("Status of job %s is %s", job_id, status)

            if test == constants.JQT_EXPANDNAMES:
                if status != constants.JOB_STATUS_WAITING:
                    raise errors.OpExecError(
                        "Job status while expanding names is '%s',"
                        " not '%s' as expected" %
                        (status, constants.JOB_STATUS_WAITING))
            elif test in (constants.JQT_EXEC, constants.JQT_LOGMSG):
                if status != constants.JOB_STATUS_RUNNING:
                    raise errors.OpExecError(
                        "Job status while executing opcode is '%s',"
                        " not '%s' as expected" %
                        (status, constants.JOB_STATUS_RUNNING))

            if test == constants.JQT_STARTMSG:
                logging.debug("Expecting %s test messages", arg)
                self._testmsgs = []
            elif test == constants.JQT_LOGMSG:
                if len(self._testmsgs) != arg:
                    raise errors.OpExecError(
                        "Received %s test messages when %s are"
                        " expected" % (len(self._testmsgs), arg))
        finally:
            logging.debug("Closing socket")
            sock.close()
Code example #16
    def _DetermineImageSize(self, image_path, node_uuid):
        """ Determines the size of the specified image.

    @type image_path: string
    @param image_path: The disk path or a URL of an image.
    @type node_uuid: string
    @param node_uuid: If a file path is used, the UUID of the node whose
      file system holds the image

    @raise OpExecError: If the image does not exist.

    @rtype: int
    @return: The size in MB, rounded up.

    """

        # Check if we are dealing with a URL first
        class _HeadRequest(urllib2.Request):
            def get_method(self):
                return "HEAD"

        if utils.IsUrl(image_path):
            try:
                response = urllib2.urlopen(_HeadRequest(image_path))
            except urllib2.URLError:
                raise errors.OpExecError(
                    "Could not retrieve image from given url %s" % image_path)

            content_length_str = response.info().getheader('content-length')

            if not content_length_str:
                raise errors.OpExecError(
                    "Cannot create temporary disk: size of zeroing image at path %s "
                    "could not be retrieved through HEAD request" % image_path)

            byte_size = int(content_length_str)
        else:
            # We end up here if a file path is used
            result = self.rpc.call_get_file_info(node_uuid, image_path)
            result.Raise("Cannot determine the size of file %s" % image_path)

            success, attributes = result.payload
            if not success:
                raise errors.OpExecError("Could not open file %s" % image_path)
            byte_size = attributes[constants.STAT_SIZE]

        # Finally, the conversion
        return math.ceil(byte_size / 1024. / 1024.)
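For the URL branch, here is a modernized standalone sketch of the same size probe, using Python 3's urllib.request in place of the urllib2 API above; the error type and helper name are illustrative:

    import math
    import urllib.request

    def image_size_mb(url):
        # Issue a HEAD request and read Content-Length, as in the URL branch above.
        request = urllib.request.Request(url, method="HEAD")
        with urllib.request.urlopen(request) as response:
            content_length = response.headers.get("Content-Length")
        if not content_length:
            raise RuntimeError("No Content-Length header for %s" % url)
        # Convert bytes to MB, rounded up.
        return math.ceil(int(content_length) / 1024.0 / 1024.0)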
Code example #17
def _InitGanetiServerSetup(master_name, cfg):
    """Setup the necessary configuration for the initial node daemon.

  This creates the nodepass file containing the shared password for
  the cluster, generates the SSL certificate and starts the node daemon.

  @type master_name: str
  @param master_name: Name of the master node
  @type cfg: ConfigWriter
  @param cfg: the configuration writer

  """
    # Generate cluster secrets
    GenerateClusterCrypto(True, False, False, False, False, False, master_name)

    # Add the master's SSL certificate digest to the configuration.
    master_uuid = cfg.GetMasterNode()
    master_digest = utils.GetCertificateDigest()
    cfg.AddNodeToCandidateCerts(master_uuid, master_digest)
    cfg.Update(cfg.GetClusterInfo(), logging.error)
    ssconf.WriteSsconfFiles(cfg.GetSsconfValues())

    if not os.path.exists(
            os.path.join(
                pathutils.DATA_DIR, "%s%s" %
                (constants.SSCONF_FILEPREFIX,
                 constants.SS_MASTER_CANDIDATES_CERTS))):
        raise errors.OpExecError(
            "Ssconf file for master candidate certificates"
            " was not written.")

    if not os.path.exists(pathutils.NODED_CERT_FILE):
        raise errors.OpExecError(
            "The server certificate was not created properly.")

    if not os.path.exists(pathutils.NODED_CLIENT_CERT_FILE):
        raise errors.OpExecError("The client certificate was not created"
                                 " properly.")

    # set up the inter-node password and certificate
    result = utils.RunCmd([pathutils.DAEMON_UTIL, "start", constants.NODED])
    if result.failed:
        raise errors.OpExecError("Could not start the node daemon, command %s"
                                 " had exitcode %s and error %s" %
                                 (result.cmd, result.exit_code, result.output))

    _WaitForNodeDaemon(master_name)
Code example #18
    def Exec(self, feedback_fn):
        """Sets the tag.

    """
        try:
            for tag in self.op.tags:
                self.target.AddTag(tag)
        except errors.TagError as err:
            raise errors.OpExecError("Error while setting tag: %s" % str(err))
Code example #19
File: group.py Project: sajalcody/ganeti
    def Exec(self, feedback_fn):
        """Remove the node group.

    """
        try:
            self.cfg.RemoveNodeGroup(self.group_uuid)
        except errors.ConfigurationError:
            raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                                     (self.op.group_name, self.group_uuid))
Code example #20
  def Exec(self, feedback_fn):
    """Remove the network.

    """
    try:
      self.cfg.RemoveNetwork(self.network_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Network '%s' with UUID %s disappeared" %
                               (self.op.network_name, self.network_uuid))
Code example #21
File: group.py Project: sajalcody/ganeti
    def CheckPrereq(self):
        """Check prerequisites.

    """
        owned_instance_names = frozenset(
            self.owned_locks(locking.LEVEL_INSTANCE))

        # Check if locked instances are still correct
        CheckNodeGroupInstances(self.cfg, self.group_uuid,
                                owned_instance_names)

        self.group = self.cfg.GetNodeGroup(self.group_uuid)
        cluster = self.cfg.GetClusterInfo()

        if self.group is None:
            raise errors.OpExecError(
                "Could not retrieve group '%s' (UUID: %s)" %
                (self.op.group_name, self.group_uuid))

        if self.op.ndparams:
            new_ndparams = GetUpdatedParams(self.group.ndparams,
                                            self.op.ndparams)
            utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
            self.new_ndparams = new_ndparams

        if self.op.diskparams:
            diskparams = self.group.diskparams
            uavdp = self._UpdateAndVerifyDiskParams
            # For each disktemplate subdict update and verify the values
            new_diskparams = dict(
                (dt, uavdp(diskparams.get(dt, {}), self.op.diskparams[dt]))
                for dt in constants.DISK_TEMPLATES if dt in self.op.diskparams)
            # As we've all subdicts of diskparams ready, lets merge the actual
            # dict with all updated subdicts
            self.new_diskparams = objects.FillDict(diskparams, new_diskparams)

            try:
                utils.VerifyDictOptions(self.new_diskparams,
                                        constants.DISK_DT_DEFAULTS)
                CheckDiskAccessModeConsistency(self.new_diskparams,
                                               self.cfg,
                                               group=self.group)
            except errors.OpPrereqError as err:
                raise errors.OpPrereqError(
                    "While verifying diskparams options: %s" % err,
                    errors.ECODE_INVAL)

        if self.op.hv_state:
            self.new_hv_state = MergeAndVerifyHvState(
                self.op.hv_state, self.group.hv_state_static)

        if self.op.disk_state:
            self.new_disk_state = \
              MergeAndVerifyDiskState(self.op.disk_state,
                                      self.group.disk_state_static)

        self._CheckIpolicy(cluster, owned_instance_names)
Code example #22
    def _LockAndExecLU(self, lu, level, calc_timeout, pending=None):
        """Execute a Logical Unit, with the needed locks.

    This is a recursive function that starts locking the given level, and
    proceeds up, till there are no more locks to acquire. Then it executes the
    given LU and its opcodes.

    """
        pending = pending or []
        logging.debug("Looking at locks of level %s, still need to obtain %s",
                      level, pending)
        adding_locks = level in lu.add_locks
        acquiring_locks = level in lu.needed_locks

        if level not in locking.LEVELS:
            if pending:
                self._RequestAndWait(pending, calc_timeout())
                lu.wconfdlocks = self.wconfd.Client().ListLocks(
                    self._wconfdcontext)
                pending = []

            logging.debug("Finished acquiring locks")

            _VerifyLocks(lu)

            if self._cbs:
                self._cbs.NotifyStart()

            try:
                result = self._ExecLU(lu)
            except errors.OpPrereqError as err:
                (_, ecode) = err.args
                if ecode != errors.ECODE_TEMP_NORES:
                    raise
                logging.debug(
                    "Temporarily out of resources; will retry internally")
                try:
                    lu.PrepareRetry(self.Log)
                    if self._cbs:
                        self._cbs.NotifyRetry()
                except errors.OpRetryNotSupportedError:
                    logging.debug("LU does not know how to retry.")
                    raise err
                raise LockAcquireTimeout()
            except AssertionError as err:
                # this is a bit ugly, as we don't know from which phase
                # (prereq, exec) this comes; but it's better than an exception
                # with no information
                (_, _, tb) = sys.exc_info()
                err_info = traceback.format_tb(tb)
                del tb
                logging.exception("Detected AssertionError")
                raise errors.OpExecError(
                    "Internal assertion error: please report"
                    " this as a bug.\nError message: '%s';"
                    " location:\n%s" % (str(err), err_info[-1]))
Code example #23
    def ZeroFreeSpace(self, feedback_fn):
        """Zeroes the free space on a shutdown instance.

    @type feedback_fn: function
    @param feedback_fn: Function used to log progress

    """
        assert self.op.zeroing_timeout_fixed is not None
        assert self.op.zeroing_timeout_per_mib is not None

        zeroing_image = self.cfg.GetZeroingImage()
        src_node_uuid = self.instance.primary_node
        disk_size = self._DetermineImageSize(zeroing_image, src_node_uuid)

        # Calculate the sum prior to adding the temporary disk
        instance_disks_size_sum = self._InstanceDiskSizeSum()

        with TemporaryDisk(self, self.instance, disk_size, feedback_fn):
            feedback_fn("Activating instance disks")
            StartInstanceDisks(self, self.instance, False)

            feedback_fn("Imaging disk with zeroing image")
            ImageDisks(self, self.instance, zeroing_image)

            feedback_fn("Starting instance with zeroing image")
            result = self.rpc.call_instance_start(src_node_uuid,
                                                  (self.instance, [], []),
                                                  False, self.op.reason)
            result.Raise(
                "Could not start instance %s when using the zeroing image "
                "%s" % (self.instance.name, zeroing_image))

            # First wait for the instance to start up
            running_check = lambda: IsInstanceRunning(
                self, self.instance, check_user_shutdown=True)
            instance_up = retry.SimpleRetry(True, running_check, 5.0,
                                            self.op.shutdown_timeout)
            if not instance_up:
                raise errors.OpExecError(
                    "Could not boot instance when using the "
                    "zeroing image %s" % zeroing_image)

            feedback_fn("Instance is up, now awaiting shutdown")

            # Then for it to be finished, detected by its shutdown
            timeout = self.op.zeroing_timeout_fixed + \
                      self.op.zeroing_timeout_per_mib * instance_disks_size_sum
            instance_up = retry.SimpleRetry(False, running_check, 20.0,
                                            timeout)
            if instance_up:
                self.LogWarning(
                    "Zeroing not completed prior to timeout; instance will"
                    " be shut down forcibly")

        feedback_fn("Zeroing completed!")
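The shutdown timeout combines a fixed component with a per-MiB component scaled by the instance's total disk size; a tiny worked example with made-up numbers:

    # Hypothetical values: 600 s fixed plus 2 s per MiB for 20 GiB of disks.
    zeroing_timeout_fixed = 600
    zeroing_timeout_per_mib = 2
    instance_disks_size_sum = 20 * 1024  # MiB

    timeout = zeroing_timeout_fixed + zeroing_timeout_per_mib * instance_disks_size_sum
    print(timeout)  # 41560 seconds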
Code example #24
  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception as err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
Code example #25
File: test.py Project: vanloswang/ganeti
  def _UninterruptibleDelay(self):
    """Delays without allowing interruptions.

    """
    if self.op.on_node_uuids:
      result = self.rpc.call_test_delay(self.op.on_node_uuids, self.op.duration)
      for node_uuid, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" %
                          self.cfg.GetNodeName(node_uuid))
    else:
      if not utils.TestDelay(self.op.duration)[0]:
        raise errors.OpExecError("Error during master delay test")
Code example #26
File: test.py Project: badp/ganeti
  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration)[0]:
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_node_uuids:
      result = self.rpc.call_test_delay(self.op.on_node_uuids, self.op.duration)
      for node_uuid, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" %
                          self.cfg.GetNodeName(node_uuid))
Code example #27
File: network.py Project: saschalucas/ganeti
  def Exec(self, feedback_fn):
    """Rename the network.

    """
    network = self.cfg.GetNetwork(self.network_uuid)

    if network is None:
      raise errors.OpExecError("Could not retrieve network '%s' (UUID: %s)" %
                               (self.op.network_name, self.network_uuid))

    network.name = self.op.new_name
    self.cfg.Update(network, feedback_fn)

    return self.op.new_name
Code example #28
File: iallocator.py Project: sajalcody/ganeti
    def _ComputeStorageDataFromSpaceInfo(space_info, node_name, has_lvm):
        """Extract storage data from node info.

    @type space_info: see result of the RPC call node info
    @param space_info: the storage reporting part of the result of the RPC call
      node info
    @type node_name: string
    @param node_name: the node's name
    @type has_lvm: boolean
    @param has_lvm: whether or not LVM storage information is requested
    @rtype: 4-tuple of integers
    @return: tuple of storage info (total_disk, free_disk, total_spindles,
       free_spindles)

    """
        # TODO: replace this with proper storage reporting
        if has_lvm:
            lvm_vg_info = utils.storage.LookupSpaceInfoByStorageType(
                space_info, constants.ST_LVM_VG)
            if not lvm_vg_info:
                raise errors.OpExecError(
                    "Node '%s' didn't return LVM vg space info." % (node_name))
            total_disk = lvm_vg_info["storage_size"]
            free_disk = lvm_vg_info["storage_free"]
            lvm_pv_info = utils.storage.LookupSpaceInfoByStorageType(
                space_info, constants.ST_LVM_PV)
            if not lvm_pv_info:
                raise errors.OpExecError(
                    "Node '%s' didn't return LVM pv space info." % (node_name))
            total_spindles = lvm_pv_info["storage_size"]
            free_spindles = lvm_pv_info["storage_free"]
        else:
            # we didn't even ask the node for VG status, so use zeros
            total_disk = free_disk = 0
            total_spindles = free_spindles = 0
        return (total_disk, free_disk, total_spindles, free_spindles)
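A standalone sketch of what the space-info lookup presumably does, assuming space_info is a list of dicts with "type", "storage_size" and "storage_free" keys; the storage-type strings and numbers below are assumptions for illustration only:

    def lookup_space_info(space_info, storage_type):
        # Return the first entry of the requested storage type, or None.
        for entry in space_info:
            if entry.get("type") == storage_type:
                return entry
        return None

    space_info = [
        {"type": "lvm-vg", "storage_size": 102400, "storage_free": 51200},
        {"type": "lvm-pv", "storage_size": 4, "storage_free": 2},
    ]
    print(lookup_space_info(space_info, "lvm-vg"))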
Code example #29
File: group.py Project: sajalcody/ganeti
    def Exec(self, feedback_fn):
        """Rename the node group.

    """
        group = self.cfg.GetNodeGroup(self.group_uuid)

        if group is None:
            raise errors.OpExecError(
                "Could not retrieve group '%s' (UUID: %s)" %
                (self.op.group_name, self.group_uuid))

        group.name = self.op.new_name
        self.cfg.Update(group, feedback_fn)

        return self.op.new_name
Code example #30
    def Exec(self, feedback_fn):
        """Reboot the instance.

    """
        cluster = self.cfg.GetClusterInfo()
        remote_info = self.rpc.call_instance_info(
            self.instance.primary_node, self.instance.name,
            self.instance.hypervisor,
            cluster.hvparams[self.instance.hypervisor])
        remote_info.Raise("Error checking node %s" %
                          self.cfg.GetNodeName(self.instance.primary_node))
        instance_running = bool(remote_info.payload)

        current_node_uuid = self.instance.primary_node

        if instance_running and \
            self.op.reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                    constants.INSTANCE_REBOOT_HARD]:
            result = self.rpc.call_instance_reboot(current_node_uuid,
                                                   self.instance,
                                                   self.op.reboot_type,
                                                   self.op.shutdown_timeout,
                                                   self.op.reason)
            result.Raise("Could not reboot instance")
        else:
            if instance_running:
                result = self.rpc.call_instance_shutdown(
                    current_node_uuid, self.instance, self.op.shutdown_timeout,
                    self.op.reason)
                result.Raise("Could not shutdown instance for full reboot")
                ShutdownInstanceDisks(self, self.instance)
                self.instance = self.cfg.GetInstanceInfo(self.instance.uuid)
            else:
                self.LogInfo("Instance %s was already stopped, starting now",
                             self.instance.name)
            StartInstanceDisks(self, self.instance, self.op.ignore_secondaries)
            self.instance = self.cfg.GetInstanceInfo(self.instance.uuid)
            result = self.rpc.call_instance_start(current_node_uuid,
                                                  (self.instance, None, None),
                                                  False, self.op.reason)
            msg = result.fail_msg
            if msg:
                ShutdownInstanceDisks(self, self.instance)
                self.instance = self.cfg.GetInstanceInfo(self.instance.uuid)
                raise errors.OpExecError("Could not start instance for"
                                         " full reboot: %s" % msg)

        self.cfg.MarkInstanceUp(self.instance.uuid)