Example #1
            assert len(disk_info) == len(self.op.target_node)
            self.dest_disk_info = disk_info

        else:
            raise errors.ProgrammerError("Unhandled export mode %r" %
                                         self.op.mode)

        # Check prerequisites for zeroing
        if self.op.zero_free_space:
            # Check that user shutdown detection has been enabled
            hvparams = self.cfg.GetClusterInfo().FillHV(self.instance)
            if self.instance.hypervisor == constants.HT_KVM and \
               not hvparams.get(constants.HV_KVM_USER_SHUTDOWN, False):
                raise errors.OpPrereqError(
                    "Instance shutdown detection must be "
                    "enabled for zeroing to work", errors.ECODE_INVAL)

            # Check that the instance is set to boot from the disk
            if constants.HV_BOOT_ORDER in hvparams and \
               hvparams[constants.HV_BOOT_ORDER] != constants.HT_BO_DISK:
                raise errors.OpPrereqError(
                    "Booting from disk must be set for zeroing "
                    "to work", errors.ECODE_INVAL)

            # Check that the zeroing image is set
            if not self.cfg.GetZeroingImage():
                raise errors.OpPrereqError(
                    "A zeroing image must be set for zeroing to"
                    " work", errors.ECODE_INVAL)
Example #2
class LUBackupExport(LogicalUnit):
    """Export an instance to an image in the cluster.

  """
    HPATH = "instance-export"
    HTYPE = constants.HTYPE_INSTANCE
    REQ_BGL = False

    def CheckArguments(self):
        """Check the arguments.

    """
        self.x509_key_name = self.op.x509_key_name
        self.dest_x509_ca_pem = self.op.destination_x509_ca

        if self.op.mode == constants.EXPORT_MODE_REMOTE:
            if not self.x509_key_name:
                raise errors.OpPrereqError(
                    "Missing X509 key name for encryption", errors.ECODE_INVAL)

            if not self.dest_x509_ca_pem:
                raise errors.OpPrereqError("Missing destination X509 CA",
                                           errors.ECODE_INVAL)

        if self.op.zero_free_space and not self.op.compress:
            raise errors.OpPrereqError(
                "Zeroing free space does not make sense "
                "unless compression is used")

        if self.op.zero_free_space and not self.op.shutdown:
            raise errors.OpPrereqError(
                "Unless the instance is shut down, zeroing "
                "cannot be used.")

    def ExpandNames(self):
        self._ExpandAndLockInstance()

        # In case we are zeroing, a node lock is required as we will be creating and
        # destroying a disk - allocations should be stopped, but not on the entire
        # cluster
        if self.op.zero_free_space:
            self.recalculate_locks = {
                locking.LEVEL_NODE: constants.LOCKS_REPLACE
            }
            self._LockInstancesNodes(primary_only=True)

        # Lock all nodes for local exports
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
            (self.op.target_node_uuid, self.op.target_node) = \
              ExpandNodeUuidAndName(self.cfg, self.op.target_node_uuid,
                                    self.op.target_node)
            # FIXME: lock only instance primary and destination node
            #
            # Sad but true, for now we have to lock all nodes, as we don't know where
            # the previous export might be, and in this LU we search for it and
            # remove it from its current node. In the future we could fix this by:
            #  - making a tasklet to search (share-lock all), then create the
            #    new one, then one to remove, after
            #  - removing the removal operation altogether
            self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    def DeclareLocks(self, level):
        """Last minute lock declaration."""
        # All nodes are locked anyway, so nothing to do here.

    def BuildHooksEnv(self):
        """Build hooks env.

        This will run on the master, primary node and target node.

        """
        env = {
            "EXPORT_MODE": self.op.mode,
            "EXPORT_NODE": self.op.target_node,
            "EXPORT_DO_SHUTDOWN": self.op.shutdown,
            "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
            # TODO: Generic function for boolean env variables
            "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
        }

        env.update(
            BuildInstanceHookEnvByObject(self,
                                         self.instance,
                                         secondary_nodes=self.secondary_nodes,
                                         disks=self.inst_disks))

        return env

    def BuildHooksNodes(self):
        """Build hooks nodes.

    """
        nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
            nl.append(self.op.target_node_uuid)

        return (nl, nl)

    def CheckPrereq(self):
        """Check prerequisites.

        This checks that the instance and node names are valid.

        """
        self.instance = self.cfg.GetInstanceInfoByName(self.op.instance_name)
        assert self.instance is not None, \
              "Cannot retrieve locked instance %s" % self.op.instance_name
        CheckNodeOnline(self, self.instance.primary_node)

        if (self.op.remove_instance
                and self.instance.admin_state == constants.ADMINST_UP
                and not self.op.shutdown):
            raise errors.OpPrereqError(
                "Cannot remove the instance without shutting it"
                " down first", errors.ECODE_STATE)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
            self.dst_node = self.cfg.GetNodeInfo(self.op.target_node_uuid)
            assert self.dst_node is not None

            CheckNodeOnline(self, self.dst_node.uuid)
            CheckNodeNotDrained(self, self.dst_node.uuid)

            self._cds = None
            self.dest_disk_info = None
            self.dest_x509_ca = None

        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
            self.dst_node = None

            if len(self.op.target_node) != len(self.instance.disks):
                raise errors.OpPrereqError(
                    ("Received destination information for %s"
                     " disks, but instance %s has %s disks") %
                    (len(self.op.target_node), self.op.instance_name,
                     len(self.instance.disks)), errors.ECODE_INVAL)

            cds = GetClusterDomainSecret()

            # Check X509 key name
            try:
                (key_name, hmac_digest, hmac_salt) = self.x509_key_name
            except (TypeError, ValueError) as err:
                raise errors.OpPrereqError(
                    "Invalid data for X509 key name: %s" % err,
                    errors.ECODE_INVAL)

            if not utils.VerifySha1Hmac(
                    cds, key_name, hmac_digest, salt=hmac_salt):
                raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                           errors.ECODE_INVAL)

            # Load and verify CA
            try:
                (cert,
                 _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem,
                                                      cds)
            except OpenSSL.crypto.Error as err:
                raise errors.OpPrereqError(
                    "Unable to load destination X509 CA (%s)" % (err, ),
                    errors.ECODE_INVAL)

            (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
            if errcode is not None:
                raise errors.OpPrereqError(
                    "Invalid destination X509 CA (%s)" % (msg, ),
                    errors.ECODE_INVAL)

            self.dest_x509_ca = cert

            # Verify target information
            disk_info = []
            for idx, disk_data in enumerate(self.op.target_node):
                try:
                    (host, port, magic) = \
                      masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
                except errors.GenericError as err:
                    raise errors.OpPrereqError(
                        "Target info for disk %s: %s" % (idx, err),
                        errors.ECODE_INVAL)

                disk_info.append((host, port, magic))
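
# A rough, standalone approximation (not Ganeti code) of the X509 key-name
# check above. It assumes utils.VerifySha1Hmac computes an HMAC-SHA1 over the
# optionally salt-prefixed key name, keyed by the cluster domain secret, and
# that all inputs are str.
import hashlib
import hmac

def verify_key_name(cds, key_name, digest, salt=None):
    text = (salt + key_name) if salt else key_name
    expected = hmac.new(cds.encode(), text.encode(), hashlib.sha1).hexdigest()
    return hmac.compare_digest(expected, digest)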
Example #3
def EvacuateNode(opts, args):
    """Relocate all secondary instance from a node.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
    if opts.dst_node is not None:
        ToStderr(
            "New secondary node given (disabling iallocator), hence evacuating"
            " secondary instances only.")
        opts.secondary_only = True
        opts.primary_only = False

    if opts.secondary_only and opts.primary_only:
        raise errors.OpPrereqError(
            "Only one of the --primary-only and"
            " --secondary-only options can be passed", errors.ECODE_INVAL)
    elif opts.primary_only:
        mode = constants.NODE_EVAC_PRI
    elif opts.secondary_only:
        mode = constants.NODE_EVAC_SEC
    else:
        mode = constants.NODE_EVAC_ALL

    # Determine affected instances
    fields = []

    if not opts.secondary_only:
        fields.append("pinst_list")
    if not opts.primary_only:
        fields.append("sinst_list")

    cl = GetClient()

    qcl = GetClient()
    result = qcl.QueryNodes(names=args, fields=fields, use_locking=False)
    qcl.Close()

    instances = set(
        itertools.chain(*itertools.chain(*itertools.chain(result))))

    if not instances:
        # No instances to evacuate
        ToStderr("No instances to evacuate on node(s) %s, exiting.",
                 utils.CommaJoin(args))
        return constants.EXIT_SUCCESS

    if not (opts.force or AskUser(
            "Relocate instance(s) %s from node(s) %s?" %
        (utils.CommaJoin(utils.NiceSort(instances)), utils.CommaJoin(args)))):
        return constants.EXIT_CONFIRMATION

    # Evacuate node
    op = opcodes.OpNodeEvacuate(node_name=args[0],
                                mode=mode,
                                remote_node=opts.dst_node,
                                iallocator=opts.iallocator,
                                early_release=opts.early_release,
                                ignore_soft_errors=opts.ignore_soft_errors)
    result = SubmitOrSend(op, opts, cl=cl)

    # Keep track of submitted jobs
    jex = JobExecutor(cl=cl, opts=opts)

    for (status, job_id) in result[constants.JOB_IDS_KEY]:
        jex.AddJobId(None, status, job_id)

    results = jex.GetResults()
    bad_cnt = len([row for row in results if not row[0]])
    if bad_cnt == 0:
        ToStdout("All instances evacuated successfully.")
        rcode = constants.EXIT_SUCCESS
    else:
        ToStdout("There were %s errors during the evacuation.", bad_cnt)
        rcode = constants.EXIT_FAILURE

    return rcode
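
# The nested itertools.chain calls in EvacuateNode flatten the QueryNodes
# result (one row per node, one list of instance names per requested field)
# into a single set. A toy illustration with made-up data:
import itertools

sample_result = [
    [["inst1", "inst2"], ["inst3"]],   # node A: pinst_list, sinst_list
    [[], ["inst4"]],                   # node B: pinst_list, sinst_list
]
flat = set(itertools.chain(*itertools.chain(*itertools.chain(sample_result))))
assert flat == {"inst1", "inst2", "inst3", "inst4"}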
Example #4
def _SetupSSH(options, cluster_name, node, ssh_port, cl):
    """Configures a destination node's SSH daemon.

    @param options: Command line options
    @type cluster_name: string
    @param cluster_name: Cluster name
    @type node: string
    @param node: Destination node name
    @type ssh_port: int
    @param ssh_port: Destination node ssh port
    @param cl: luxi client

    """
    # Retrieve the list of master and master candidates
    candidate_filter = ["|", ["=", "role", "M"], ["=", "role", "C"]]
    result = cl.Query(constants.QR_NODE, ["uuid"], candidate_filter)
    if len(result.data) < 1:
        raise errors.OpPrereqError(
            "No master or master candidate node is found.")
    candidates = [uuid for ((_, uuid), ) in result.data]
    candidate_keys = ssh.QueryPubKeyFile(candidates)

    if options.force_join:
        ToStderr(
            "The \"--force-join\" option is no longer supported and will be"
            " ignored.")

    host_keys = _ReadSshKeys(constants.SSH_DAEMON_KEYFILES)

    (_, root_keyfiles) = \
      ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)

    dsa_root_keyfiles = dict((kind, value)
                             for (kind, value) in root_keyfiles.items()
                             if kind == constants.SSHK_DSA)
    root_keys = _ReadSshKeys(dsa_root_keyfiles)

    (_, cert_pem) = \
      utils.ExtractX509Certificate(utils.ReadFile(pathutils.NODED_CERT_FILE))

    (ssh_key_type, ssh_key_bits) = \
      cl.QueryConfigValues(["ssh_key_type", "ssh_key_bits"])

    data = {
        constants.SSHS_CLUSTER_NAME: cluster_name,
        constants.SSHS_NODE_DAEMON_CERTIFICATE: cert_pem,
        constants.SSHS_SSH_HOST_KEY: host_keys,
        constants.SSHS_SSH_ROOT_KEY: root_keys,
        constants.SSHS_SSH_AUTHORIZED_KEYS: candidate_keys,
        constants.SSHS_SSH_KEY_TYPE: ssh_key_type,
        constants.SSHS_SSH_KEY_BITS: ssh_key_bits,
    }
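    # The dictionary above is serialized and fed via stdin to the
    # prepare-node-join helper on the destination node by RunSshCmdWithStdin.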

    ssh.RunSshCmdWithStdin(cluster_name,
                           node,
                           pathutils.PREPARE_NODE_JOIN,
                           ssh_port,
                           data,
                           debug=options.debug,
                           verbose=options.verbose,
                           use_cluster_key=False,
                           ask_key=options.ssh_key_check,
                           strict_host_check=options.ssh_key_check)

    (_, pub_keyfile) = root_keyfiles[ssh_key_type]
    pub_key = ssh.ReadRemoteSshPubKey(pub_keyfile, node, cluster_name,
                                      ssh_port, options.ssh_key_check,
                                      options.ssh_key_check)
    # Unfortunately, we have to add the key with the node name rather than
    # the node's UUID here, because at this point, we do not have a UUID yet.
    # The entry will be corrected in noded later.
    ssh.AddPublicKey(node, pub_key)
Example #5
def ComputeNics(op, cluster, default_ip, cfg, ec_id):
    """Computes the nics.

    @param op: The instance opcode
    @param cluster: Cluster configuration object
    @param default_ip: The default ip to assign
    @param cfg: An instance of the configuration object
    @param ec_id: Execution context ID

    @returns: The built NIC objects

    """
    nics = []
    for nic in op.nics:
        nic_mode_req = nic.get(constants.INIC_MODE, None)
        nic_mode = nic_mode_req
        if nic_mode is None or nic_mode == constants.VALUE_AUTO:
            nic_mode = cluster.nicparams[constants.PP_DEFAULT][
                constants.NIC_MODE]

        net = nic.get(constants.INIC_NETWORK, None)
        link = nic.get(constants.NIC_LINK, None)
        ip = nic.get(constants.INIC_IP, None)
        vlan = nic.get(constants.INIC_VLAN, None)

        if net is None or net.lower() == constants.VALUE_NONE:
            net = None
        else:
            if nic_mode_req is not None or link is not None:
                raise errors.OpPrereqError(
                    "If network is given, no mode or link"
                    " is allowed to be passed", errors.ECODE_INVAL)

        # ip validity checks
        if ip is None or ip.lower() == constants.VALUE_NONE:
            nic_ip = None
        elif ip.lower() == constants.VALUE_AUTO:
            if not op.name_check:
                raise errors.OpPrereqError(
                    "IP address set to auto but name checks"
                    " have been skipped", errors.ECODE_INVAL)
            nic_ip = default_ip
        else:
            # We defer pool operations until later, so that the iallocator has
            # filled in the instance's node(s)
            if ip.lower() == constants.NIC_IP_POOL:
                if net is None:
                    raise errors.OpPrereqError(
                        "if ip=pool, parameter network"
                        " must be passed too", errors.ECODE_INVAL)

            elif not netutils.IPAddress.IsValid(ip):
                raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                           errors.ECODE_INVAL)

            nic_ip = ip

        # TODO: check the ip address for uniqueness
        if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip and not net:
            raise errors.OpPrereqError(
                "Routed nic mode requires an ip address"
                " if not attached to a network", errors.ECODE_INVAL)

        # MAC address verification
        mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
        if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
            mac = utils.NormalizeAndValidateMac(mac)

            try:
                # TODO: We need to factor this out
                cfg.ReserveMAC(mac, ec_id)
            except errors.ReservationError:
                raise errors.OpPrereqError(
                    "MAC address %s already in use"
                    " in cluster" % mac, errors.ECODE_NOTUNIQUE)

        #  Build nic parameters
        nicparams = {}
        if nic_mode_req:
            nicparams[constants.NIC_MODE] = nic_mode
        if link:
            nicparams[constants.NIC_LINK] = link
        if vlan:
            nicparams[constants.NIC_VLAN] = vlan

        check_params = cluster.SimpleFillNIC(nicparams)
        objects.NIC.CheckParameterSyntax(check_params)
        net_uuid = cfg.LookupNetwork(net)
        name = nic.get(constants.INIC_NAME, None)
        if name is not None and name.lower() == constants.VALUE_NONE:
            name = None
        nic_obj = objects.NIC(mac=mac,
                              ip=nic_ip,
                              name=name,
                              network=net_uuid,
                              nicparams=nicparams)
        nic_obj.uuid = cfg.GenerateUniqueID(ec_id)
        nics.append(nic_obj)

    return nics
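
# A simplified, standalone restatement (not Ganeti code) of the IP handling in
# ComputeNics above. It assumes the relevant constants are the strings "none",
# "auto" and "pool", and it omits address validation; pool allocation itself is
# deferred until the instance's nodes are known.
def resolve_nic_ip(ip, default_ip, name_check, net):
    if ip is None or ip.lower() == "none":
        return None
    if ip.lower() == "auto":
        if not name_check:
            raise ValueError("IP set to auto but name checks were skipped")
        return default_ip
    if ip.lower() == "pool" and net is None:
        raise ValueError("ip=pool requires a network to be given as well")
    # A literal address (or the "pool" marker) is kept as-is.
    return ip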
Example #6
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    (self.instance_uuid, self.instance_name) = \
      ExpandInstanceUuidAndName(self.lu.cfg, self.instance_uuid,
                                self.instance_name)
    self.instance = self.cfg.GetInstanceInfo(self.instance_uuid)
    assert self.instance is not None
    cluster = self.cfg.GetClusterInfo()

    if (not self.cleanup and
        not self.instance.admin_state == constants.ADMINST_UP and
        not self.failover and self.fallback):
      self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
                      " switching to failover")
      self.failover = True

    if self.instance.disk_template not in constants.DTS_MIRRORED:
      if self.failover:
        text = "failovers"
      else:
        text = "migrations"
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
                                 " %s" % (self.instance.disk_template, text),
                                 errors.ECODE_STATE)

    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
      CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")

      if self.lu.op.iallocator:
        assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
        self._RunAllocator()
      else:
        # We set self.target_node_uuid as it is required by
        # BuildHooksEnv
        self.target_node_uuid = self.lu.op.target_node_uuid

      # Check that the target node is correct in terms of instance policy
      nodeinfo = self.cfg.GetNodeInfo(self.target_node_uuid)
      group_info = self.cfg.GetNodeGroup(nodeinfo.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              group_info)
      CheckTargetNodeIPolicy(self.lu, ipolicy, self.instance, nodeinfo,
                             self.cfg, ignore=self.ignore_ipolicy)

      # self.target_node_uuid is already populated, either directly or by the
      # iallocator run
      target_node_uuid = self.target_node_uuid
      if self.target_node_uuid == self.instance.primary_node:
        raise errors.OpPrereqError(
          "Cannot migrate instance %s to its primary (%s)" %
          (self.instance.name,
           self.cfg.GetNodeName(self.instance.primary_node)),
          errors.ECODE_STATE)

      if len(self.lu.tasklets) == 1:
        # It is safe to release locks only when we're the only tasklet
        # in the LU
        ReleaseLocks(self.lu, locking.LEVEL_NODE,
                     keep=[self.instance.primary_node, self.target_node_uuid])
        ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)

    else:
      secondary_node_uuids = \
        self.cfg.GetInstanceSecondaryNodes(self.instance.uuid)
      if not secondary_node_uuids:
        raise errors.ConfigurationError("No secondary node but using"
                                        " %s disk template" %
                                        self.instance.disk_template)
      self.target_node_uuid = target_node_uuid = secondary_node_uuids[0]
      if self.lu.op.iallocator or \
        (self.lu.op.target_node_uuid and
         self.lu.op.target_node_uuid != target_node_uuid):
        if self.failover:
          text = "failed over"
        else:
          text = "migrated"
        raise errors.OpPrereqError("Instances with disk template %s cannot"
                                   " be %s to arbitrary nodes"
                                   " (neither an iallocator nor a target"
                                   " node can be passed)" %
                                   (self.instance.disk_template, text),
                                   errors.ECODE_INVAL)
      nodeinfo = self.cfg.GetNodeInfo(target_node_uuid)
      group_info = self.cfg.GetNodeGroup(nodeinfo.group)
      ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                              group_info)
      CheckTargetNodeIPolicy(self.lu, ipolicy, self.instance, nodeinfo,
                             self.cfg, ignore=self.ignore_ipolicy)

    i_be = cluster.FillBE(self.instance)

    # check memory requirements on the secondary node
    if (not self.cleanup and
         (not self.failover or
           self.instance.admin_state == constants.ADMINST_UP)):
      self.tgt_free_mem = CheckNodeFreeMemory(
          self.lu, target_node_uuid,
          "migrating instance %s" % self.instance.name,
          i_be[constants.BE_MINMEM], self.instance.hypervisor,
          self.cfg.GetClusterInfo().hvparams[self.instance.hypervisor])
    else:
      self.lu.LogInfo("Not checking memory on the secondary node as"
                      " instance will not be started")

    # check if failover must be forced instead of migration
    if (not self.cleanup and not self.failover and
        i_be[constants.BE_ALWAYS_FAILOVER]):
      self.lu.LogInfo("Instance configured to always failover; fallback"
                      " to failover")
      self.failover = True

    # check bridge existence
    CheckInstanceBridgesExist(self.lu, self.instance,
                              node_uuid=target_node_uuid)

    if not self.cleanup:
      CheckNodeNotDrained(self.lu, target_node_uuid)
      if not self.failover:
        result = self.rpc.call_instance_migratable(self.instance.primary_node,
                                                   self.instance)
        if result.fail_msg and self.fallback:
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
                          " failover")
          self.failover = True
        else:
          result.Raise("Can't migrate, please use failover",
                       prereq=True, ecode=errors.ECODE_STATE)

    assert not (self.failover and self.cleanup)

    if not self.failover:
      if self.lu.op.live is not None and self.lu.op.mode is not None:
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                   " parameters are accepted",
                                   errors.ECODE_INVAL)
      if self.lu.op.live is not None:
        if self.lu.op.live:
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
        else:
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
        # reset the 'live' parameter to None so that repeated
        # invocations of CheckPrereq do not raise an exception
        self.lu.op.live = None
      elif self.lu.op.mode is None:
        # read the default value from the hypervisor
        i_hv = cluster.FillHV(self.instance, skip_globals=False)
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
    else:
      # Failover is never live
      self.live = False

    if not (self.failover or self.cleanup):
      remote_info = self.rpc.call_instance_info(
          self.instance.primary_node, self.instance.name,
          self.instance.hypervisor, cluster.hvparams[self.instance.hypervisor])
      remote_info.Raise("Error checking instance on node %s" %
                        self.cfg.GetNodeName(self.instance.primary_node))
      instance_running = bool(remote_info.payload)
      if instance_running:
        self.current_mem = int(remote_info.payload["memory"])
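
# A standalone restatement (not Ganeti code) of how the 'live' flag and 'mode'
# parameter are reconciled in CheckPrereq above. It assumes the migration mode
# constants are the strings "live" and "non-live".
def resolve_migration_mode(live, mode, hypervisor_default):
    if live is not None and mode is not None:
        raise ValueError("only one of 'live' and 'mode' may be given")
    if live is not None:
        return "live" if live else "non-live"
    if mode is None:
        return hypervisor_default
    return mode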
Example #7
    def CheckArguments(self):
        if (self.op.gateway
                and (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
            raise errors.OpPrereqError(
                "Cannot modify gateway and reserved ips"
                " at once", errors.ECODE_INVAL)