class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

    if self.op.zero_free_space and not self.op.compress:
      raise errors.OpPrereqError("Zeroing free space does not make sense "
                                 "unless compression is used")

    if self.op.zero_free_space and not self.op.shutdown:
      raise errors.OpPrereqError("Unless the instance is shut down, zeroing "
                                 "cannot be used.")

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # In case we are zeroing, a node lock is required as we will be creating
    # and destroying a disk - allocations should be stopped, but not on the
    # entire cluster
    if self.op.zero_free_space:
      self.recalculate_locks = {locking.LEVEL_NODE: constants.LOCKS_REPLACE}
      self._LockInstancesNodes(primary_only=True)

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      (self.op.target_node_uuid, self.op.target_node) = \
        ExpandNodeUuidAndName(self.cfg, self.op.target_node_uuid,
                              self.op.target_node)
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know
      # where the previous export might be, and in this LU we search for it
      # and remove it from its current node. In the future we could fix this
      # by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(BuildInstanceHookEnvByObject(
      self, self.instance,
      secondary_nodes=self.secondary_nodes, disks=self.inst_disks))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node_uuid)

    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    self.instance = self.cfg.GetInstanceInfoByName(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    CheckNodeOnline(self, self.instance.primary_node)

    if (self.op.remove_instance and
        self.instance.admin_state == constants.ADMINST_UP and
        not self.op.shutdown):
      raise errors.OpPrereqError("Cannot remove instance without shutting it"
                                 " down first", errors.ECODE_STATE)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node_uuid)
      assert self.dst_node is not None

      CheckNodeOnline(self, self.dst_node.uuid)
      CheckNodeNotDrained(self, self.dst_node.uuid)

      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node),
                                    self.op.instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError) as err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err,
                                   errors.ECODE_INVAL)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error as err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError as err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # Check prerequisites for zeroing
    if self.op.zero_free_space:
      # Check that user shutdown detection has been enabled
      hvparams = self.cfg.GetClusterInfo().FillHV(self.instance)
      if self.instance.hypervisor == constants.HT_KVM and \
         not hvparams.get(constants.HV_KVM_USER_SHUTDOWN, False):
        raise errors.OpPrereqError("Instance shutdown detection must be "
                                   "enabled for zeroing to work",
                                   errors.ECODE_INVAL)

      # Check that the instance is set to boot from the disk
      if constants.HV_BOOT_ORDER in hvparams and \
         hvparams[constants.HV_BOOT_ORDER] != constants.HT_BO_DISK:
        raise errors.OpPrereqError("Booting from disk must be set for zeroing "
                                   "to work", errors.ECODE_INVAL)

      # Check that the zeroing image is set
      if not self.cfg.GetZeroingImage():
        raise errors.OpPrereqError("A zeroing image must be set for zeroing to"
                                   " work", errors.ECODE_INVAL)
def EvacuateNode(opts, args):
  """Relocate all secondary instances from a node.

  @param opts: the command line options selected by the user
  @type args: list
  @param args: should be an empty list
  @rtype: int
  @return: the desired exit code

  """
  if opts.dst_node is not None:
    ToStderr("New secondary node given (disabling iallocator), hence"
             " evacuating secondary instances only.")
    opts.secondary_only = True
    opts.primary_only = False

  if opts.secondary_only and opts.primary_only:
    raise errors.OpPrereqError("Only one of the --primary-only and"
                               " --secondary-only options can be passed",
                               errors.ECODE_INVAL)
  elif opts.primary_only:
    mode = constants.NODE_EVAC_PRI
  elif opts.secondary_only:
    mode = constants.NODE_EVAC_SEC
  else:
    mode = constants.NODE_EVAC_ALL

  # Determine affected instances
  fields = []

  if not opts.secondary_only:
    fields.append("pinst_list")
  if not opts.primary_only:
    fields.append("sinst_list")

  cl = GetClient()
  qcl = GetClient()

  result = qcl.QueryNodes(names=args, fields=fields, use_locking=False)
  qcl.Close()

  instances = set(itertools.chain(*itertools.chain(*itertools.chain(result))))

  if not instances:
    # No instances to evacuate
    ToStderr("No instances to evacuate on node(s) %s, exiting.",
             utils.CommaJoin(args))
    return constants.EXIT_SUCCESS

  if not (opts.force or
          AskUser("Relocate instance(s) %s from node(s) %s?" %
                  (utils.CommaJoin(utils.NiceSort(instances)),
                   utils.CommaJoin(args)))):
    return constants.EXIT_CONFIRMATION

  # Evacuate node
  op = opcodes.OpNodeEvacuate(node_name=args[0], mode=mode,
                              remote_node=opts.dst_node,
                              iallocator=opts.iallocator,
                              early_release=opts.early_release,
                              ignore_soft_errors=opts.ignore_soft_errors)
  result = SubmitOrSend(op, opts, cl=cl)

  # Keep track of submitted jobs
  jex = JobExecutor(cl=cl, opts=opts)

  for (status, job_id) in result[constants.JOB_IDS_KEY]:
    jex.AddJobId(None, status, job_id)

  results = jex.GetResults()
  bad_cnt = len([row for row in results if not row[0]])
  if bad_cnt == 0:
    ToStdout("All instances evacuated successfully.")
    rcode = constants.EXIT_SUCCESS
  else:
    ToStdout("There were %s errors during the evacuation.", bad_cnt)
    rcode = constants.EXIT_FAILURE

  return rcode
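
# Illustrative sketch only (hypothetical data, not part of the source): how
# the triple itertools.chain above flattens the QueryNodes result.  Each row
# holds one list of instance names per requested field ("pinst_list",
# "sinst_list"), so flattening twice beyond the row level yields the bare
# instance names.
_example_result = [
    [["inst1.example.com"], ["inst2.example.com", "inst3.example.com"]],
    ]
_example_instances = set(
    itertools.chain(*itertools.chain(*itertools.chain(_example_result))))
# _example_instances == {"inst1.example.com", "inst2.example.com",
#                        "inst3.example.com"}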
def _SetupSSH(options, cluster_name, node, ssh_port, cl):
  """Configures a destination node's SSH daemon.

  @param options: Command line options
  @type cluster_name: string
  @param cluster_name: Cluster name
  @type node: string
  @param node: Destination node name
  @type ssh_port: int
  @param ssh_port: Destination node ssh port
  @param cl: luxi client

  """
  # Retrieve the list of master and master candidates
  candidate_filter = ["|", ["=", "role", "M"], ["=", "role", "C"]]
  result = cl.Query(constants.QR_NODE, ["uuid"], candidate_filter)
  if len(result.data) < 1:
    raise errors.OpPrereqError("No master or master candidate node was found.")
  candidates = [uuid for ((_, uuid), ) in result.data]
  candidate_keys = ssh.QueryPubKeyFile(candidates)

  if options.force_join:
    ToStderr("The \"--force-join\" option is no longer supported and will be"
             " ignored.")

  host_keys = _ReadSshKeys(constants.SSH_DAEMON_KEYFILES)

  (_, root_keyfiles) = \
    ssh.GetAllUserFiles(constants.SSH_LOGIN_USER, mkdir=False, dircheck=False)
  dsa_root_keyfiles = dict((kind, value) for (kind, value)
                           in root_keyfiles.items()
                           if kind == constants.SSHK_DSA)
  root_keys = _ReadSshKeys(dsa_root_keyfiles)

  (_, cert_pem) = \
    utils.ExtractX509Certificate(utils.ReadFile(pathutils.NODED_CERT_FILE))

  (ssh_key_type, ssh_key_bits) = \
    cl.QueryConfigValues(["ssh_key_type", "ssh_key_bits"])

  data = {
    constants.SSHS_CLUSTER_NAME: cluster_name,
    constants.SSHS_NODE_DAEMON_CERTIFICATE: cert_pem,
    constants.SSHS_SSH_HOST_KEY: host_keys,
    constants.SSHS_SSH_ROOT_KEY: root_keys,
    constants.SSHS_SSH_AUTHORIZED_KEYS: candidate_keys,
    constants.SSHS_SSH_KEY_TYPE: ssh_key_type,
    constants.SSHS_SSH_KEY_BITS: ssh_key_bits,
    }

  ssh.RunSshCmdWithStdin(cluster_name, node, pathutils.PREPARE_NODE_JOIN,
                         ssh_port, data,
                         debug=options.debug, verbose=options.verbose,
                         use_cluster_key=False, ask_key=options.ssh_key_check,
                         strict_host_check=options.ssh_key_check)

  (_, pub_keyfile) = root_keyfiles[ssh_key_type]
  pub_key = ssh.ReadRemoteSshPubKey(pub_keyfile, node, cluster_name, ssh_port,
                                    options.ssh_key_check,
                                    options.ssh_key_check)
  # Unfortunately, we have to add the key with the node name rather than
  # the node's UUID here, because at this point, we do not have a UUID yet.
  # The entry will be corrected in noded later.
  ssh.AddPublicKey(node, pub_key)
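
# Illustrative sketch only (hypothetical data): each row returned by
# cl.Query(constants.QR_NODE, ["uuid"], ...) carries one (status, value)
# pair per requested field, which is why the list comprehension above
# unpacks a single-field row as ((_, uuid), ).  The UUIDs below are made up.
_example_rows = [
    ((constants.RS_NORMAL, "9b1e1fd4-7c18-4e47-a5b2-3d9f9a5a1c10"), ),
    ((constants.RS_NORMAL, "4f2ea6cd-0b84-4c7e-8f8a-2b6d7e1f9a33"), ),
    ]
_example_uuids = [uuid for ((_, uuid), ) in _example_rows]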
def ComputeNics(op, cluster, default_ip, cfg, ec_id):
  """Computes the nics.

  @param op: The instance opcode
  @param cluster: Cluster configuration object
  @param default_ip: The default ip to assign
  @param cfg: An instance of the configuration object
  @param ec_id: Execution context ID
  @returns: The built up nics

  """
  nics = []
  for nic in op.nics:
    nic_mode_req = nic.get(constants.INIC_MODE, None)
    nic_mode = nic_mode_req
    if nic_mode is None or nic_mode == constants.VALUE_AUTO:
      nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

    net = nic.get(constants.INIC_NETWORK, None)
    link = nic.get(constants.NIC_LINK, None)
    ip = nic.get(constants.INIC_IP, None)
    vlan = nic.get(constants.INIC_VLAN, None)

    if net is None or net.lower() == constants.VALUE_NONE:
      net = None
    else:
      if nic_mode_req is not None or link is not None:
        raise errors.OpPrereqError("If network is given, no mode or link"
                                   " is allowed to be passed",
                                   errors.ECODE_INVAL)

    # ip validity checks
    if ip is None or ip.lower() == constants.VALUE_NONE:
      nic_ip = None
    elif ip.lower() == constants.VALUE_AUTO:
      if not op.name_check:
        raise errors.OpPrereqError("IP address set to auto but name checks"
                                   " have been skipped",
                                   errors.ECODE_INVAL)
      nic_ip = default_ip
    else:
      # We defer pool operations until later, so that the iallocator has
      # filled in the instance's node(s)
      if ip.lower() == constants.NIC_IP_POOL:
        if net is None:
          raise errors.OpPrereqError("if ip=pool, parameter network"
                                     " must be passed too",
                                     errors.ECODE_INVAL)

      elif not netutils.IPAddress.IsValid(ip):
        raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                   errors.ECODE_INVAL)

      nic_ip = ip

    # TODO: check the ip address for uniqueness
    if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip and not net:
      raise errors.OpPrereqError("Routed nic mode requires an ip address"
                                 " if not attached to a network",
                                 errors.ECODE_INVAL)

    # MAC address verification
    mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
    if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
      mac = utils.NormalizeAndValidateMac(mac)

      try:
        # TODO: We need to factor this out
        cfg.ReserveMAC(mac, ec_id)
      except errors.ReservationError:
        raise errors.OpPrereqError("MAC address %s already in use"
                                   " in cluster" % mac,
                                   errors.ECODE_NOTUNIQUE)

    # Build nic parameters
    nicparams = {}
    if nic_mode_req:
      nicparams[constants.NIC_MODE] = nic_mode
    if link:
      nicparams[constants.NIC_LINK] = link
    if vlan:
      nicparams[constants.NIC_VLAN] = vlan

    check_params = cluster.SimpleFillNIC(nicparams)
    objects.NIC.CheckParameterSyntax(check_params)
    net_uuid = cfg.LookupNetwork(net)
    name = nic.get(constants.INIC_NAME, None)
    if name is not None and name.lower() == constants.VALUE_NONE:
      name = None
    nic_obj = objects.NIC(mac=mac, ip=nic_ip, name=name,
                          network=net_uuid, nicparams=nicparams)
    nic_obj.uuid = cfg.GenerateUniqueID(ec_id)
    nics.append(nic_obj)

  return nics
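
# Illustrative sketch only (not part of the source): the per-NIC dicts in
# op.nics are keyed by the constants.INIC_* names checked above.  A caller
# might describe one bridged NIC on link "br0" with an auto-generated MAC
# and no IP as follows; the concrete values are assumptions for illustration.
_example_nics = [
    {constants.INIC_MODE: constants.NIC_MODE_BRIDGED,
     constants.NIC_LINK: "br0",
     constants.INIC_IP: constants.VALUE_NONE,
     constants.INIC_MAC: constants.VALUE_AUTO},
    ]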
def CheckPrereq(self):
  """Check prerequisites.

  This checks that the instance is in the cluster.

  """
  (self.instance_uuid, self.instance_name) = \
    ExpandInstanceUuidAndName(self.lu.cfg, self.instance_uuid,
                              self.instance_name)
  self.instance = self.cfg.GetInstanceInfo(self.instance_uuid)
  assert self.instance is not None
  cluster = self.cfg.GetClusterInfo()

  if (not self.cleanup and
      not self.instance.admin_state == constants.ADMINST_UP and
      not self.failover and self.fallback):
    self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
                    " switching to failover")
    self.failover = True

  if self.instance.disk_template not in constants.DTS_MIRRORED:
    if self.failover:
      text = "failovers"
    else:
      text = "migrations"
    raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
                               " %s" % (self.instance.disk_template, text),
                               errors.ECODE_STATE)

  if self.instance.disk_template in constants.DTS_EXT_MIRROR:
    CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")

    if self.lu.op.iallocator:
      assert locking.NAL in self.lu.owned_locks(locking.LEVEL_NODE_ALLOC)
      self._RunAllocator()
    else:
      # We set self.target_node_uuid as it is required by BuildHooksEnv
      self.target_node_uuid = self.lu.op.target_node_uuid

    # Check that the target node is correct in terms of instance policy
    nodeinfo = self.cfg.GetNodeInfo(self.target_node_uuid)
    group_info = self.cfg.GetNodeGroup(nodeinfo.group)
    ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                            group_info)
    CheckTargetNodeIPolicy(self.lu, ipolicy, self.instance, nodeinfo,
                           self.cfg, ignore=self.ignore_ipolicy)

    # self.target_node is already populated, either directly or by the
    # iallocator run
    target_node_uuid = self.target_node_uuid
    if self.target_node_uuid == self.instance.primary_node:
      raise errors.OpPrereqError(
        "Cannot migrate instance %s to its primary (%s)" %
        (self.instance.name,
         self.cfg.GetNodeName(self.instance.primary_node)),
        errors.ECODE_STATE)

    if len(self.lu.tasklets) == 1:
      # It is safe to release locks only when we're the only tasklet
      # in the LU
      ReleaseLocks(self.lu, locking.LEVEL_NODE,
                   keep=[self.instance.primary_node, self.target_node_uuid])
      ReleaseLocks(self.lu, locking.LEVEL_NODE_ALLOC)

  else:
    secondary_node_uuids = \
      self.cfg.GetInstanceSecondaryNodes(self.instance.uuid)
    if not secondary_node_uuids:
      raise errors.ConfigurationError("No secondary node but using"
                                      " %s disk template" %
                                      self.instance.disk_template)
    self.target_node_uuid = target_node_uuid = secondary_node_uuids[0]
    if self.lu.op.iallocator or \
       (self.lu.op.target_node_uuid and
        self.lu.op.target_node_uuid != target_node_uuid):
      if self.failover:
        text = "failed over"
      else:
        text = "migrated"
      raise errors.OpPrereqError("Instances with disk template %s cannot"
                                 " be %s to arbitrary nodes"
                                 " (neither an iallocator nor a target"
                                 " node can be passed)" %
                                 (self.instance.disk_template, text),
                                 errors.ECODE_INVAL)
    nodeinfo = self.cfg.GetNodeInfo(target_node_uuid)
    group_info = self.cfg.GetNodeGroup(nodeinfo.group)
    ipolicy = ganeti.masterd.instance.CalculateGroupIPolicy(cluster,
                                                            group_info)
    CheckTargetNodeIPolicy(self.lu, ipolicy, self.instance, nodeinfo,
                           self.cfg, ignore=self.ignore_ipolicy)

  i_be = cluster.FillBE(self.instance)

  # check memory requirements on the secondary node
  if (not self.cleanup and
      (not self.failover or
       self.instance.admin_state == constants.ADMINST_UP)):
    self.tgt_free_mem = CheckNodeFreeMemory(
        self.lu, target_node_uuid,
        "migrating instance %s" % self.instance.name,
        i_be[constants.BE_MINMEM], self.instance.hypervisor,
        self.cfg.GetClusterInfo().hvparams[self.instance.hypervisor])
  else:
    self.lu.LogInfo("Not checking memory on the secondary node as"
                    " instance will not be started")

  # check if failover must be forced instead of migration
  if (not self.cleanup and not self.failover and
      i_be[constants.BE_ALWAYS_FAILOVER]):
    self.lu.LogInfo("Instance configured to always failover; fallback"
                    " to failover")
    self.failover = True

  # check bridge existence
  CheckInstanceBridgesExist(self.lu, self.instance,
                            node_uuid=target_node_uuid)

  if not self.cleanup:
    CheckNodeNotDrained(self.lu, target_node_uuid)
    if not self.failover:
      result = self.rpc.call_instance_migratable(self.instance.primary_node,
                                                 self.instance)
      if result.fail_msg and self.fallback:
        self.lu.LogInfo("Can't migrate, instance offline, fallback to"
                        " failover")
        self.failover = True
      else:
        result.Raise("Can't migrate, please use failover",
                     prereq=True, ecode=errors.ECODE_STATE)

  assert not (self.failover and self.cleanup)

  if not self.failover:
    if self.lu.op.live is not None and self.lu.op.mode is not None:
      raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                 " parameters are accepted",
                                 errors.ECODE_INVAL)
    if self.lu.op.live is not None:
      if self.lu.op.live:
        self.lu.op.mode = constants.HT_MIGRATION_LIVE
      else:
        self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
      # reset the 'live' parameter to None so that repeated
      # invocations of CheckPrereq do not raise an exception
      self.lu.op.live = None
    elif self.lu.op.mode is None:
      # read the default value from the hypervisor
      i_hv = cluster.FillHV(self.instance, skip_globals=False)
      self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

    self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
  else:
    # Failover is never live
    self.live = False

  if not (self.failover or self.cleanup):
    remote_info = self.rpc.call_instance_info(
        self.instance.primary_node, self.instance.name,
        self.instance.hypervisor,
        cluster.hvparams[self.instance.hypervisor])
    remote_info.Raise("Error checking instance on node %s" %
                      self.cfg.GetNodeName(self.instance.primary_node))
    instance_running = bool(remote_info.payload)
    if instance_running:
      self.current_mem = int(remote_info.payload["memory"])
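
# Illustrative sketch only (an assumption, not part of the Ganeti source):
# the precedence applied above when deciding the migration mode, written as
# a standalone helper.  "live" and "mode" mirror self.lu.op.live and
# self.lu.op.mode; "hv_default" stands for the hypervisor's
# HV_MIGRATION_MODE default.
def _ResolveMigrationMode(live, mode, hv_default):
  if live is not None and mode is not None:
    raise ValueError("Only one of 'live' and 'mode' may be given")
  if live is not None:
    if live:
      return constants.HT_MIGRATION_LIVE
    return constants.HT_MIGRATION_NONLIVE
  if mode is None:
    return hv_default
  return mode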
def CheckArguments(self):
  if (self.op.gateway and
      (self.op.add_reserved_ips or self.op.remove_reserved_ips)):
    raise errors.OpPrereqError("Cannot modify gateway and reserved ips"
                               " at once", errors.ECODE_INVAL)