def destroyXenPaging(self): if self.actmem == "0": return if self.xenpaging_pid: try: os.kill(self.xenpaging_pid, signal.SIGHUP) except OSError, exn: log.exception(exn) for i in xrange(100): try: (p, rv) = os.waitpid(self.xenpaging_pid, os.WNOHANG) if p == self.xenpaging_pid: break except OSError: # This is expected if Xend has been restarted within # the life of this domain. In this case, we can kill # the process, but we can't wait for it because it's # not our child. We continue this loop, and after it is # terminated make really sure the process is going away # (SIGKILL). pass time.sleep(0.1) else: log.warning("xenpaging %d took more than 10s " "to terminate: sending SIGKILL" % self.xenpaging_pid) try: os.kill(self.xenpaging_pid, signal.SIGKILL) os.waitpid(self.xenpaging_pid, 0) except OSError: # This happens if the process doesn't exist. pass
def prepareEnvironment(self): """Prepare the environment for the execution of the domain. This method is called before any devices are set up.""" domid = self.vm.getDomid() # Delete left-over pipes try: os.unlink('/var/run/tap/qemu-read-%d' % domid) os.unlink('/var/run/tap/qemu-write-%d' % domid) except: pass # No device model, don't create pipes if self.device_model is None: return if platform.system() != 'SunOS': # If we use a device model, the pipes for communication between # blktapctrl and ioemu must be present before the devices are # created (blktapctrl must access them for new block devices) try: os.makedirs('/var/run/tap', 0755) except: pass try: os.mkfifo('/var/run/tap/qemu-read-%d' % domid, 0600) os.mkfifo('/var/run/tap/qemu-write-%d' % domid, 0600) except OSError, e: log.warn('Could not create blktap pipes for domain %d' % domid) log.exception(e) pass
def perform(self, req): """General operation handler for posted operations. For operation 'foo' looks for a method op_foo and calls it with op_foo(op, req). Replies with code 500 if op_foo is not found. The method must return a list when req.use_sxp is true and an HTML string otherwise (or list). Methods may also return a ThreadRequest (for incomplete processing). req request """ op = req.args.get('op') if op is None or len(op) != 1: req.setResponseCode(http.NOT_ACCEPTABLE, "Invalid request") return '' op = op[0] op_method = self.get_op_method(op) if op_method is None: req.setResponseCode(http.NOT_IMPLEMENTED, "Operation not implemented: " + op) req.setHeader("Content-Type", "text/plain") req.write("Operation not implemented: " + op) return '' else: try: return op_method(op, req) except Exception, exn: req.setResponseCode(http.INTERNAL_SERVER_ERROR, "Request failed: " + op) log.exception("Request %s failed.", op) if req.useSxp(): return ['xend.err', str(exn)] else: return "<p>%s</p>" % str(exn)
def run(self, status): try: log.info("Xend Daemon started") xc = xen.lowlevel.xc.xc() xinfo = xc.xeninfo() log.info("Xend changeset: %s.", xinfo['xen_changeset']) del xc try: from xen import VERSION log.info("Xend version: %s", VERSION) except ImportError: log.info("Xend version: Unknown.") relocate.listenRelocation() servers = SrvServer.create() servers.start(status) del servers except Exception, ex: print >>sys.stderr, 'Exception starting xend:', ex if XEND_DEBUG: traceback.print_exc() log.exception("Exception starting xend (%s)" % ex) if status: status.write('1') status.close() sys.exit(1)
def run(self): """Runs the method and stores the result for later access. Is invoked by threading.Thread.start(). """ self.thread_id = thread.get_ident() self.task_progress_lock.acquire() try: self.task_progress[self.thread_id] = {} self.progress = 0 finally: self.task_progress_lock.release() try: result = self.func(*self.args) if result['Status'] == 'Success': self.result = result['Value'] self.set_status(XEN_API_TASK_STATUS_TYPE[1]) else: self.error_info = result['ErrorDescription'] self.set_status(XEN_API_TASK_STATUS_TYPE[2]) except Exception, e: log.exception('Error running Async Task') self.error_info = ['INTERNAL ERROR', str(e)] self.set_status(XEN_API_TASK_STATUS_TYPE[2])
def restore(xd, fd, dominfo = None, paused = False, relocating = False):
    """Ensure the /var/lib/xen state directory exists.

    Only the directory-preparation step is visible in this block.  Any
    failure while checking for or creating the directory is logged and
    re-raised as XendError.
    """
    try:
        dir_missing = not os.path.isdir("/var/lib/xen")
        if dir_missing:
            os.makedirs("/var/lib/xen")
    except Exception:
        log.exception("Can't create directory '/var/lib/xen'")
        raise XendError("Can't create directory '/var/lib/xen'")
def save(fd, dominfo, network, live, dst, checkpoint=False, node=-1):
    """Ensure the /var/lib/xen state directory exists.

    Only the directory-preparation step is visible in this block.  Any
    failure while checking for or creating the directory is logged and
    re-raised as XendError.
    """
    try:
        dir_missing = not os.path.isdir("/var/lib/xen")
        if dir_missing:
            os.makedirs("/var/lib/xen")
    except Exception:
        log.exception("Can't create directory '/var/lib/xen'")
        raise XendError("Can't create directory '/var/lib/xen'")
def restore(xd, fd, dominfo=None, paused=False, relocating=False):
    """Ensure the /var/lib/xen state directory exists.

    Only the directory-preparation step is visible in this block.  Any
    failure while checking for or creating the directory is logged and
    re-raised as XendError.
    """
    try:
        dir_missing = not os.path.isdir("/var/lib/xen")
        if dir_missing:
            os.makedirs("/var/lib/xen")
    except Exception:
        log.exception("Can't create directory '/var/lib/xen'")
        raise XendError("Can't create directory '/var/lib/xen'")
def run(self): """Runs the method and stores the result for later access. Is invoked by threading.Thread.start(). """ self.thread_id = thread.get_ident() self.task_progress_lock.acquire() try: self.task_progress[self.thread_id] = {} self.progress = 0 self.created = now() finally: self.task_progress_lock.release() try: result = self.func(*self.args) if result['Status'] == 'Success': self.result = result['Value'] self.set_status(XEN_API_TASK_STATUS_TYPE[1]) else: self.error_info = result['ErrorDescription'] self.set_status(XEN_API_TASK_STATUS_TYPE[2]) except Exception, e: log.exception('Error running Async Task') self.error_info = ['INTERNAL ERROR', str(e)] self.set_status(XEN_API_TASK_STATUS_TYPE[2])
def set_CPU_Affinity(self, vcpu, cpumap): domid = self.xend_domain_instance.getDomid() dominfo = self.xend_domain_instance if not dominfo: raise XendInvalidDomain(str(domid)) # if vcpu is keyword 'all', apply the cpumap to all vcpus if str(vcpu).lower() == "all": vcpus = range(0, int(dominfo.getVCpuCount())) else: vcpus = [int(vcpu)] # set the same cpumask for all vcpus rc = 0 cpus = dominfo.getCpus() cpumap = map(int, cpumap.split(",")) for v in vcpus: try: if dominfo._stateGet() in (DOM_STATE_RUNNING, DOM_STATE_PAUSED): rc = xc.vcpu_setaffinity(domid, v, cpumap) cpus[v] = cpumap except Exception, ex: log.exception(ex) raise XendError("Cannot pin vcpu: %d to cpu: %s - %s" % \ (v, cpumap, str(ex)))
def prepareEnvironment(self): """Prepare the environment for the execution of the domain. This method is called before any devices are set up.""" domid = self.vm.getDomid() # Delete left-over pipes try: os.unlink("/var/run/tap/qemu-read-%d" % domid) os.unlink("/var/run/tap/qemu-write-%d" % domid) except: pass # No device model, don't create pipes if self.device_model is None: return if platform.system() != "SunOS": # If we use a device model, the pipes for communication between # blktapctrl and ioemu must be present before the devices are # created (blktapctrl must access them for new block devices) try: os.makedirs("/var/run/tap", 0755) except: pass try: os.mkfifo("/var/run/tap/qemu-read-%d" % domid, 0600) os.mkfifo("/var/run/tap/qemu-write-%d" % domid, 0600) except OSError, e: log.warn("Could not create blktap pipes for domain %d" % domid) log.exception(e) pass
def destroyDeviceModel(self): if self.device_model is None: return if self.pid: self.sentinel_lock.acquire() try: try: os.kill(self.pid, signal.SIGHUP) except OSError, exn: log.exception(exn) try: # Try to reap the child every 100ms for 10s. Then SIGKILL it. for i in xrange(100): (p, rv) = os.waitpid(self.pid, os.WNOHANG) if p == self.pid: break time.sleep(0.1) else: log.warning("DeviceModel %d took more than 10s " "to terminate: sending SIGKILL" % self.pid) os.kill(self.pid, signal.SIGKILL) os.waitpid(self.pid, 0) except OSError, exn: # This is expected if Xend has been restarted within the # life of this domain. In this case, we can kill the process, # but we can't wait for it because it's not our child. # We just make really sure it's going away (SIGKILL) first. os.kill(self.pid, signal.SIGKILL) state = xstransact.Remove("/local/domain/0/device-model/%i" % self.vm.getDomid())
def perform(self, req): """General operation handler for posted operations. For operation 'foo' looks for a method op_foo and calls it with op_foo(op, req). Replies with code 500 if op_foo is not found. The method must return a list when req.use_sxp is true and an HTML string otherwise (or list). Methods may also return a ThreadRequest (for incomplete processing). req request """ op = req.args.get('op') if op is None or len(op) != 1: req.setResponseCode(http.NOT_ACCEPTABLE, "Invalid request") return '' op = op[0] op_method = self.get_op_method(op) if op_method is None: req.setResponseCode(http.NOT_IMPLEMENTED, "Operation not implemented: " + op) req.setHeader("Content-Type", "text/plain") req.write("Operation not implemented: " + op) return '' else: try: return op_method(op, req) except Exception, exn: log.exception("Request %s failed.", op) if req.useSxp(): return ['xend.err', str(exn)] else: return "<p>%s</p>" % str(exn)
def run(self, status): try: log.info("Xend Daemon started") xc = xen.lowlevel.xc.xc() xinfo = xc.xeninfo() log.info("Xend changeset: %s.", xinfo['xen_changeset']) del xc try: from xen import VERSION log.info("Xend version: %s", VERSION) except ImportError: log.info("Xend version: Unknown.") relocate.listenRelocation() servers = SrvServer.create() servers.start(status) del servers except Exception, ex: print >> sys.stderr, 'Exception starting xend:', ex if XEND_DEBUG: traceback.print_exc() log.exception("Exception starting xend (%s)" % ex) if status: status.write('1') status.close() sys.exit(1)
def _loadConfig(servers, root, reload): if xoptions.get_xend_http_server(): servers.add(HttpServer(root, xoptions.get_xend_address(), xoptions.get_xend_port())) if xoptions.get_xend_unix_server(): path = xoptions.get_xend_unix_path() log.info('unix path=' + path) servers.add(UnixHttpServer(root, path)) api_cfg = xoptions.get_xen_api_server() if api_cfg: try: for server_cfg in api_cfg: # Parse the xen-api-server config ssl_key_file = None ssl_cert_file = None auth_method = XendAPI.AUTH_NONE hosts_allowed = None host_addr = server_cfg[0].split(':', 1) if len(host_addr) == 1: if host_addr[0].lower() == 'unix': use_tcp = False host = 'localhost' port = 0 else: use_tcp = True host = '' port = int(host_addr[0]) else: use_tcp = True host = str(host_addr[0]) port = int(host_addr[1]) if len(server_cfg) > 1: if server_cfg[1] in [XendAPI.AUTH_PAM, XendAPI.AUTH_NONE]: auth_method = server_cfg[1] if len(server_cfg) > 2 and len(server_cfg[2]): hosts_allowed = map(re.compile, server_cfg[2].split(' ')) if len(server_cfg) > 4: # SSL key and cert file ssl_key_file = server_cfg[3] ssl_cert_file = server_cfg[4] servers.add(XMLRPCServer(auth_method, True, use_tcp = use_tcp, ssl_key_file = ssl_key_file, ssl_cert_file = ssl_cert_file, host = host, port = port, path = XEN_API_SOCKET, hosts_allowed = hosts_allowed)) except (ValueError, TypeError), exn: log.exception('Xen API Server init failed') log.error('Xen-API server configuration %s is invalid.', api_cfg)
def save(fd, dominfo, network, live, dst, checkpoint=False, node=-1, sock=None, name=None, diskonly=False):
    """Ensure the /var/lib/xen state directory exists.

    Only the directory-preparation step is visible in this block.  Any
    failure while checking for or creating the directory is logged and
    re-raised as XendError.
    """
    from xen.xend import XendDomain

    try:
        dir_missing = not os.path.isdir("/var/lib/xen")
        if dir_missing:
            os.makedirs("/var/lib/xen")
    except Exception:
        log.exception("Can't create directory '/var/lib/xen'")
        raise XendError("Can't create directory '/var/lib/xen'")
def watchMain():
    """Dispatch store watch events forever.

    Each event read from xs.read_watch() carries the watch object in
    position 1 and the value passed to its callback in position 0.  The
    callback is invoked with that value plus the watch's stored
    arguments; a false return value unregisters the watch.  Any failure
    is logged and the loop carries on.
    """
    while True:
        try:
            event = xs.read_watch()
            watch = event[1]
            keep_watching = watch.fn(event[0], *watch.args, **watch.kwargs)
            if keep_watching:
                continue
            watch.unwatch()
        except:
            log.exception("read_watch failed")
def unwatchAerState(self):
    """Drop the watch on the domain's aerState node, if there is one.

    The stored watch reference is cleared whether or not unregistering
    succeeds; a failure is logged rather than raised.
    """
    try:
        try:
            watch = self.aerStateWatch
            if watch:
                watch.unwatch()
        finally:
            # Forget the watch even if unwatch() failed.
            self.aerStateWatch = None
    except:
        log.exception("Unwatching aerState failed.")
def domain_restore_fd(self, fd):
    """Restore a domain from the given file descriptor.

    Returns whatever XendCheckpoint.restore() returns.  A failure is
    logged and then re-raised to the caller.
    """
    try:
        restored = XendCheckpoint.restore(self, fd)
    except:
        # Logging here is not ideal, but the error handling in the
        # relocation-socket code (relocate.py) is poor, so the record is
        # needed for debugging.
        log.exception("Restore failed")
        raise
    return restored
def destroyDeviceModel(self): if self.device_model is None: return self.sentinel_lock.acquire() try: stubdomid = self.vm.getStubdomDomid() if stubdomid is not None: from xen.xend import XendDomain XendDomain.instance().domain_destroy(stubdomid) elif self.pid: try: os.kill(self.pid, signal.SIGHUP) except OSError, exn: log.exception(exn) # Try to reap the child every 100ms for 10s. Then SIGKILL it. for i in xrange(100): try: (p, rv) = os.waitpid(self.pid, os.WNOHANG) if p == self.pid: break except OSError: # This is expected if Xend has been restarted within # the life of this domain. In this case, we can kill # the process, but we can't wait for it because it's # not our child. We continue this loop, and after it is # terminated make really sure the process is going away # (SIGKILL). pass time.sleep(0.1) else: log.warning("DeviceModel %d took more than 10s " "to terminate: sending SIGKILL" % self.pid) try: os.kill(self.pid, signal.SIGKILL) os.waitpid(self.pid, 0) except OSError: # This happens if the process doesn't exist. pass finally: self.pid = None self.sentinel_lock.release() state = xstransact.Remove("/local/domain/0/device-model/%i" % self.vm.getDomid()) try: os.unlink('/var/run/tap/qemu-read-%d' % self.vm.getDomid()) os.unlink('/var/run/tap/qemu-write-%d' % self.vm.getDomid()) except: pass try: del sentinel_fifos_inuse[self.sentinel_path_fifo] os.unlink(self.sentinel_path_fifo) except: pass
def destroyDeviceModel(self): if self.device_model is None: return self.sentinel_lock.acquire() try: stubdomid = self.vm.getStubdomDomid() if stubdomid is not None : from xen.xend import XendDomain XendDomain.instance().domain_destroy(stubdomid) elif self.pid: try: os.kill(self.pid, signal.SIGHUP) except OSError, exn: log.exception(exn) # Try to reap the child every 100ms for 10s. Then SIGKILL it. for i in xrange(100): try: (p, rv) = os.waitpid(self.pid, os.WNOHANG) if p == self.pid: break except OSError: # This is expected if Xend has been restarted within # the life of this domain. In this case, we can kill # the process, but we can't wait for it because it's # not our child. We continue this loop, and after it is # terminated make really sure the process is going away # (SIGKILL). pass time.sleep(0.1) else: log.warning("DeviceModel %d took more than 10s " "to terminate: sending SIGKILL" % self.pid) try: os.kill(self.pid, signal.SIGKILL) os.waitpid(self.pid, 0) except OSError: # This happens if the process doesn't exist. pass finally: self.pid = None self.sentinel_lock.release() state = xstransact.Remove("/local/domain/0/device-model/%i" % self.vm.getDomid()) try: os.unlink('/var/run/tap/qemu-read-%d' % self.vm.getDomid()) os.unlink('/var/run/tap/qemu-write-%d' % self.vm.getDomid()) except: pass try: del sentinel_fifos_inuse[self.sentinel_path_fifo] os.unlink(self.sentinel_path_fifo) except: pass
def unregister_shutdown_watch(self):
    """Drop the control/shutdown watch, if one is registered.

    Never raises on an unwatch failure: the error is logged instead.  The
    stored watch reference is cleared in every case.
    """
    try:
        watch = self.shutdownWatch
        if watch:
            watch.unwatch()
    except:
        log.exception("Unwatching hvm shutdown watch failed.")

    self.shutdownWatch = None
    log.debug("hvm shutdown watch unregistered")
def __init__(self): # Table of vnet info indexed by vnet id. self.vnet = {} listing = xstransact.List(self.dbpath) for entry in listing: try: info = XendVnetInfo(self.dbpath + '/' + entry) self.vnet[info.id] = info info.configure() except XendError, ex: log.warning("Failed to configure vnet %s: %s", str(info.id), str(ex)) except Exception, ex: log.exception("Vnet error") xstransact.Remove(self.dbpath + '/' + entry)
def destroyDeviceModel(self): if self.device_model is None: return if self.pid: try: os.kill(self.pid, signal.SIGKILL) except OSError, exn: log.exception(exn) try: os.waitpid(self.pid, 0) except OSError, exn: # This is expected if Xend has been restarted within the # life of this domain. In this case, we can kill the process, # but we can't wait for it because it's not our child. pass
class TCPXMLRPCServer(SocketServer.ThreadingMixIn, SimpleXMLRPCServer): allow_reuse_address = True def _marshaled_dispatch(self, data, dispatch_method=None): params, method = xmlrpclib.loads(data) try: if dispatch_method is not None: response = dispatch_method(method, params) else: response = self._dispatch(method, params) # With either Unicode or normal strings, we can only transmit # \t, \n, \r, \u0020-\ud7ff, \ue000-\ufffd, and \u10000-\u10ffff # in an XML document. xmlrpclib does not escape these values # properly, and then breaks when it comes to parse the document. # To hack around this problem, we use repr here and exec above # to transmit the string using Python encoding. # Thanks to David Mertz <*****@*****.**> for the trick (buried # in xml_pickle.py). if (isinstance(response, types.StringType) or isinstance(response, unicode)): response = repr(response)[1:-1] response = (response, ) response = xmlrpclib.dumps(response, methodresponse=1, allow_none=1) except xmlrpclib.Fault, fault: response = xmlrpclib.dumps(fault) except Exception, exn: log.exception(exn) response = xmlrpclib.dumps( xmlrpclib.Fault(xen.xend.XendClient.ERROR_INTERNAL, str(exn)))
def wait_devs(dominfo): from xen.xend import XendDomain lock = True; try: XendDomain.instance().domains_lock.release() except: lock = False; try: dominfo.waitForDevices() # Wait for backends to set up except Exception, exn: log.exception(exn) if lock: XendDomain.instance().domains_lock.acquire() raise
def watchMain(): while True: try: we = xs.read_watch() watch = we[1] res = watch.fn(we[0], *watch.args, **watch.kwargs) if not res: try: watch.unwatch() except RuntimeError, exn: if exn.args[0] == errno.ENOENT: # The watch has already been unregistered -- that's # fine. pass else: raise except: log.exception("read_watch failed")
def _marshaled_dispatch(self, data, dispatch_method=None): params, method = xmlrpclib.loads(data) if False: # Enable this block of code to exit immediately without sending # a response. This allows you to test client-side crash handling. import sys sys.exit(1) try: if dispatch_method is not None: response = dispatch_method(method, params) else: response = self._dispatch(method, params) if self.xenapi and (response is None or not isinstance(response, dict) or "Status" not in response): log.exception("Internal error handling %s: Invalid result %s", method, response) response = { "Status": "Failure", "ErrorDescription": ["INTERNAL_ERROR", "Invalid result %s handling %s" % (response, method)], } # With either Unicode or normal strings, we can only transmit # \t, \n, \r, \u0020-\ud7ff, \ue000-\ufffd, and \u10000-\u10ffff # in an XML document. xmlrpclib does not escape these values # properly, and then breaks when it comes to parse the document. # To hack around this problem, we use repr here and exec above # to transmit the string using Python encoding. # Thanks to David Mertz <*****@*****.**> for the trick (buried # in xml_pickle.py). if isinstance(response, StringTypes): response = repr(response)[1:-1] response = (response,) response = xmlrpclib.dumps(response, methodresponse=1, allow_none=1) except Exception, exn: try: if self.xenapi: if _is_not_supported(exn): errdesc = ["MESSAGE_METHOD_UNKNOWN", method] else: log.exception("Internal error handling %s", method) errdesc = ["INTERNAL_ERROR", str(exn)] response = xmlrpclib.dumps(({"Status": "Failure", "ErrorDescription": errdesc},), methodresponse=1) else: import xen.xend.XendClient if isinstance(exn, xmlrpclib.Fault): response = xmlrpclib.dumps(exn) else: log.exception("Internal error handling %s", method) response = xmlrpclib.dumps(xmlrpclib.Fault(xen.xend.XendClient.ERROR_INTERNAL, str(exn))) except: log.exception("Internal error handling error")
def run(self, status): try: log.info("Xend Daemon started") #xc = xen.lowlevel.xc.xc() #xinfo = xc.xeninfo() #log.info("Xend changeset: %s.", xinfo['xen_changeset']) #del xc relocate.listenRelocation() servers = SrvServer.create() servers.start(status) except Exception, ex: print >>sys.stderr, 'Exception starting xend:', ex if XEND_DEBUG: traceback.print_exc() log.exception("Exception starting xend (%s)" % ex) if status: status.write('1') status.close() sys.exit(1)
def refresh(self, initialising = False):
    """Synchronise the known-domain table with the domains Xen reports.

    Expects to be protected by the domains_lock.

    Known domains that Xen still reports are updated; the others are
    deleted.  Domains Xen reports that are not yet known are recreated,
    except dying ones, which are only logged.  A missing privileged
    domain is fatal and terminates the process.  If recreating a domain
    fails, the domain is destroyed in the hope of recovery.

    @param initialising True if this is the first refresh after starting
    Xend.  This does not change this method's behaviour, except for
    logging.
    """
    doms = self.xen_domains()

    # Pass 1: bring already-known domains up to date, or forget them.
    for known in self.domains.values():
        info = doms.get(known.getDomid())
        if not info:
            self._delete_domain(known.getDomid())
        else:
            known.update(info)

    # Pass 2: pick up domains that Xen knows about but we do not.
    for d in doms:
        if d in self.domains:
            continue

        if doms[d]['dying']:
            if initialising:
                level = logging.ERROR
            else:
                level = logging.DEBUG
            log.log(level,
                    'Cannot recreate information for dying domain %d.'
                    '  Xend will ignore this domain from now on.',
                    doms[d]['dom'])
        elif d == PRIV_DOMAIN:
            log.fatal(
                "No record of privileged domain %d!  Terminating.", d)
            sys.exit(1)
        else:
            try:
                recreated = XendDomainInfo.recreate(doms[d], False)
                self._add_domain(recreated)
            except:
                log.exception(
                    "Failed to recreate information for domain "
                    "%d.  Destroying it in the hope of "
                    "recovery.", d)
                try:
                    xc.domain_destroy(d)
                except:
                    log.exception('Destruction of %d failed.', d)
def destroyDevice(self, devid, force): """Destroy the specified device. @param devid The device ID, or something device-specific from which the device ID can be determined (such as a guest-side device name). The implementation here simply deletes the appropriate paths from the store. This may be overridden by subclasses who need to perform other tasks on destruction. The implementation here accepts integer device IDs or paths containg integer deviceIDs, e.g. vfb/0. Subclasses may accept other values and convert them to integers before passing them here. """ dev = self.convertToDeviceNumber(devid) # Modify online status /before/ updating state (latter is watched by # drivers, so this ordering avoids a race). try: self.writeBackend(dev, 'online', "0") self.writeBackend(dev, 'state', str(xenbusState['Closing'])) except VmError, vm_err: log.exception(vm_err) force = True
def _loadConfig(servers, root, reload): if xoptions.get_xend_http_server(): servers.add( HttpServer(root, xoptions.get_xend_address(), xoptions.get_xend_port())) if xoptions.get_xend_unix_server(): path = xoptions.get_xend_unix_path() log.info('unix path=' + path) servers.add(UnixHttpServer(root, path)) api_cfg = xoptions.get_xen_api_server() if api_cfg: try: for server_cfg in api_cfg: # Parse the xen-api-server config ssl_key_file = None ssl_cert_file = None auth_method = XendAPI.AUTH_NONE hosts_allowed = None host_addr = server_cfg[0].split(':', 1) if len(host_addr) == 1: if host_addr[0].lower() == 'unix': use_tcp = False host = 'localhost' port = 0 else: use_tcp = True host = '' port = int(host_addr[0]) else: use_tcp = True host = str(host_addr[0]) port = int(host_addr[1]) if len(server_cfg) > 1: if server_cfg[1] in [XendAPI.AUTH_PAM, XendAPI.AUTH_NONE]: auth_method = server_cfg[1] if len(server_cfg) > 2 and len(server_cfg[2]): hosts_allowed = map(re.compile, server_cfg[2].split(' ')) if len(server_cfg) > 4: # SSL key and cert file ssl_key_file = server_cfg[3] ssl_cert_file = server_cfg[4] servers.add( XMLRPCServer(auth_method, True, use_tcp=use_tcp, ssl_key_file=ssl_key_file, ssl_cert_file=ssl_cert_file, host=host, port=port, path=XEN_API_SOCKET, hosts_allowed=hosts_allowed)) except (ValueError, TypeError), exn: log.exception('Xen API Server init failed') log.error('Xen-API server configuration %s is invalid.', api_cfg)
list.insert (i+1, value) return def save(fd, fds, dominfo, network, live, dst, checkpoint=False, node=-1,sock=None): from xen.xend import XendDomain # Multi ip flag is_multi = False if not fds.empty(): is_multi = True try: if not os.path.isdir("/var/lib/xen"): os.makedirs("/var/lib/xen") except Exception, exn: log.exception("Can't create directory '/var/lib/xen'") raise XendError("Can't create directory '/var/lib/xen'") if is_multi: for d in fds: write_exact(d, SIGNATURE, "could not write guest state file: signature") else: write_exact(fd, SIGNATURE, "could not write guest state file: signature") sxprep = dominfo.sxpr() if node > -1: insert_after(sxprep,'vcpus',['node', str(node)]) for device_sxp in sxp.children(sxprep, 'device'): backend = sxp.child(device_sxp[1], 'backend')
def save(fd, dominfo, network, live, dst, checkpoint=False): write_exact(fd, SIGNATURE, "could not write guest state file: signature") # CoW timing checkpointtime = [] downtime = [] buf_list = [] # Cow timing checkpointtime.append(time.time()) config = sxp.to_string(dominfo.sxpr()) domain_name = dominfo.getName() # Rename the domain temporarily, so that we don't get a name clash if this # domain is migrating (live or non-live) to the local host. Doing such a # thing is useful for debugging. dominfo.setName('migrating-' + domain_name) try: dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP1, domain_name) write_exact(fd, pack("!i", len(config)), "could not write guest state file: config len") write_exact(fd, config, "could not write guest state file: config") image_cfg = dominfo.info.get('image', {}) hvm = dominfo.info.is_hvm() # xc_save takes three customization parameters: maxit, max_f, and # flags the last controls whether or not save is 'live', while the # first two further customize behaviour when 'live' save is # enabled. Passing "0" simply uses the defaults compiled into # libxenguest; see the comments and/or code in xc_linux_save() for # more information. 
cmd = [ xen.util.auxbin.pathTo(XC_SAVE), str(fd), str(dominfo.getDomid()), "0", "0", str(int(live) | (int(hvm) << 2)) ] def saveInputHandler(line, tochild): if line == "suspend": dominfo.shutdown('suspend') # CoW timing downtime.append(time.time()) dominfo.waitForShutdown() dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP2, domain_name) dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP3, domain_name) if hvm: dominfo.image.saveDeviceModel() # for CoW purposes, get qemu-dm state qemu_fd = os.open( "/var/lib/xen/qemu-save.%d" % dominfo.getDomid(), os.O_RDONLY) while True: buf = os.read(qemu_fd, dm_batch) if len(buf): buf_list.append(buf) else: break os.close(qemu_fd) # Cow: snapshot VBD os.system("/etc/xen/scripts/snapshot-vbd.sh %s" % os.path.basename(dst)) log.debug('Performed VBD snapshot') tochild.write("done\n") tochild.flush() if line == "restart": global down_end log.debug("Restarting %d ...", dominfo.getDomid()) dominfo.resumeDomain(downtime) # CoW timing downtime.append(time.time()) tochild.write("done\n") tochild.flush() forkHelper(cmd, fd, saveInputHandler, False) # put qemu device model state if os.path.exists("/var/lib/xen/qemu-save.%d" % dominfo.getDomid()): os.remove("/var/lib/xen/qemu-save.%d" % dominfo.getDomid()) write_exact(fd, QEMU_SIGNATURE, "could not write qemu signature") for buf in buf_list: if len(buf): write_exact(fd, buf, "could not write device model state") else: break try: dominfo.setName(domain_name) except VmError: # Ignore this. The name conflict (hopefully) arises because we # are doing localhost migration; if we are doing a suspend of a # persistent VM, we need the rename, and don't expect the # conflict. This needs more thought. 
pass # CoW timing checkpointtime.append(time.time()) log.debug("[downtime] %s", downtime[2] - downtime[0]) log.debug("[checkpoint_time] %s", checkpointtime[1] - checkpointtime[0]) except Exception, exn: log.exception("Save failed on domain %s (%s) - resuming.", domain_name, dominfo.getDomid()) dominfo.resumeDomain([]) try: dominfo.setName(domain_name) except: log.exception("Failed to reset the migrating domain's name") raise exn
def save(fd, dominfo, network, live, dst, checkpoint=False, node=-1): write_exact(fd, SIGNATURE, "could not write guest state file: signature") sxprep = dominfo.sxpr() if node > -1: insert_after(sxprep,'vcpus',['node', str(node)]) config = sxp.to_string(sxprep) domain_name = dominfo.getName() # Rename the domain temporarily, so that we don't get a name clash if this # domain is migrating (live or non-live) to the local host. Doing such a # thing is useful for debugging. dominfo.setName('migrating-' + domain_name) try: dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP1, domain_name) write_exact(fd, pack("!i", len(config)), "could not write guest state file: config len") write_exact(fd, config, "could not write guest state file: config") image_cfg = dominfo.info.get('image', {}) hvm = dominfo.info.is_hvm() # xc_save takes three customization parameters: maxit, max_f, and # flags the last controls whether or not save is 'live', while the # first two further customize behaviour when 'live' save is # enabled. Passing "0" simply uses the defaults compiled into # libxenguest; see the comments and/or code in xc_linux_save() for # more information. 
cmd = [xen.util.auxbin.pathTo(XC_SAVE), str(fd), str(dominfo.getDomid()), "0", "0", str(int(live) | (int(hvm) << 2)) ] log.debug("[xc_save]: %s", string.join(cmd)) def saveInputHandler(line, tochild): log.debug("In saveInputHandler %s", line) if line == "suspend": log.debug("Suspending %d ...", dominfo.getDomid()) dominfo.shutdown('suspend') dominfo.waitForShutdown() if line in ('suspend', 'suspended'): dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP2, domain_name) log.info("Domain %d suspended.", dominfo.getDomid()) dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP3, domain_name) if hvm: dominfo.image.saveDeviceModel() if line == "suspend": tochild.write("done\n") tochild.flush() log.debug('Written done') forkHelper(cmd, fd, saveInputHandler, False) # put qemu device model state if os.path.exists("/var/lib/xen/qemu-save.%d" % dominfo.getDomid()): write_exact(fd, QEMU_SIGNATURE, "could not write qemu signature") qemu_fd = os.open("/var/lib/xen/qemu-save.%d" % dominfo.getDomid(), os.O_RDONLY) while True: buf = os.read(qemu_fd, dm_batch) if len(buf): write_exact(fd, buf, "could not write device model state") else: break os.close(qemu_fd) os.remove("/var/lib/xen/qemu-save.%d" % dominfo.getDomid()) if checkpoint: dominfo.resumeDomain() else: dominfo.destroy() dominfo.testDeviceComplete() try: dominfo.setName(domain_name, False) except VmError: # Ignore this. The name conflict (hopefully) arises because we # are doing localhost migration; if we are doing a suspend of a # persistent VM, we need the rename, and don't expect the # conflict. This needs more thought. pass except Exception, exn: log.exception("Save failed on domain %s (%s) - resuming.", domain_name, dominfo.getDomid()) dominfo.resumeDomain() try: dominfo.setName(domain_name) except: log.exception("Failed to reset the migrating domain's name") raise exn
def restore(xd, fd, dominfo=None, paused=False, relocating=False): signature = read_exact(fd, len(SIGNATURE), "not a valid guest state file: signature read") if signature != SIGNATURE: raise XendError("not a valid guest state file: found '%s'" % signature) l = read_exact(fd, sizeof_int, "not a valid guest state file: config size read") vmconfig_size = unpack("!i", l)[0] vmconfig_buf = read_exact(fd, vmconfig_size, "not a valid guest state file: config read") p = sxp.Parser() p.input(vmconfig_buf) if not p.ready: raise XendError("not a valid guest state file: config parse") vmconfig = p.get_val() if not relocating: domconfig = XendConfig(sxp_obj=vmconfig) othervm = xd.domain_lookup_nr(domconfig["name_label"]) if othervm is None or othervm.domid is None: othervm = xd.domain_lookup_nr(domconfig["uuid"]) if othervm is not None and othervm.domid is not None: raise VmError("Domain '%s' already exists with ID '%d'" % (domconfig["name_label"], othervm.domid)) if dominfo: dominfo.resume() else: dominfo = xd.restore_(vmconfig) # repin domain vcpus if a target node number was specified # this is done prior to memory allocation to aide in memory # distribution for NUMA systems. 
nodenr = -1 for i, l in enumerate(vmconfig): if type(l) == type([]): if l[0] == 'node': nodenr = int(l[1]) if nodenr >= 0: node_to_cpu = XendNode.instance().xc.physinfo()['node_to_cpu'] if nodenr < len(node_to_cpu): for v in range(0, dominfo.info['VCPUs_max']): xc.vcpu_setaffinity(dominfo.domid, v, node_to_cpu[nodenr]) store_port = dominfo.getStorePort() console_port = dominfo.getConsolePort() assert store_port assert console_port # if hvm, pass mem size to calculate the store_mfn image_cfg = dominfo.info.get('image', {}) is_hvm = dominfo.info.is_hvm() if is_hvm: apic = int(dominfo.info['platform'].get('apic', 0)) pae = int(dominfo.info['platform'].get('pae', 0)) log.info("restore hvm domain %d, apic=%d, pae=%d", dominfo.domid, apic, pae) else: apic = 0 pae = 0 try: restore_image = image.create(dominfo, dominfo.info) memory = restore_image.getRequiredAvailableMemory( dominfo.info['memory_dynamic_max'] / 1024) maxmem = restore_image.getRequiredAvailableMemory( dominfo.info['memory_static_max'] / 1024) shadow = restore_image.getRequiredShadowMemory( dominfo.info['shadow_memory'] * 1024, dominfo.info['memory_static_max'] / 1024) log.debug("restore:shadow=0x%x, _static_max=0x%x, _static_min=0x%x, ", dominfo.info['shadow_memory'], dominfo.info['memory_static_max'], dominfo.info['memory_static_min']) # Round shadow up to a multiple of a MiB, as shadow_mem_control # takes MiB and we must not round down and end up under-providing. 
shadow = ((shadow + 1023) / 1024) * 1024 # set memory limit xc.domain_setmaxmem(dominfo.getDomid(), maxmem) balloon.free(memory + shadow) shadow_cur = xc.shadow_mem_control(dominfo.getDomid(), shadow / 1024) dominfo.info['shadow_memory'] = shadow_cur cmd = map(str, [ xen.util.auxbin.pathTo(XC_RESTORE), fd, dominfo.getDomid(), store_port, console_port, int(is_hvm), pae, apic ]) log.debug("[xc_restore]: %s", string.join(cmd)) handler = RestoreInputHandler() forkHelper(cmd, fd, handler.handler, True) # We don't want to pass this fd to any other children -- we # might need to recover the disk space that backs it. try: flags = fcntl.fcntl(fd, fcntl.F_GETFD) flags |= fcntl.FD_CLOEXEC fcntl.fcntl(fd, fcntl.F_SETFD, flags) except: pass if handler.store_mfn is None: raise XendError('Could not read store MFN') if not is_hvm and handler.console_mfn is None: raise XendError('Could not read console MFN') # get qemu state and create a tmp file for dm restore # Even PV guests may have QEMU stat, but its not currently # used so only bother with HVM currently. 
if is_hvm: qemu_signature = read_exact(fd, len(QEMU_SIGNATURE), "invalid device model signature read") if qemu_signature != QEMU_SIGNATURE: raise XendError("not a valid device model state: found '%s'" % qemu_signature) qemu_fd = os.open("/var/lib/xen/qemu-save.%d" % dominfo.getDomid(), os.O_WRONLY | os.O_CREAT | os.O_TRUNC) while True: buf = os.read(fd, dm_batch) if len(buf): write_exact(qemu_fd, buf, "could not write dm state to tmp file") else: break os.close(qemu_fd) restore_image.setCpuid() os.read(fd, 1) # Wait for source to close connection dominfo.completeRestore(handler.store_mfn, handler.console_mfn) # # We shouldn't hold the domains_lock over a waitForDevices # As this function sometime gets called holding this lock, # we must release it and re-acquire it appropriately # from xen.xend import XendDomain lock = True try: XendDomain.instance().domains_lock.release() except: lock = False try: dominfo.waitForDevices() # Wait for backends to set up except Exception, exn: log.exception(exn) if lock: XendDomain.instance().domains_lock.acquire() if not paused: dominfo.unpause() return dominfo
def _marshaled_dispatch(self, data, dispatch_method=None): params, method = xmlrpclib.loads(data) if False: # Enable this block of code to exit immediately without sending # a response. This allows you to test client-side crash handling. import sys sys.exit(1) try: if dispatch_method is not None: response = dispatch_method(method, params) else: response = self._dispatch(method, params) if self.xenapi and \ (response is None or not isinstance(response, dict) or 'Status' not in response): log.exception('Internal error handling %s: Invalid result %s', method, response) response = { "Status": "Failure", "ErrorDescription": [ 'INTERNAL_ERROR', 'Invalid result %s handling %s' % (response, method) ] } # With either Unicode or normal strings, we can only transmit # \t, \n, \r, \u0020-\ud7ff, \ue000-\ufffd, and \u10000-\u10ffff # in an XML document. xmlrpclib does not escape these values # properly, and then breaks when it comes to parse the document. # To hack around this problem, we use repr here and exec above # to transmit the string using Python encoding. # Thanks to David Mertz <*****@*****.**> for the trick (buried # in xml_pickle.py). if isinstance(response, StringTypes): response = repr(response)[1:-1] response = (response, ) response = xmlrpclib.dumps(response, methodresponse=1, allow_none=1) except Exception, exn: try: if self.xenapi: if _is_not_supported(exn): errdesc = ['MESSAGE_METHOD_UNKNOWN', method] else: log.exception('Internal error handling %s', method) errdesc = ['INTERNAL_ERROR', str(exn)] response = xmlrpclib.dumps(({ "Status": "Failure", "ErrorDescription": errdesc }, ), methodresponse=1) else: import xen.xend.XendClient if isinstance(exn, xmlrpclib.Fault): response = xmlrpclib.dumps(exn) else: log.exception('Internal error handling %s', method) response = xmlrpclib.dumps( xmlrpclib.Fault(xen.xend.XendClient.ERROR_INTERNAL, str(exn))) except: log.exception('Internal error handling error')
def save(fd, dominfo, network, live, dst, checkpoint=False, node=-1): write_exact(fd, SIGNATURE, "could not write guest state file: signature") sxprep = dominfo.sxpr() if node > -1: insert_after(sxprep, 'vcpus', ['node', str(node)]) config = sxp.to_string(sxprep) domain_name = dominfo.getName() # Rename the domain temporarily, so that we don't get a name clash if this # domain is migrating (live or non-live) to the local host. Doing such a # thing is useful for debugging. dominfo.setName('migrating-' + domain_name) try: dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP1, domain_name) write_exact(fd, pack("!i", len(config)), "could not write guest state file: config len") write_exact(fd, config, "could not write guest state file: config") image_cfg = dominfo.info.get('image', {}) hvm = dominfo.info.is_hvm() # xc_save takes three customization parameters: maxit, max_f, and # flags the last controls whether or not save is 'live', while the # first two further customize behaviour when 'live' save is # enabled. Passing "0" simply uses the defaults compiled into # libxenguest; see the comments and/or code in xc_linux_save() for # more information. 
cmd = [ xen.util.auxbin.pathTo(XC_SAVE), str(fd), str(dominfo.getDomid()), "0", "0", str(int(live) | (int(hvm) << 2)) ] log.debug("[xc_save]: %s", string.join(cmd)) def saveInputHandler(line, tochild): log.debug("In saveInputHandler %s", line) if line == "suspend": log.debug("Suspending %d ...", dominfo.getDomid()) dominfo.shutdown('suspend') dominfo.waitForShutdown() if line in ('suspend', 'suspended'): dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP2, domain_name) log.info("Domain %d suspended.", dominfo.getDomid()) dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP3, domain_name) if hvm: dominfo.image.saveDeviceModel() if line == "suspend": tochild.write("done\n") tochild.flush() log.debug('Written done') forkHelper(cmd, fd, saveInputHandler, False) # put qemu device model state if os.path.exists("/var/lib/xen/qemu-save.%d" % dominfo.getDomid()): write_exact(fd, QEMU_SIGNATURE, "could not write qemu signature") qemu_fd = os.open("/var/lib/xen/qemu-save.%d" % dominfo.getDomid(), os.O_RDONLY) while True: buf = os.read(qemu_fd, dm_batch) if len(buf): write_exact(fd, buf, "could not write device model state") else: break os.close(qemu_fd) os.remove("/var/lib/xen/qemu-save.%d" % dominfo.getDomid()) if checkpoint: dominfo.resumeDomain() else: dominfo.destroy() dominfo.testDeviceComplete() try: dominfo.setName(domain_name, False) except VmError: # Ignore this. The name conflict (hopefully) arises because we # are doing localhost migration; if we are doing a suspend of a # persistent VM, we need the rename, and don't expect the # conflict. This needs more thought. pass except Exception, exn: log.exception("Save failed on domain %s (%s) - resuming.", domain_name, dominfo.getDomid()) dominfo.resumeDomain() try: dominfo.setName(domain_name) except: log.exception("Failed to reset the migrating domain's name") raise exn
def start(self, status):
    """Start every configured server and keep the main thread alive.

    Each server in self.servers that is not yet ready is run in its own
    daemon thread.  Once all servers report ready (or the wait gives up),
    '0' is written to status, domains are auto-started, and the main
    thread sleeps until self.running is cleared.  If a configuration
    reload was requested the servers are rebuilt and the whole sequence
    repeats; otherwise the method returns.

    status  object with write() and close() used to report start-up, or
            a false value when there is nothing to report to
    """
    # Running the network script will spawn another process, which takes
    # the status fd with it unless we set FD_CLOEXEC.  Failing to do this
    # causes the read in SrvDaemon to hang even when we have written here.
    if status:
        fcntl.fcntl(status, fcntl.F_SETFD, fcntl.FD_CLOEXEC)

    # Prepare to catch SIGTERM (received when 'xend stop' is executed)
    # and call each server's cleanup if possible
    signal.signal(signal.SIGTERM, self.cleanup)
    signal.signal(signal.SIGHUP, self.reloadConfig)

    while True:
        threads = []
        for server in self.servers:
            # Servers that are already ready are not started again.
            if server.ready:
                continue

            thread = Thread(target=server.run,
                            name=server.__class__.__name__)
            thread.setDaemon(True)
            thread.start()
            threads.append(thread)

        # check for when all threads have initialized themselves and then
        # close the status pipe

        retryCount = 0
        threads_left = True
        while threads_left:
            threads_left = False

            for server in self.servers:
                if not server.ready:
                    threads_left = True
                    break

            if threads_left:
                time.sleep(.5)
                retryCount += 1
                # Give up after more than 60 half-second waits and name
                # every server that still is not ready.
                if retryCount > 60:
                    for server in self.servers:
                        if not server.ready:
                            log.error("Server " +
                                      server.__class__.__name__ +
                                      " did not initialise!")
                    break

        if status:
            status.write('0')
            status.close()
            # Cleared so a later pass of the outer loop does not reuse it.
            status = None

        # Reaching this point means we can auto start domains
        try:
            xenddomain().autostart_domains()
        except Exception, e:
            log.exception("Failed while autostarting domains")

        # loop to keep main thread alive until it receives a SIGTERM
        # NOTE(review): presumably the signal handlers installed above clear
        # self.running and interrupt the sleep -- confirm in cleanup and
        # reloadConfig.
        self.running = True
        while self.running:
            time.sleep(100000000)

        if self.reloadingConfig:
            log.info("Restarting all XML-RPC and Xen-API servers...")
            self.cleaningUp = False
            self.reloadingConfig = False
            xoptions.set_config()
            self.servers = []
            _loadConfig(self, self.root, True)
        else:
            break
pass sock.close() dominfo.destroy() dominfo.testDeviceComplete() try: dominfo.setName(domain_name, False) except VmError: # Ignore this. The name conflict (hopefully) arises because we # are doing localhost migration; if we are doing a suspend of a # persistent VM, we need the rename, and don't expect the # conflict. This needs more thought. pass except Exception, exn: log.exception("Save failed on domain %s (%s) - resuming.", domain_name, dominfo.getDomid()) dominfo.resumeDomain() try: dominfo.setName(domain_name) except: log.exception("Failed to reset the migrating domain's name") raise exn def restore(xd, fd, dominfo = None, paused = False, relocating = False): try: if not os.path.isdir("/var/lib/xen"): os.makedirs("/var/lib/xen") except Exception, exn:
dominfo.destroy() dominfo.testDeviceComplete() try: if checkpoint: dominfo.setName(domain_name) else: dominfo.setName(domain_name, False) except VmError: # Ignore this. The name conflict (hopefully) arises because we # are doing localhost migration; if we are doing a suspend of a # persistent VM, we need the rename, and don't expect the # conflict. This needs more thought. pass except Exception, exn: log.exception("Save failed on domain %s (%s) - resuming.", domain_name, dominfo.getDomid()) dominfo.resumeDomain() # Reacquire the domain lock if checkpoint == False: dominfo.acquire_running_lock() try: dominfo.setName(domain_name) except: log.exception("Failed to reset the migrating domain's name") raise exn def restore(xd, fd, dominfo=None, paused=False, relocating=False): try:
class XMLRPCServer: def __init__(self, auth, use_xenapi, use_tcp = False, ssl_key_file = None, ssl_cert_file = None, host = "localhost", port = 8006, path = XML_RPC_SOCKET, hosts_allowed = None): self.use_tcp = use_tcp self.port = port self.host = host self.path = path self.hosts_allowed = hosts_allowed self.ssl_key_file = ssl_key_file self.ssl_cert_file = ssl_cert_file self.ready = False self.running = True self.auth = auth self.xenapi = use_xenapi and XendAPI.XendAPI(auth) or None def run(self): authmsg = (self.auth == XendAPI.AUTH_NONE and "; authentication has been disabled for this server." or ".") try: if self.use_tcp: using_ssl = self.ssl_key_file and self.ssl_cert_file log.info("Opening %s XML-RPC server on %s%d%s", using_ssl and 'HTTPS' or 'TCP', self.host and '%s:' % self.host or 'all interfaces, port ', self.port, authmsg) if using_ssl: if not ssl_enabled: raise ValueError("pyOpenSSL not installed. " "Unable to start HTTPS XML-RPC server") self.server = SSLXMLRPCServer( (self.host, self.port), self.hosts_allowed, self.xenapi is not None, logRequests = False, ssl_key_file = self.ssl_key_file, ssl_cert_file = self.ssl_cert_file) else: self.server = TCPXMLRPCServer( (self.host, self.port), self.hosts_allowed, self.xenapi is not None, logRequests = False) else: log.info("Opening Unix domain socket XML-RPC server on %s%s", self.path, authmsg) self.server = UnixXMLRPCServer(self.path, self.hosts_allowed, self.xenapi is not None, logRequests = False) except socket.error, exn: log.error('Cannot start server: %s!', exn.args[1]) ready = True running = False return except Exception, e: log.exception('Cannot start server: %s!', e) ready = True running = False return
def start(self, status):
    """Start every configured server, then auto-start pools and domains.

    Each server in self.servers that is not yet ready is run in its own
    daemon thread.  Once all servers report ready (or the wait gives up),
    '0' is written to status and CPU pools, then domains, are
    auto-started.

    status  object with write() and close() used to report start-up, or
            a false value when there is nothing to report to
    """
    # Running the network script will spawn another process, which takes
    # the status fd with it unless we set FD_CLOEXEC.  Failing to do this
    # causes the read in SrvDaemon to hang even when we have written here.
    if status:
        fcntl.fcntl(status, fcntl.F_SETFD, fcntl.FD_CLOEXEC)

    # Prepare to catch SIGTERM (received when 'xend stop' is executed)
    # and call each server's cleanup if possible
    signal.signal(signal.SIGTERM, self.cleanup)
    signal.signal(signal.SIGHUP, self.reloadConfig)

    # NOTE(review): nothing in the visible text leaves this loop;
    # presumably more of the loop body follows -- confirm against the
    # complete file.
    while True:
        threads = []
        for server in self.servers:
            # Servers that are already ready are not started again.
            if server.ready:
                continue

            thread = Thread(target=server.run,
                            name=server.__class__.__name__)
            thread.setDaemon(True)
            thread.start()
            threads.append(thread)

        # check for when all threads have initialized themselves and then
        # close the status pipe

        retryCount = 0
        threads_left = True
        while threads_left:
            threads_left = False

            for server in self.servers:
                if not server.ready:
                    threads_left = True
                    break

            if threads_left:
                time.sleep(.5)
                retryCount += 1
                # Give up after more than 60 half-second waits and name
                # every server that still is not ready.
                if retryCount > 60:
                    for server in self.servers:
                        if not server.ready:
                            log.error("Server " +
                                      server.__class__.__name__ +
                                      " did not initialise!")
                    break

        if status:
            status.write('0')
            status.close()
            # Cleared so a later pass of the outer loop does not reuse it.
            status = None

        # auto start pools before domains are started
        try:
            XendCPUPool.autostart_pools()
        except Exception, e:
            log.exception("Failed while autostarting pools")

        # Reaching this point means we can auto start domains
        try:
            xenddomain().autostart_domains()
        except Exception, e:
            log.exception("Failed while autostarting domains")
def restore(xd, fd, dominfo = None, paused = False): signature = read_exact(fd, len(SIGNATURE), "not a valid guest state file: signature read") if signature != SIGNATURE: raise XendError("not a valid guest state file: found '%s'" % signature) l = read_exact(fd, sizeof_int, "not a valid guest state file: config size read") vmconfig_size = unpack("!i", l)[0] vmconfig_buf = read_exact(fd, vmconfig_size, "not a valid guest state file: config read") p = sxp.Parser() p.input(vmconfig_buf) if not p.ready: raise XendError("not a valid guest state file: config parse") vmconfig = p.get_val() if dominfo: dominfo.resume() else: dominfo = xd.restore_(vmconfig) store_port = dominfo.getStorePort() console_port = dominfo.getConsolePort() assert store_port assert console_port # if hvm, pass mem size to calculate the store_mfn image_cfg = dominfo.info.get('image', {}) is_hvm = dominfo.info.is_hvm() if is_hvm: apic = int(dominfo.info['platform'].get('apic', 0)) pae = int(dominfo.info['platform'].get('pae', 0)) log.info("restore hvm domain %d, apic=%d, pae=%d", dominfo.domid, apic, pae) else: apic = 0 pae = 0 try: restore_image = image.create(dominfo, dominfo.info) memory = restore_image.getRequiredAvailableMemory( dominfo.info['memory_dynamic_max'] / 1024) maxmem = restore_image.getRequiredAvailableMemory( dominfo.info['memory_static_max'] / 1024) shadow = restore_image.getRequiredShadowMemory( dominfo.info['shadow_memory'] * 1024, dominfo.info['memory_static_max'] / 1024) log.debug("restore:shadow=0x%x, _static_max=0x%x, _static_min=0x%x, ", dominfo.info['shadow_memory'], dominfo.info['memory_static_max'], dominfo.info['memory_static_min']) # Round shadow up to a multiple of a MiB, as shadow_mem_control # takes MiB and we must not round down and end up under-providing. 
shadow = ((shadow + 1023) / 1024) * 1024 # set memory limit xc.domain_setmaxmem(dominfo.getDomid(), maxmem) balloon.free(memory + shadow) shadow_cur = xc.shadow_mem_control(dominfo.getDomid(), shadow / 1024) dominfo.info['shadow_memory'] = shadow_cur cmd = map(str, [xen.util.auxbin.pathTo(XC_RESTORE), fd, dominfo.getDomid(), store_port, console_port, int(is_hvm), pae, apic]) log.debug("[xc_restore]: %s", string.join(cmd)) handler = RestoreInputHandler() forkHelper(cmd, fd, handler.handler, True) # We don't want to pass this fd to any other children -- we # might need to recover the disk space that backs it. try: flags = fcntl.fcntl(fd, fcntl.F_GETFD) flags |= fcntl.FD_CLOEXEC fcntl.fcntl(fd, fcntl.F_SETFD, flags) except: pass if handler.store_mfn is None: raise XendError('Could not read store MFN') if not is_hvm and handler.console_mfn is None: raise XendError('Could not read console MFN') # get qemu state and create a tmp file for dm restore # Even PV guests may have QEMU stat, but its not currently # used so only bother with HVM currently. 
if is_hvm: qemu_signature = read_exact(fd, len(QEMU_SIGNATURE), "invalid device model signature read") if qemu_signature != QEMU_SIGNATURE: raise XendError("not a valid device model state: found '%s'" % qemu_signature) qemu_fd = os.open("/var/lib/xen/qemu-save.%d" % dominfo.getDomid(), os.O_WRONLY | os.O_CREAT | os.O_TRUNC) while True: buf = os.read(fd, dm_batch) if len(buf): write_exact(qemu_fd, buf, "could not write dm state to tmp file") else: break os.close(qemu_fd) os.read(fd, 1) # Wait for source to close connection dominfo.completeRestore(handler.store_mfn, handler.console_mfn) # # We shouldn't hold the domains_lock over a waitForDevices # As this function sometime gets called holding this lock, # we must release it and re-acquire it appropriately # from xen.xend import XendDomain lock = True; try: XendDomain.instance().domains_lock.release() except: lock = False; try: dominfo.waitForDevices() # Wait for backends to set up except Exception, exn: log.exception(exn) if lock: XendDomain.instance().domains_lock.acquire() if not paused: dominfo.unpause() return dominfo
def save(fd, dominfo, network, live, dst, checkpoint=False): write_exact(fd, SIGNATURE, "could not write guest state file: signature") # CoW timing checkpointtime = [] downtime = [] buf_list = [] # Cow timing checkpointtime.append(time.time()) config = sxp.to_string(dominfo.sxpr()) domain_name = dominfo.getName() # Rename the domain temporarily, so that we don't get a name clash if this # domain is migrating (live or non-live) to the local host. Doing such a # thing is useful for debugging. dominfo.setName('migrating-' + domain_name) try: dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP1, domain_name) write_exact(fd, pack("!i", len(config)), "could not write guest state file: config len") write_exact(fd, config, "could not write guest state file: config") image_cfg = dominfo.info.get('image', {}) hvm = dominfo.info.is_hvm() # xc_save takes three customization parameters: maxit, max_f, and # flags the last controls whether or not save is 'live', while the # first two further customize behaviour when 'live' save is # enabled. Passing "0" simply uses the defaults compiled into # libxenguest; see the comments and/or code in xc_linux_save() for # more information. 
cmd = [xen.util.auxbin.pathTo(XC_SAVE), str(fd), str(dominfo.getDomid()), "0", "0", str(int(live) | (int(hvm) << 2)) ] def saveInputHandler(line, tochild): if line == "suspend": dominfo.shutdown('suspend') # CoW timing downtime.append(time.time()) dominfo.waitForShutdown() dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP2, domain_name) dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP3, domain_name) if hvm: dominfo.image.saveDeviceModel() # for CoW purposes, get qemu-dm state qemu_fd = os.open("/var/lib/xen/qemu-save.%d" % dominfo.getDomid(), os.O_RDONLY) while True: buf = os.read(qemu_fd, dm_batch) if len(buf): buf_list.append(buf) else: break os.close(qemu_fd) # Cow: snapshot VBD os.system("/etc/xen/scripts/snapshot-vbd.sh %s" % os.path.basename(dst)) log.debug('Performed VBD snapshot') tochild.write("done\n") tochild.flush() if line == "restart": global down_end log.debug("Restarting %d ...", dominfo.getDomid()) dominfo.resumeDomain(downtime) # CoW timing downtime.append(time.time()) tochild.write("done\n") tochild.flush() forkHelper(cmd, fd, saveInputHandler, False) # put qemu device model state if os.path.exists("/var/lib/xen/qemu-save.%d" % dominfo.getDomid()): os.remove("/var/lib/xen/qemu-save.%d" % dominfo.getDomid()) write_exact(fd, QEMU_SIGNATURE, "could not write qemu signature") for buf in buf_list: if len(buf): write_exact(fd, buf, "could not write device model state") else: break try: dominfo.setName(domain_name) except VmError: # Ignore this. The name conflict (hopefully) arises because we # are doing localhost migration; if we are doing a suspend of a # persistent VM, we need the rename, and don't expect the # conflict. This needs more thought. 
pass # CoW timing checkpointtime.append(time.time()) log.debug("[downtime] %s", downtime[2] - downtime[0]) log.debug("[checkpoint_time] %s", checkpointtime[1] - checkpointtime[0]) except Exception, exn: log.exception("Save failed on domain %s (%s) - resuming.", domain_name, dominfo.getDomid()) dominfo.resumeDomain([]) try: dominfo.setName(domain_name) except: log.exception("Failed to reset the migrating domain's name") raise exn