def GetSlivers(self, config, plc):
    """Retrieves GetSlivers at PLC and triggers callbacks defined in modules/plugins"""
    try:
        logger.log("nodemanager: Syncing w/ PLC")
        # retrieve GetSlivers from PLC
        data = plc.GetSlivers()
        # use the magic 'default' slice to retrieve system-wide defaults
        self.getPLCDefaults(data, config)
        # tweak the 'vref' attribute from GetSliceFamily
        self.setSliversVref(data)
        # dump it too, so it can be retrieved later in case of comm. failure
        self.dumpSlivers(data)
        # log it for debug purposes, no matter what verbose is
        logger.log_slivers(data)
        logger.verbose("nodemanager: Sync w/ PLC done")
        last_data = data
    except:
        logger.log_exc("nodemanager: failed in GetSlivers")
        # XXX So some modules can at least bootstrap.
        logger.log("nodemanager: Can't contact PLC to GetSlivers(). Continuing.")
        data = {}
        # for modules that request it through the 'persistent_data' property
        last_data = self.loadSlivers()
    # Invoke GetSlivers() functions from the callback modules
    for module in self.loaded_modules:
        logger.verbose('nodemanager: triggering %s.GetSlivers' % module.__name__)
        try:
            callback = getattr(module, 'GetSlivers')
            module_data = data
            if getattr(module, 'persistent_data', False):
                module_data = last_data
            # pass module_data so that 'persistent_data' modules actually get the cached copy
            callback(module_data, config, plc)
        except:
            logger.log_exc("nodemanager: GetSlivers failed to run callback for module %r" % module)
def start(): """The database dumper daemon. When it starts up, it populates the database with the last dumped database. It proceeds to handle dump requests forever.""" def run(): global dump_requested while True: db_lock.acquire() while not dump_requested: db_cond.wait() db_pickle = cPickle.dumps(db, cPickle.HIGHEST_PROTOCOL) dump_requested = False db_lock.release() try: tools.write_file(DB_FILE, lambda f: f.write(db_pickle)) logger.log_database(db) except: logger.log_exc("database.start: failed to pickle/dump") global db try: f = open(DB_FILE) try: db = cPickle.load(f) finally: f.close() except IOError: logger.log ("database: Could not load %s -- starting from a fresh database"%DB_FILE) db = Database() except: logger.log_exc("database: failed in start") db = Database() logger.log('database.start') tools.as_daemon_thread(run)
def run_once(self, data):
    if data.has_key("conf_files"):
        for f in data['conf_files']:
            try:
                self.update_conf_file(f)
            except:
                logger.log_exc("conf_files: failed to update conf_file")
    else:
        logger.log_missing_data("conf_files.run_once", 'conf_files')
def set_resources(self): disk_max = self.rspec['disk_max'] logger.log('sliver_vs: %s: setting max disk usage to %d KiB' % (self.name, disk_max)) try: # if the sliver is over quota, .set_disk_limit will throw an exception if not self.disk_usage_initialized: self.vm_running = False Sliver_VS._init_disk_info_sem.acquire() logger.log('sliver_vs: %s: computing disk usage: beginning' % self.name) # init_disk_info is inherited from VServer try: self.init_disk_info() finally: Sliver_VS._init_disk_info_sem.release() logger.log('sliver_vs: %s: computing disk usage: ended' % self.name) self.disk_usage_initialized = True vserver.VServer.set_disklimit(self, max(disk_max, self.disk_blocks)) except: logger.log_exc('sliver_vs: failed to set max disk usage',name=self.name) # get/set the min/soft/hard values for all of the vserver # related RLIMITS. Note that vserver currently only # implements support for hard limits. for limit in vserver.RLIMITS.keys(): type = limit.lower() minimum = self.rspec['%s_min'%type] soft = self.rspec['%s_soft'%type] hard = self.rspec['%s_hard'%type] update = self.set_rlimit(limit, hard, soft, minimum) if update: logger.log('sliver_vs: %s: setting rlimit %s to (%d, %d, %d)' % (self.name, type, hard, soft, minimum)) self.set_capabilities_config(self.rspec['capabilities']) if self.rspec['capabilities']: logger.log('sliver_vs: %s: setting capabilities to %s' % (self.name, self.rspec['capabilities'])) cpu_pct = self.rspec['cpu_pct'] cpu_share = self.rspec['cpu_share'] count = 1 for key in self.rspec.keys(): if key.find('sysctl.') == 0: sysctl=key.split('.') try: # /etc/vservers/<guest>/sysctl/<id>/ dirname = "/etc/vservers/%s/sysctl/%s" % (self.name, count) try: os.makedirs(dirname, 0755) except: pass setting = open("%s/setting" % dirname, "w") setting.write("%s\n" % key.lstrip("sysctl.")) setting.close() value = open("%s/value" % dirname, "w") value.write("%s\n" % self.rspec[key]) value.close() count += 1 logger.log("sliver_vs: %s: writing %s=%s"%(self.name,key,self.rspec[key])) except IOError, e: logger.log("sliver_vs: %s: could not set %s=%s"%(self.name,key,self.rspec[key])) logger.log("sliver_vs: %s: error = %s"%(self.name,e))
def _manage_ssh_dir(slicename, do_mount):
    logger.log("_manage_ssh_dir, requested to " + ("mount" if do_mount else "umount")
               + " ssh dir for " + slicename)
    try:
        root_ssh = "/home/%s/.ssh" % slicename
        sliver_ssh = "/vservers/%s/home/%s/.ssh" % (slicename, slicename)

        def is_mounted(root_ssh):
            for mount_line in file('/proc/mounts').readlines():
                if mount_line.find(root_ssh) >= 0:
                    return True
            return False

        if do_mount:
            # any of both might not exist yet
            for path in [root_ssh, sliver_ssh]:
                if not os.path.exists(path):
                    os.mkdir(path)
                if not os.path.isdir(path):
                    raise Exception
            if not is_mounted(root_ssh):
                # xxx perform mount
                subprocess.call("mount --bind -o ro %s %s" % (root_ssh, sliver_ssh), shell=True)
                logger.log("_manage_ssh_dir: mounted %s into slice %s" % (root_ssh, slicename))
        else:
            if is_mounted(root_ssh):
                # xxx perform umount
                subprocess.call("umount %s" % (root_ssh), shell=True)
                logger.log("_manage_ssh_dir: umounted %s" % (root_ssh))
    except:
        logger.log_exc("_manage_ssh_dir with slice %s failed" % slicename)
def check_authentication(self):
    authstatus = False
    if self.key or self.session:
        try:
            authstatus = self.AuthCheck()
        except:
            logger.log_exc("plcapi: failed in plcapi.check_authentication")
    return authstatus
def disable_syndicate_mount(sliver, mountpoint, syndicate_ip):
    syndicate_op("DELETE", mountpoint, syndicate_ip)
    if os.path.exists(mountpoint):
        try:
            os.rmdir(mountpoint)
        except:
            logger.log_exc("failed to delete syndicate mountpoint", "Syndicate")
def enable_syndicate_mount(sliver, mountpoint, syndicate_ip):
    if not os.path.exists(mountpoint):
        try:
            os.mkdir(mountpoint)
        except:
            logger.log_exc("failed to mkdir syndicate mountpoint", "Syndicate")
            return

    syndicate_op("PUT", mountpoint, syndicate_ip)
def sync(self): """Synchronize reality with the database contents. This method does a lot of things, and it's currently called after every single batch of database changes (a GetSlivers(), a loan, a record). It may be necessary in the future to do something smarter.""" # delete expired records now = time.time() for name, rec in list(self.items()): if rec.get('expires', now) < now: del self[name] self._compute_effective_rspecs() try: coresched = CoreSched() coresched.adjustCores(self) except: logger.log_exc("database: exception while doing core sched") # create and destroy accounts as needed logger.verbose("database: sync : fetching accounts") existing_acct_names = account.all() for name in existing_acct_names: if name not in self: logger.verbose("database: sync : ensure_destroy'ing %s"%name) account.get(name).ensure_destroyed() for name, rec in self.items(): # protect this; if anything fails for a given sliver # we still need the other ones to be handled try: sliver = account.get(name) logger.verbose("database: sync : looping on %s (shell account class from pwd %s)" %(name, sliver._get_class())) # Make sure we refresh accounts that are running if rec['instantiation'] == 'plc-instantiated': logger.verbose ("database: sync : ensure_create'ing 'instantiation' sliver %s"%name) sliver.ensure_created(rec) elif rec['instantiation'] == 'nm-controller': logger.verbose ("database: sync : ensure_create'ing 'nm-controller' sliver %s"%name) sliver.ensure_created(rec) # Back door to ensure PLC overrides Ticket in delegation. elif rec['instantiation'] == 'delegated' and sliver._get_class() != None: # if the ticket has been delivered and the nm-controller started the slice # update rspecs and keep them up to date. if sliver.is_running(): logger.verbose ("database: sync : ensure_create'ing 'delegated' sliver %s"%name) sliver.ensure_created(rec) except SystemExit as e: sys.exit(e) except: logger.log_exc("database: sync failed to handle sliver", name=name) # Wake up bwmom to update limits. bwmon.lock.set() global dump_requested dump_requested = True db_cond.notify()
def GetSlivers(data, config=None, plc=None): if 'slivers' not in data: logger.log_missing_data("hostmap.GetSlivers",'slivers') return if 'hostname' not in data: logger.log_missing_data("hostmap.GetSlivers", 'hostname') hostname = data['hostname'] for sliver in data['slivers']: slicename = sliver['name'] for tag in sliver['attributes']: if tag['tagname'] == 'slice_hostmap': fn = "/vservers/%s/etc/hosts" % slicename if not os.path.exists(fn): continue contents = file(fn,"r").read() hostmap = [] for index, entry in enumerate(tag["value"].split("\n")): parts = entry.split(" ") if len(parts)==2: line = "%s pvt.%s private%d" % (parts[0], parts[1], index) if (index==0): line = line + " headnode" if parts[1] == hostname: line = line + " pvt.self" hostmap.append(line) hostmap = "\n".join(hostmap) hostmap = PREFIX + "\n" + hostmap + "\n" + SUFFIX + "\n" if (hostmap in contents): # it's already there continue # remove anything between PREFIX and SUFFIX from contents pattern = PREFIX + ".*" + SUFFIX + "\n" regex = re.compile(pattern, re.DOTALL) if regex.search(contents) != None: contents = regex.sub(hostmap, contents) else: contents = contents + hostmap try: file(fn, "w").write(contents) except: logger.log_exc("hostmap (%s): failed to write %s" % (slicename, fn))
def stop(self):
    logger.verbose('sliver_libvirt: {} stop'.format(self.name))

    # Remove the ebtables rule before stopping
    bwlimit.ebtables("-D INPUT -i veth{} -j mark --set-mark {}"
                     .format(self.xid, self.xid))

    try:
        self.dom.destroy()
    except:
        logger.log_exc("in sliver_libvirt.stop", name=self.name)
def main(): from argparse import ArgumentParser from plcapi import PLCAPI parser = ArgumentParser() parser.add_argument('-f', '--config', action='store', dest='config', default='/etc/planetlab/plc_config', help='PLC configuration file') parser.add_argument('-k', '--session', action='store', dest='session', default='/etc/planetlab/session', help='API session key (or file)') parser.add_argument('--noscripts', action='store_true', dest='noscripts', default=False, help='Do not run pre- or post-install scripts') parser.add_argument('--max-attempts', action='store', dest='max_attempts', default=10, help='Max number of attempts') parser.add_argument('--period', action='store', dest='period', help='Time in seconds to wait between attempts') args = parser.parse_args() # Load /etc/planetlab/plc_config config = Config(args.config) # Load /etc/planetlab/session if os.path.exists(args.session): with open(args.session) as feed: session = feed.read().strip() else: session = args.session # loop until it succeeds once # this is a change that comes with python3/fedora29 in late 2018, # because although the conf_files service is defined to systemd # as a dependency of the network, it triggers too early # at a point where eth0 is not ready # Initialize XML-RPC client attempts = 0 while True: try: plc = PLCAPI(config.plc_api_uri, config.cacert, auth=session) data = plc.GetSlivers() instance = ConfFiles(args.noscripts) instance.run_once(data) return 0 except Exception as exc: logger.log_exc("Could not receive GetSlivers() from PLC") attempts += 1 if attempts >= args.max_attempts: return 1 logger.log("Waiting for {}s before trying again".format(args.period)) time.sleep(args.period)
def parseConf():
    '''Parse the vsys conf and return list of slices in conf.'''
    scriptacls = {}
    slicesinconf = []
    try:
        f = open(VSYSCONF)
        for line in f.readlines():
            (path, slice) = line.split()
            slicesinconf.append(slice)
        f.close()
    except:
        logger.log_exc("vsys: failed parseConf")
    return slicesinconf
def compiler_thread():
    global poke
    logger.log(TAG, "compiler thread started")
    while 1:
        for i in range(0, 120):
            if poke:
                break
            time.sleep(1)
        poke = False
        try:
            compiler_handle_jobs()
        except:
            logger.log(TAG, "an exception occurred while handling jobs")
            logger.log_exc(TAG)
def __call__(self, environ, start_response, path=None): if util.unix_time() >= config.EVENT_END and util.get_real_ip( environ) != config.ADMIN_IP: start_response( '200 Bepis', [('Access-Control-Allow-Origin', '*'), ('Access-Control-Allow-Methods', 'POST, GET, OPTIONS'), ('Content-Type', 'application/json')]) return [ b'{"success": false, "message": "Fools2019 is over - the event servers will be shut down soon.<br><br>Stay tuned for the source code release!"}' ] if torbanlist.is_banned(environ): start_response( '200 ZZAZZ Is Legion', [('Access-Control-Allow-Origin', '*'), ('Access-Control-Allow-Methods', 'POST, GET, OPTIONS'), ('Content-Type', 'application/json')]) return [ b'{"success": false, "message": "To help minimize abuse, Fools2019 is not accessible from public proxies and Tor exit nodes. We\'re sorry for the inconvenience."}' ] try: method = environ['REQUEST_METHOD'] if path is None: path = self.path_info(environ) result = self.resolve(method, path) if result.app is not None: kwargs = result.match.groupdict() if kwargs: args = () else: kwargs = {} args = result.match.groups() environ['wsgiorg.routing_args'] = (args, kwargs) if isinstance(result.app, Router): return result.app(environ, start_response, path=result.rest) else: return result.app(environ, start_response) except Exception as e: if config.DEBUG: raise e logger.log_exc(TAG) start_response( '200 Oopsie Woopsie', [('Access-Control-Allow-Origin', '*'), ('Access-Control-Allow-Methods', 'POST, GET, OPTIONS'), ('Content-Type', 'application/json')]) return [ b'{"success": false, "message": "Oopsie Woopsie! The event server made a fucky wucky. A wittle fucko boingo. The code monkeys at our headquarters are working VEWY HAWD to fix this. Please come back later once they\'re done!"}' ]
def sql(query, params=(), log_errors=True):
    ret = []
    with db_lock:
        try:
            cur = db.cursor()
            for i in cur.execute(query, params):
                ret.append(util.DotDict(i))
            db.commit()
        except:
            if log_errors:
                logger.log(TAG, "sql query made a fucky wucky, a wittle fucko boingo")
                logger.log_exc(TAG)
    return ret
def __init__(self, rec):
    name = rec['name']
    logger.verbose('sliver_vs: %s init' % name)
    try:
        logger.log("sliver_vs: %s: first chance..." % name)
        vserver.VServer.__init__(self, name, logfile='/var/log/nodemanager')
    except Exception, err:
        if not isinstance(err, vserver.NoSuchVServer):
            # Probably a bad vserver or vserver configuration file
            logger.log_exc("sliver_vs: __init__ (first chance)", name=name)
            logger.log('sliver_vs: %s: recreating bad vserver' % name)
            self.destroy(name)
        self.create(name, rec)
        logger.log("sliver_vs: %s: second chance..." % name)
        vserver.VServer.__init__(self, name, logfile='/var/log/nodemanager')
def suspend_slice(self, slicename):
    logger.log('reservation: Suspending slice %s' % (slicename))
    self.debug_box('before suspending', slicename)
    worker = account.get(slicename)
    try:
        logger.log("reservation: Located worker object %r" % worker)
        worker.stop()
    except AttributeError:
        # when the underlying worker is not entirely initialized yet
        pass
    except:
        logger.log_exc("reservation.suspend_slice: Could not stop slice %s through its worker" % slicename)
    # we hope the status line won't return anything
    self.debug_box('after suspending', slicename)
def install_and_enable_vinit(self):
    vinit_source = "/usr/share/NodeManager/sliver-initscripts/vinit"
    vinit_script = "/vservers/%s/etc/rc.d/init.d/vinit" % self.name
    rc3_link = "/vservers/%s/etc/rc.d/rc3.d/S99vinit" % self.name
    rc3_target = "../init.d/vinit"
    # install in sliver
    code = file(vinit_source).read()
    if tools.replace_file_with_string(vinit_script, code, chmod=0755):
        logger.log("vsliver_vs: %s: installed generic vinit rc script" % self.name)
    # create symlink for runlevel 3
    if not os.path.islink(rc3_link):
        try:
            logger.log("vsliver_vs: %s: creating runlevel3 symlink %s" % (self.name, rc3_link))
            os.symlink(rc3_target, rc3_link)
        except:
            logger.log_exc("vsliver_vs: %s: failed to create runlevel3 symlink %s" % (self.name, rc3_link))
def parseConf(conf = CODEMUXCONF):
    '''Parse the CODEMUXCONF and return dict of slices in conf. {slice: (host, port)}'''
    slicesinconf = {}  # default
    try:
        f = open(conf)
        for line in f.readlines():
            if line.startswith("#") \
                    or (len(line.split()) > 4) \
                    or (len(line.split()) < 3):
                continue
            (host, slice, port) = line.split()[:3]
            logger.log("codemux: found %s in conf" % slice, 2)
            slicesinconf.setdefault(slice, [])
            slicesinconf[slice].append({"host": host, "port": port})
        f.close()
    except IOError:
        logger.log_exc("codemux.parseConf got IOError")
    return slicesinconf
def log_call_read(command, timeout=logger.default_timeout_minutes*60, poll=1):
    message = " ".join(command)
    logger.log("log_call: running command %s" % message)
    logger.verbose("log_call: timeout=%r s" % timeout)
    logger.verbose("log_call: poll=%r s" % poll)
    trigger = time.time() + timeout
    try:
        child = subprocess.Popen(
            command, bufsize=1,
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            close_fds=True, universal_newlines=True)
        stdout = ""
        while True:
            # see if anything can be read within the poll interval
            (r, w, x) = select.select([child.stdout], [], [], poll)
            if r:
                stdout = stdout + child.stdout.read(1)
            # is process over ?
            returncode = child.poll()
            # yes
            if returncode != None:
                stdout = stdout + child.stdout.read()
                # child is done and return 0
                if returncode == 0:
                    logger.log("log_call:end command (%s) completed" % message)
                    if stdout != "":
                        logger.log("log_call:stdout: %s" % stdout)
                    return (returncode, stdout)
                # child has failed
                else:
                    logger.log("log_call:end command (%s) returned with code %d"
                               % (message, returncode))
                    return (returncode, stdout)
            # no : still within timeout ?
            if time.time() >= trigger:
                child.terminate()
                logger.log("log_call:end terminating command (%s) - exceeded timeout %d s"
                           % (message, timeout))
                return (-2, None)
                break
    except Exception as e:
        logger.log_exc("failed to run command %s -> %s" % (message, e))
        return (-1, None)
def expose_ssh_dir(self):
    try:
        root_ssh = "/home/%s/.ssh" % self.name
        sliver_ssh = "/vservers/%s/home/%s/.ssh" % (self.name, self.name)
        # any of both might not exist yet
        for path in [root_ssh, sliver_ssh]:
            if not os.path.exists(path):
                os.mkdir(path)
            if not os.path.isdir(path):
                raise Exception
        mounts = file('/proc/mounts').read()
        if mounts.find(sliver_ssh) < 0:
            # xxx perform mount
            subprocess.call("mount --bind -o ro %s %s" % (root_ssh, sliver_ssh), shell=True)
            logger.log("expose_ssh_dir: %s mounted into slice %s" % (root_ssh, self.name))
    except:
        logger.log_exc("expose_ssh_dir with slice %s failed" % self.name)
def syndicate_op(op, mountpoint, syndicate_ip):
    #op="GET"
    #syndicate_ip="www.vicci.org"
    logger.log("Syndicate: Http op %s on url %s to host %s" % (op, mountpoint, syndicate_ip))

    try:
        conn = httplib.HTTPSConnection(syndicate_ip, timeout=60)
        conn.request(op, mountpoint)
        r1 = conn.getresponse()
    except:
        logger.log_exc("Exception when contacting syndicate sliver", "Syndicate")
        return False

    if (r1.status / 100) != 2:
        logger.log("Syndicate: Error: Got http result %d on %s" % (r1.status, mountpoint))
        return False

    return True
def writeConf(slivers, conf = CODEMUXCONF):
    '''Write conf with default entry up top. Elements in [] should have
    lower order domain names first. Restart service.'''
    f = open(conf, "w")
    # This needs to be the first entry...
    try:
        f.write("* root 1080 %s\n" % Config().PLC_PLANETFLOW_HOST)
    except AttributeError:
        logger.log("codemux: Can't find PLC_CONFIG_HOST in config. Using PLC_API_HOST")
        f.write("* root 1080 %s\n" % Config().PLC_API_HOST)
    # Sort items for like domains
    for mapping in slivers:
        for (host, params) in mapping.iteritems():
            if params['slice'] == "root":
                continue
            f.write("%s %s %s %s\n" % (host, params['slice'], params['port'], params['ip']))
    f.truncate()
    f.close()
    try:
        restartService()
    except:
        logger.log_exc("codemux.writeConf failed to restart service")
def run(): """ When run as a thread, wait for event, lock db, deep copy it, release it, run bwmon.GetSlivers(), then go back to waiting. """ logger.verbose("bwmon: Thread started") while True: lock.wait() logger.verbose("bwmon: Event received. Running.") database.db_lock.acquire() nmdbcopy = copy.deepcopy(database.db) database.db_lock.release() try: if getDefaults(nmdbcopy) and len(bwlimit.tc("class show dev %s" % dev_default)) > 0: # class show to check if net:InitNodeLimit:bwlimit.init has run. sync(nmdbcopy) else: logger.log("bwmon: BW limits DISABLED.") except: logger.log_exc("bwmon failed") lock.clear()
def run(self): # make sure to create /etc/planetlab/virt so others can read that # used e.g. in vsys-scripts's sliceip tools.get_node_virt() try: if self.options.daemon: tools.daemon() # set log level if (self.options.verbose): logger.set_level(logger.LOG_VERBOSE) # Load /etc/planetlab/plc_config config = Config(self.options.config) try: other_pid = tools.pid_file() if other_pid != None: print """There might be another instance of the node manager running as pid %d. If this is not the case, please remove the pid file %s. -- exiting""" % (other_pid, tools.PID_FILE) return except OSError, err: print "Warning while writing PID file:", err # load modules self.loaded_modules = [] for module in self.modules: try: m = __import__(module) logger.verbose("nodemanager: triggering %s.start"%m.__name__) m.start() self.loaded_modules.append(m) except ImportError, err: logger.log_exc ("ERROR while loading module %s - skipping:" % module) # if we fail to load any of these, it's really no need to go on any further if module in NodeManager.core_modules: logger.log("FATAL : failed to load core module %s"%module) except AttributeError, err: # triggered when module doesn't have a 'start' method logger.log_exc ("ERROR while starting module %s - skipping:" % module) # if we fail to load any of these, it's really no need to go on any further if module in NodeManager.core_modules: logger.log("FATAL : failed to start core module %s"%module)
def SetSliverTag(plc, slice, tagname, value):
    node_id = tools.node_id()
    slivertags = plc.GetSliceTags({"name": slice, "node_id": node_id, "tagname": tagname})
    if len(slivertags) == 0:
        # looks like GetSlivers reports about delegated/nm-controller slices that do *not* belong to this node
        # and this is something that AddSliceTag does not like
        try:
            slivertag_id = plc.AddSliceTag(slice, tagname, value, node_id)
        except:
            logger.log_exc("sliverauth.SetSliverTag (probably delegated) slice=%(slice)s tag=%(tagname)s node_id=%(node_id)d"
                           % locals())
    else:
        slivertag_id = slivertags[0]['slice_tag_id']
        plc.UpdateSliceTag(slivertag_id, value)
def install_and_enable_vinit_for_init(self):
    """
    suitable for init-based VMs
    """
    vinit_source = "/usr/share/NodeManager/sliver-initscripts/vinit"
    vinit_script = "/vservers/%s/etc/rc.d/init.d/vinit" % self.name
    enable_link = "/vservers/%s/etc/rc.d/rc3.d/S99vinit" % self.name
    enable_target = "../init.d/vinit"
    # install in sliver
    with open(vinit_source) as f:
        code = f.read()
    if tools.replace_file_with_string(vinit_script, code, chmod=0o755):
        logger.log("Initscript: %s: installed generic vinit rc script" % self.name)
    # create symlink for runlevel 3
    if not os.path.islink(enable_link):
        try:
            logger.log("Initscript: %s: creating runlevel3 symlink %s" % (self.name, enable_link))
            os.symlink(enable_target, enable_link)
        except:
            logger.log_exc("Initscript failed to create runlevel3 symlink %s" % enable_link,
                           name=self.name)
def fork_as(su, function, *args): """fork(), cd / to avoid keeping unused directories open, close all nonstandard file descriptors (to avoid capturing open sockets), fork() again (to avoid zombies) and call <function> with arguments <args> in the grandchild process. If <su> is not None, set our group and user ids appropriately in the child process.""" child_pid = os.fork() if child_pid == 0: try: os.chdir('/') close_nonstandard_fds() if su: pw_ent = pwd.getpwnam(su) os.setegid(pw_ent[3]) os.seteuid(pw_ent[2]) child_pid = os.fork() if child_pid == 0: function(*args) except: os.seteuid(os.getuid()) # undo su so we can write the log file os.setegid(os.getgid()) logger.log_exc("tools: fork_as") os._exit(0) else: os.waitpid(child_pid, 0)
def setSliversVref(self, data):
    """
    Tweak the 'vref' attribute in all slivers based on the 'GetSliceFamily' key
    """
    # GetSlivers exposes the result of GetSliceFamily() as a separate key in data
    # It is safe to override the attributes with this, as this method has the right logic
    for sliver in data.get('slivers'):
        try:
            slicefamily = sliver.get('GetSliceFamily')
            for att in sliver['attributes']:
                if att['tagname'] == 'vref':
                    att['value'] = slicefamily
                    continue
            sliver['attributes'].append({'tagname': 'vref', 'value': slicefamily})
        except:
            logger.log_exc("nodemanager: Could not overwrite 'vref' attribute from 'GetSliceFamily'",
                           name=sliver['name'])
def log_call_read(command,timeout=logger.default_timeout_minutes*60,poll=1): message=" ".join(command) logger.log("log_call: running command %s" % message) logger.verbose("log_call: timeout=%r s" % timeout) logger.verbose("log_call: poll=%r s" % poll) trigger=time.time()+timeout try: child = subprocess.Popen(command, bufsize=1, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True) stdout = "" while True: # see if anything can be read within the poll interval (r,w,x)=select.select([child.stdout],[],[],poll) if r: stdout = stdout + child.stdout.read(1) # is process over ? returncode=child.poll() # yes if returncode != None: stdout = stdout + child.stdout.read() # child is done and return 0 if returncode == 0: logger.log("log_call:end command (%s) completed" % message) if stdout != "": logger.log("log_call:stdout: %s" % stdout) return (returncode, stdout) # child has failed else: logger.log("log_call:end command (%s) returned with code %d" %(message,returncode)) return (returncode, stdout) # no : still within timeout ? if time.time() >= trigger: child.terminate() logger.log("log_call:end terminating command (%s) - exceeded timeout %d s"%(message,timeout)) return (-2, None) break except: logger.log_exc("failed to run command %s" % message) return (-1, None)
def run(): """ When run as a thread, wait for event, lock db, deep copy it, release it, run bwmon.GetSlivers(), then go back to waiting. """ logger.verbose("bwmon: Thread started") while True: lock.wait() logger.verbose("bwmon: Event received. Running.") database.db_lock.acquire() nmdbcopy = copy.deepcopy(database.db) database.db_lock.release() try: if getDefaults(nmdbcopy) and len( bwlimit.tc("class show dev %s" % dev_default)) > 0: # class show to check if net:InitNodeLimit:bwlimit.init has run. sync(nmdbcopy) else: logger.log("bwmon: BW limits DISABLED.") except: logger.log_exc("bwmon failed") lock.clear()
def compile_single_save(session):
    try:
        generator.preprocess.perform(session)
        subprocess.check_call([
            RGBASM, "-o", SAV_DIRECTORY + "/bin/mapdata.obj",
            SAV_DIRECTORY + "/main.asm"
        ], cwd=SAV_DIRECTORY)
        dump_rgbds_object_file(SAV_DIRECTORY + "/bin/mapdata.obj")
        subprocess.check_call([
            RGBASM, "-o", SAV_DIRECTORY + "/bin/main.obj",
            "-D", "FINAL_PASS", SAV_DIRECTORY + "/main.asm"
        ], cwd=SAV_DIRECTORY)
        dump_rgbds_object_file(SAV_DIRECTORY + "/bin/main.obj")
        generator.postprocess.perform(session)
        return True
    except:
        logger.log(TAG, "an exception occurred while compiling save data")
        logger.log_exc(TAG)
        return False
def install_and_enable_vinit_for_systemd(self):
    """
    suitable for systemd-based VMs
    """
    ##########
    ########## initscripts : current status - march 2015
    ##########
    #
    # the initscripts business worked smoothly up to f18 inclusive
    # with f20 and the appearance of machinectl, things started to
    # behave really weird
    #
    # so starting with f20, after having tried pretty hard to get this right,
    # but to no success obviously, and in order to stay on the safe side
    # of the force, I am turning off the initscript machinery completely
    # that is to say: the vinit.service does not get installed at all
    #
    if os.path.isfile('/usr/bin/machinectl'):
        logger.log("WARNING: initscripts are not supported anymore in nodes that have machinectl")
        return

    vinit_source = "/usr/share/NodeManager/sliver-systemd/vinit.service"
    vinit_unit_file = "/vservers/%s/usr/lib/systemd/system/vinit.service" % self.name
    enable_link = "/vservers/%s/etc/systemd/system/multi-user.target.wants/vinit.service" % self.name
    enable_target = "/usr/lib/systemd/system/vinit.service"
    # install in sliver
    with open(vinit_source) as f:
        code = f.read()
    if tools.replace_file_with_string(vinit_unit_file, code, chmod=0o755):
        logger.log("Initscript: %s: installed vinit.service unit file" % self.name)
    # create symlink for enabling this unit
    if not os.path.islink(enable_link):
        try:
            logger.log("Initscript: %s: creating enabling symlink %s" % (self.name, enable_link))
            os.symlink(enable_target, enable_link)
        except:
            logger.log_exc("Initscript failed to create enabling symlink %s" % enable_link,
                           name=self.name)
def restart_slice(self, slicename):
    logger.log('reservation: Restarting slice %s' % (slicename))
    self.debug_box('before restarting', slicename)
    worker = account.get(slicename)
    try:
        # dig in self.data to retrieve corresponding rec
        slivers = [sliver for sliver in self.data['slivers']
                   if sliver['name'] == slicename]
        sliver = slivers[0]
        record = database.db.get(slicename)
        record['enabled'] = True
        #
        logger.log("reservation: Located worker object %r" % worker)
        logger.log("reservation: Located record at the db %r" % record)
        worker.start(record)
    except:
        logger.log_exc("reservation.restart_slice: Could not start slice %s through its worker" % slicename)
    # we hope the status line won't return anything
    self.debug_box('after restarting', slicename)
def start(): """The database dumper daemon. When it starts up, it populates the database with the last dumped database. It proceeds to handle dump requests forever.""" def run(): global dump_requested while True: db_lock.acquire() while not dump_requested: db_cond.wait() db_pickle = cPickle.dumps(db, cPickle.HIGHEST_PROTOCOL) dump_requested = False db_lock.release() try: tools.write_file(DB_FILE, lambda f: f.write(db_pickle)) logger.log_database(db) except: logger.log_exc("database.start: failed to pickle/dump") global db try: f = open(DB_FILE) try: db = cPickle.load(f) finally: f.close() except IOError: logger.log( "database: Could not load %s -- starting from a fresh database" % DB_FILE) db = Database() except: logger.log_exc("database: failed in start") db = Database() logger.log('database.start') tools.as_daemon_thread(run)
def GetSlivers(data, conf=None, plc=None): logger.log("omf_resctl.GetSlivers") if 'accounts' not in data: logger.log_missing_data("omf_resctl.GetSlivers", 'accounts') return try: xmpp_server = data['xmpp']['server'] if not xmpp_server: # we have the key but no value, just as bad raise Exception except: # disabled feature - bailing out logger.log( "omf_resctl: PLC_OMF config unsufficient (not enabled, or no server set), -- plugin exiting" ) return hostname = data['hostname'] def is_omf_friendly(sliver): for chunk in sliver['attributes']: if chunk['tagname'] == 'omf_control': return True for sliver in data['slivers']: # skip non OMF-friendly slices if not is_omf_friendly(sliver): continue slicename = sliver['name'] expires = str(sliver['expires']) yaml_template = config_ple_template yaml_contents = yaml_template\ .replace('_xmpp_server_',xmpp_server)\ .replace('_slicename_',slicename)\ .replace('_hostname_',hostname)\ .replace('_expires_',expires) yaml_full_path = "/vservers/%s/%s" % (slicename, yaml_slice_path) yaml_full_dir = os.path.dirname(yaml_full_path) if not os.path.isdir(yaml_full_dir): try: os.makedirs(yaml_full_dir) except OSError: pass config_changes = tools.replace_file_with_string( yaml_full_path, yaml_contents) logger.log("yaml_contents length=%d, config_changes=%r" % (len(yaml_contents), config_changes)) # would make sense to also check for changes to authorized_keys # would require saving a copy of that some place for comparison # xxx todo keys_changes = False if config_changes or keys_changes: # instead of restarting the service we call a companion script try: fetch_trigger_script_if_missing(slicename) # the trigger script actually needs to be run in the slice context of course # in addition there is a requirement to pretend we run as a login shell # hence sudo -i slice_command = ["sudo", "-i", omf_rc_trigger_script] to_run = tools.command_in_slice(slicename, slice_command) log_filename = "/vservers/%s/%s" % (slicename, omf_rc_trigger_log) logger.log("omf_resctl: starting %s" % to_run) logger.log("redirected into %s" % log_filename) logger.log("*not* waiting for completion..") with open(log_filename, "a") as log_file: subprocess.Popen(to_run, stdout=log_file, stderr=subprocess.STDOUT) # a first version tried to 'communicate' on that subprocess instance # but that tended to create deadlocks in some cases # causing nodemanager to stall... # we're only losing the child's retcod, no big deal except: import traceback traceback.print_exc() logger.log_exc("omf_resctl: WARNING: Could not call trigger script %s"%\ omf_rc_trigger_script, name=slicename) else: logger.log("omf_resctl: %s: omf_control'ed sliver has no change" % slicename)
            while True:
                # Main nodemanager Loop
                work_beg = time.time()
                logger.log('nodemanager: mainloop - calling GetSlivers - period=%d random=%d'
                           % (iperiod, irandom))
                self.GetSlivers(config, plc)
                delay = iperiod + random.randrange(0, irandom)
                work_end = time.time()
                work_duration = int(work_end - work_beg)
                logger.log('nodemanager: mainloop has worked for %s s - sleeping for %d s'
                           % (work_duration, delay))
                time.sleep(delay)
        except:
            logger.log_exc("nodemanager: failed in run")


def run():
    logger.log("======================================== Entering nodemanager.py")
    NodeManager().run()


if __name__ == '__main__':
    run()
else:
    # This is for debugging purposes. Open a copy of Python and import nodemanager
    tools.as_daemon_thread(run)
def create(name, rec=None): ''' Create dirs, copy fs image, lxc_create ''' logger.verbose('sliver_lxc: %s create' % (name)) conn = Sliver_Libvirt.getConnection(Sliver_LXC.TYPE) # Get the type of image from vref myplc tags specified as: # pldistro = lxc # fcdistro = squeeze # arch x86_64 arch = 'x86_64' tags = rec['rspec']['tags'] if 'arch' in tags: arch = tags['arch'] if arch == 'i386': arch = 'i686' vref = rec['vref'] if vref is None: vref = "lxc-f14-x86_64" logger.log( "sliver_libvirt: %s: WARNING - no vref attached, using hard-wired default %s" % (name, vref)) refImgDir = os.path.join(Sliver_LXC.REF_IMG_BASE_DIR, vref) containerDir = os.path.join(Sliver_LXC.CON_BASE_DIR, name) # check the template exists -- there's probably a better way.. if not os.path.isdir(refImgDir): logger.log( 'sliver_lxc: %s: ERROR Could not create sliver - reference image %s not found' % (name, vref)) logger.log('sliver_lxc: %s: ERROR Expected reference image in %s' % (name, refImgDir)) return # Snapshot the reference image fs (assume the reference image is in its own # subvolume) command = ['btrfs', 'subvolume', 'snapshot', refImgDir, containerDir] if not logger.log_call(command, timeout=15 * 60): logger.log('sliver_lxc: ERROR Could not create BTRFS snapshot at', containerDir) return command = ['chmod', '755', containerDir] logger.log_call(command, timeout=15 * 60) # TODO: set quotas... # Set hostname. A valid hostname cannot have '_' #with open(os.path.join(containerDir, 'etc/hostname'), 'w') as f: # print >>f, name.replace('_', '-') # Add slices group if not already present try: group = grp.getgrnam('slices') except: command = ['/usr/sbin/groupadd', 'slices'] logger.log_call(command, timeout=15 * 60) # Add unix account (TYPE is specified in the subclass) command = [ '/usr/sbin/useradd', '-g', 'slices', '-s', Sliver_LXC.SHELL, name, '-p', '*' ] logger.log_call(command, timeout=15 * 60) command = ['mkdir', '/home/%s/.ssh' % name] logger.log_call(command, timeout=15 * 60) # Create PK pair keys to connect from the host to the guest without # password... maybe remove the need for authentication inside the # guest? 
command = [ 'su', '-s', '/bin/bash', '-c', 'ssh-keygen -t rsa -N "" -f /home/%s/.ssh/id_rsa' % (name) ] logger.log_call(command, timeout=60) command = ['chown', '-R', '%s.slices' % name, '/home/%s/.ssh' % name] logger.log_call(command, timeout=30) command = ['mkdir', '%s/root/.ssh' % containerDir] logger.log_call(command, timeout=10) command = [ 'cp', '/home/%s/.ssh/id_rsa.pub' % name, '%s/root/.ssh/authorized_keys' % containerDir ] logger.log_call(command, timeout=30) logger.log("creating /etc/slicename file in %s" % os.path.join(containerDir, 'etc/slicename')) try: file(os.path.join(containerDir, 'etc/slicename'), 'w').write(name) except: logger.log_exc("exception while creating /etc/slicename") try: file(os.path.join(containerDir, 'etc/slicefamily'), 'w').write(vref) except: logger.log_exc("exception while creating /etc/slicefamily") uid = None try: uid = getpwnam(name).pw_uid except KeyError: # keyerror will happen if user id was not created successfully logger.log_exc("exception while getting user id") if uid is not None: logger.log("uid is %d" % uid) command = ['mkdir', '%s/home/%s' % (containerDir, name)] logger.log_call(command, timeout=10) command = ['chown', name, '%s/home/%s' % (containerDir, name)] logger.log_call(command, timeout=10) etcpasswd = os.path.join(containerDir, 'etc/passwd') etcgroup = os.path.join(containerDir, 'etc/group') if os.path.exists(etcpasswd): # create all accounts with gid=1001 - i.e. 'slices' like it is in the root context slices_gid = 1001 logger.log( "adding user %(name)s id %(uid)d gid %(slices_gid)d to %(etcpasswd)s" % (locals())) try: file(etcpasswd, 'a').write( "%(name)s:x:%(uid)d:%(slices_gid)d::/home/%(name)s:/bin/bash\n" % locals()) except: logger.log_exc("exception while updating %s" % etcpasswd) logger.log( "adding group slices with gid %(slices_gid)d to %(etcgroup)s" % locals()) try: file(etcgroup, 'a').write("slices:x:%(slices_gid)d\n" % locals()) except: logger.log_exc("exception while updating %s" % etcgroup) sudoers = os.path.join(containerDir, 'etc/sudoers') if os.path.exists(sudoers): try: file(sudoers, 'a').write("%s ALL=(ALL) NOPASSWD: ALL\n" % name) except: logger.log_exc("exception while updating /etc/sudoers") # customizations for the user environment - root or slice uid # we save the whole business in /etc/planetlab.profile # and source this file for both root and the slice uid's .profile # prompt for slice owner, + LD_PRELOAD for transparently wrap bind pl_profile = os.path.join(containerDir, "etc/planetlab.profile") ld_preload_text = """# by default, we define this setting so that calls to bind(2), # when invoked on 0.0.0.0, get transparently redirected to the public interface of this node # see https://svn.planet-lab.org/wiki/LxcPortForwarding""" usrmove_path_text = """# VM's before Features/UsrMove need /bin and /sbin in their PATH""" usrmove_path_code = """ pathmunge () { if ! 
echo $PATH | /bin/egrep -q "(^|:)$1($|:)" ; then if [ "$2" = "after" ] ; then PATH=$PATH:$1 else PATH=$1:$PATH fi fi } pathmunge /bin after pathmunge /sbin after unset pathmunge """ with open(pl_profile, 'w') as f: f.write("export PS1='%s@\H \$ '\n" % (name)) f.write("%s\n" % ld_preload_text) f.write("export LD_PRELOAD=/etc/planetlab/lib/bind_public.so\n") f.write("%s\n" % usrmove_path_text) f.write("%s\n" % usrmove_path_code) # make sure this file is sourced from both root's and slice's .profile enforced_line = "[ -f /etc/planetlab.profile ] && source /etc/planetlab.profile\n" for path in ['root/.profile', 'home/%s/.profile' % name]: from_root = os.path.join(containerDir, path) # if dir is not yet existing let's forget it for now if not os.path.isdir(os.path.dirname(from_root)): continue found = False try: contents = file(from_root).readlines() for content in contents: if content == enforced_line: found = True except IOError: pass if not found: with open(from_root, "a") as user_profile: user_profile.write(enforced_line) # in case we create the slice's .profile when writing if from_root.find("/home") >= 0: command = ['chown', '%s:slices' % name, from_root] logger.log_call(command, timeout=5) # Lookup for xid and create template after the user is created so we # can get the correct xid based on the name of the slice xid = bwlimit.get_xid(name) # Template for libvirt sliver configuration template_filename_sliceimage = os.path.join( Sliver_LXC.REF_IMG_BASE_DIR, 'lxc_template.xml') if os.path.isfile(template_filename_sliceimage): logger.log("WARNING: using compat template %s" % template_filename_sliceimage) template_filename = template_filename_sliceimage else: logger.log("Cannot find XML template %s" % template_filename_sliceimage) return interfaces = Sliver_Libvirt.get_interfaces_xml(rec) try: with open(template_filename) as f: template = Template(f.read()) xml = template.substitute(name=name, xid=xid, interfaces=interfaces, arch=arch) except IOError: logger.log('Failed to parse or use XML template file %s' % template_filename) return # Lookup for the sliver before actually # defining it, just in case it was already defined. try: dom = conn.lookupByName(name) except: dom = conn.defineXML(xml) logger.verbose('lxc_create: %s -> %s' % (name, Sliver_Libvirt.debuginfo(dom)))
def GetSlivers(self, config, plc): """Retrieves GetSlivers at PLC and triggers callbacks defined in modules/plugins""" try: logger.log("nodemanager: Syncing w/ PLC") # retrieve GetSlivers from PLC data = plc.GetSlivers() # use the magic 'default' slice to retrieve system-wide defaults self.getPLCDefaults(data, config) # tweak the 'vref' attribute from GetSliceFamily self.setSliversVref(data) # dump it too, so it can be retrieved later in case of comm. failure self.dumpSlivers(data) # log it for debug purposes, no matter what verbose is logger.log_slivers(data) logger.verbose("nodemanager: Sync w/ PLC done") last_data = data except: logger.log_exc("nodemanager: failed in GetSlivers") # XXX So some modules can at least boostrap. logger.log( "nodemanager: Can't contact PLC to GetSlivers(). Continuing." ) data = {} # for modules that request it though the 'persistent_data' property last_data = self.loadSlivers() logger.log("*************************************************") #logger.log("we should provide these information to PEARL TEAM") logger.log_map({}, "******************************************") #wangyang,get slice map from date fetched from myplc slicemap = self.getslicemap(data) #logger.log_map(slicemap,"slicemap") #wangyang,get slice map from db slicemapdb = self.loadmap(slicemap) #logger.log_map(slicemapdb,"slicedb") #wangyang,compare two files slicemapdb = self.handlemap(slicemap, slicemapdb) #logger.log_map(slicemapdb,"dbafter compare") #wangyang,update to router slicemapdb = self.updatetoRouter(slicemapdb) #logger.log_map(slicemapdb,"db after update") #wangyang,update to router self.savemap(slicemapdb) #wangyang,write into txt logger.log_map(slicemapdb, "write to db") ''' for sliver in last_data['slivers']: logger.log("sliceid is %s"%sliver['slice_id']) if sliver['slice_id'] > 4: logfile = '/var/log/slice/slice.'+sliver['name'] #logger.logslice("slicename: %s"%sliver['name'],logfile) logger.logslice("sliceid: %s"%sliver['slice_id'],logfile) vmid=self.createslver(sliver['slice_id']) logger.log("vmid is %s"%vmid) logger.logmap(sliver['slice_id'],vmid) #logger.logslice("keys: %s"%sliver['keys'],logfile) ''' logger.log("*************************************************") # Invoke GetSlivers() functions from the callback modules for module in self.loaded_modules: logger.verbose('nodemanager: triggering %s.GetSlivers' % module.__name__) try: callback = getattr(module, 'GetSlivers') module_data = data if getattr(module, 'persistent_data', False): module_data = last_data callback(data, config, plc) except: logger.log_exc( "nodemanager: GetSlivers failed to run callback for module %r" % module)
def GetSlivers(data, config = None, plc=None, fullupdate=True): """This function has two purposes. One, convert GetSlivers() data into a more convenient format. Two, even if no updates are coming in, use the GetSlivers() heartbeat as a cue to scan for expired slivers.""" logger.verbose("slivermanager: Entering GetSlivers with fullupdate=%r"%fullupdate) for key in data.keys(): logger.verbose('slivermanager: GetSlivers key : ' + key) node_id = None try: f = open('/etc/planetlab/node_id') try: node_id = int(f.read()) finally: f.close() except: logger.log_exc("slivermanager: GetSlivers failed to read /etc/planetlab/node_id") if data.has_key('node_id') and data['node_id'] != node_id: return if data.has_key('networks'): for network in data['networks']: if network['is_primary'] and network['bwlimit'] is not None: DEFAULT_ALLOCATION['net_max_rate'] = network['bwlimit'] / 1000 # Take initscripts (global) returned by API, build a hash scriptname->code iscripts_hash = {} if 'initscripts' not in data: logger.log_missing_data("slivermanager.GetSlivers",'initscripts') return for initscript_rec in data['initscripts']: logger.verbose("slivermanager: initscript: %s" % initscript_rec['name']) iscripts_hash[str(initscript_rec['name'])] = initscript_rec['script'] adjustReservedSlivers (data) for sliver in data['slivers']: logger.verbose("slivermanager: %s: slivermanager.GetSlivers in slivers loop"%sliver['name']) rec = sliver.copy() rec.setdefault('timestamp', data['timestamp']) # convert attributes field to a proper dict attributes = {} for attr in rec.pop('attributes'): attributes[attr['tagname']] = attr['value'] rec.setdefault("attributes", attributes) # squash keys keys = rec.pop('keys') rec.setdefault('keys', '\n'.join([key_struct['key'] for key_struct in keys])) ## 'Type' isn't returned by GetSlivers() for whatever reason. We're overloading ## instantiation here, but i suppose its the same thing when you think about it. -FA # Handle nm-controller here if rec['instantiation'].lower() == 'nm-controller': rec.setdefault('type', attributes.get('type', 'controller.Controller')) else: rec.setdefault('type', attributes.get('type', sliver_default_type)) # set the vserver reference. If none, set to default. rec.setdefault('vref', attributes.get('vref', 'default')) ### set initscripts; set empty rec['initscript'] if not # if tag 'initscript_code' is set, that's what we use iscode = attributes.get('initscript_code','') if iscode: rec['initscript']=iscode else: isname = attributes.get('initscript') if isname is not None and isname in iscripts_hash: rec['initscript'] = iscripts_hash[isname] else: rec['initscript'] = '' # set delegations, if none, set empty rec.setdefault('delegations', attributes.get("delegations", [])) # extract the implied rspec rspec = {} rec['rspec'] = rspec for resname, default_amount in DEFAULT_ALLOCATION.iteritems(): try: t = type(default_amount) amount = t.__new__(t, attributes[resname]) except (KeyError, ValueError): amount = default_amount rspec[resname] = amount # add in sysctl attributes into the rspec for key in attributes.keys(): if key.find("sysctl.") == 0: rspec[key] = attributes[key] # also export tags in rspec so they make it to the sliver_vs.start call rspec['tags']=attributes database.db.deliver_record(rec) if fullupdate: database.db.set_min_timestamp(data['timestamp']) # slivers are created here. database.db.sync()
def sync(self): """Synchronize reality with the database contents. This method does a lot of things, and it's currently called after every single batch of database changes (a GetSlivers(), a loan, a record). It may be necessary in the future to do something smarter.""" # delete expired records now = time.time() for name, rec in self.items(): if rec.get('expires', now) < now: del self[name] self._compute_effective_rspecs() try: coresched = CoreSched() coresched.adjustCores(self) except: logger.log_exc("database: exception while doing core sched") # create and destroy accounts as needed logger.verbose("database: sync : fetching accounts") existing_acct_names = account.all() for name in existing_acct_names: if name not in self: logger.verbose("database: sync : ensure_destroy'ing %s" % name) account.get(name).ensure_destroyed() for name, rec in self.iteritems(): # protect this; if anything fails for a given sliver # we still need the other ones to be handled try: sliver = account.get(name) logger.verbose( "database: sync : looping on %s (shell account class from pwd %s)" % (name, sliver._get_class())) # Make sure we refresh accounts that are running if rec['instantiation'] == 'plc-instantiated': logger.verbose( "database: sync : ensure_create'ing 'instantiation' sliver %s" % name) sliver.ensure_created(rec) elif rec['instantiation'] == 'nm-controller': logger.verbose( "database: sync : ensure_create'ing 'nm-controller' sliver %s" % name) sliver.ensure_created(rec) # Back door to ensure PLC overrides Ticket in delegation. elif rec['instantiation'] == 'delegated' and sliver._get_class( ) != None: # if the ticket has been delivered and the nm-controller started the slice # update rspecs and keep them up to date. if sliver.is_running(): logger.verbose( "database: sync : ensure_create'ing 'delegated' sliver %s" % name) sliver.ensure_created(rec) except: logger.log_exc("database: sync failed to handle sliver", name=name) # Wake up bwmom to update limits. bwmon.lock.set() global dump_requested dump_requested = True db_cond.notify()