def _getLocation(self):
    """
    find location,chassis of this node using quattor

    The quattor profile returned by self._getQuattorPath() is read as XML when
    its file name contains "xml" and as JSON when it contains "json". The
    location string found there is matched against the
    QUATTOR_LOCATION_STRING_REGEX setting, which has to provide the named
    groups 'chassis' and 'slot'.

    Returns a tuple (slot, chassisname) when the location could be parsed,
    and (location, "None") - the raw location and the *string* "None" - when
    it could not.
    A profile that is neither XML nor JSON is reported through
    self.log.raiseException with a NodeException.
    """
    path = self._getQuattorPath()
    filename = path.split("/")[-1]
    if "xml" in filename:
        location = self._getQuattorElementFromXML(get_config("LOCATION_XPATH"), path)
    elif "json" in filename:
        location = self._getQuattorElementFromJSON(get_config("LOCATION_JSON"), path)
    else:
        # without this branch 'location' stayed unbound and the method died
        # with an UnboundLocalError a few lines further down
        self.log.raiseException("Unsupported quattor profile format for node %s: %s" % (self, path),
                                NodeException)
    self.log.debug("location: %s" % location)

    content1 = re.search(get_config("QUATTOR_LOCATION_STRING_REGEX"), location)
    if len(location) < 1 or not content1:
        self.log.debug("No chassis and slot location found for node %s in %s" %
                       (self, get_config("QUATTOR_PATH")))
        return location, "None"

    # parse content
    values1 = content1.groupdict()
    chassis = int(values1["chassis"])
    slot = int(values1["slot"])
    chassisname = get_config("CHASISNAME_TPL") % {"chassisname": chassis, "clustername": self.clustername}
    return slot, chassisname
def __init__(self, nodeid, clustername, masternode):
    """
    constructor

    Looks up the slot (getSlot) and the chassis (getChassis) of this node and
    creates the power and status commands, all of which are given the chassis
    host plus the slot. Led commands are registered as not supported.
    """
    Node.__init__(self, nodeid, clustername, masternode)
    self.log.debug("creating Bladenode")

    self.slot = self.getSlot()
    chassis_host_tpl = get_config('CHASIS_HOST_TPL')
    self.shassishost = chassis_host_tpl % {
        'chasisname': self.getChassis(),
        'clustername': self.clustername,
    }

    # the management name of a blade is the name of its chassis
    self.immname = self.chassisname
    monitoring_tpl = get_config('ICINGA_BLADE_IMM_TPL')
    self.immmonitoring = monitoring_tpl % {
        'chassisname': self.chassisname,
        'clustername': self.clustername,
    }

    # every blade command is addressed by chassis host and slot
    blade = {'chassisname': self.shassishost, 'slot': self.slot}
    self.softpoweroffCommand = BladeSoftPoweroffCommand(**blade)
    self.poweroffCommand = BladePoweroffCommand(**blade)
    self.poweronCommand = BladePoweronCommand(**blade)
    self.rebootCommand = BladeRebootCommand(**blade)
    self.statusCommand = FullBladeStatusCommand(host=self.hostname, masternode=self.getMaster(), **blade)

    self.ledoffcommand = NotSupportedCommand("ledoff")
    self.ledoncommand = NotSupportedCommand("ledon")
def _getQuattorPath(self):
    """
    Build the path of the quattor file of this node and check that it exists

    The file name comes from the QUATTOR_FILES_TPL setting filled in with the
    node id and cluster name, the directory from QUATTOR_PATH.
    A missing file is reported through self.log.raiseException with a
    NodeException.
    """
    quattor_dir = get_config('QUATTOR_PATH')
    profile_name = get_config('QUATTOR_FILES_TPL') % {
        'nodeid': self.nodeid,
        'clustername': self.clustername,
    }
    path = os.path.join(quattor_dir, profile_name)
    if os.path.exists(path):
        return path

    self.log.raiseException("No file found for node %s in %s" % (self, path), NodeException)
    return path
def _getMasterNodeIds(self):
    """
    return the ids of all master nodes in this cluster, using the quattor dir

    The file name template (QUATTOR_FILES_TPL) is filled in with the master id
    regex (QUATTOR_MASTERID_REGEX) and the cluster name; the resulting pattern
    is handed to self._getNodeIds.
    """
    file_tpl = get_config("QUATTOR_FILES_TPL")
    substitutions = {
        'nodeid': get_config("QUATTOR_MASTERID_REGEX"),
        'clustername': self.name,
    }
    master_ids = self._getNodeIds(re.compile(file_tpl % substitutions))
    self.log.debug("master id's for %s: %s" % (self.name, str(master_ids)))
    return master_ids
def _getStorageNodeIds(self):
    """
    return the ids of all storage nodes in this cluster

    The file name template (QUATTOR_FILES_TPL) is filled in with the storage
    id regex (QUATTOR_STORAGEID_REGEX) and the cluster name; the resulting
    pattern is handed to self._getNodeIds.
    """
    file_tpl = get_config("QUATTOR_FILES_TPL")
    substitutions = {
        'nodeid': get_config("QUATTOR_STORAGEID_REGEX"),
        'clustername': self.name,
    }
    storage_ids = self._getNodeIds(re.compile(file_tpl % substitutions))
    self.log.debug("storage id's for %s: %s" % (self.name, str(storage_ids)))
    return storage_ids
def _getWorkerNodeIdsFromQuattor(self):
    """
    return the ids of all nodes in this cluster, based on the naming of the
    files in the quattor dir

    The file name template (QUATTOR_FILES_TPL) is filled in with the cluster
    name and the node id regex (QUATTOR_NODEID_REGEX); the resulting pattern
    is handed to self._getNodeIds.
    """
    file_tpl = get_config("QUATTOR_FILES_TPL")
    substitutions = {
        "clustername": self.name,
        "nodeid": get_config("QUATTOR_NODEID_REGEX"),
    }
    pattern = re.compile(file_tpl % substitutions)
    return self._getNodeIds(pattern)
def __init__(self, message=None):
    """
    constructor

    an empty or missing message is replaced by a default one that mentions
    the configured quattor path (QUATTOR_PATH)
    """
    # 'or' short-circuits: the configuration is only consulted for the default
    text = message or "Could not find quattor dir %s" % get_config("QUATTOR_PATH")
    Exception.__init__(self, text)
def getMaster(self):
    """
    returns a master of this cluster

    This will first make sure the master is having a working pbs installation,
    and try the next one if this fails. The result is cached in self.master.

    When no master answers without error, a warning is logged and the first
    master is used (and cached) anyway.

    Raises ClusterException when no master nodes can be found at all.
    """
    # cache this function
    if self.master:
        return self.master

    no_master_msg = "Could not get masterNode for %s, check your quattor configuration" % self.name

    masters = self._getMasters()
    if not masters:
        raise ClusterException(no_master_msg)
    masters = masters.getNodes()
    if not masters:
        raise ClusterException(no_master_msg)
    # materialise the nodes so the fallback below can index them, whatever
    # kind of iterable getNodes() hands back
    masters = list(masters)

    for master in masters:
        # check if this master gives us a valid pbsnodes response
        _, err = PBSStateCommand(master.hostname, get_config("COMMAND_FAST_TIMEOUT")).run()
        if not err:
            self.master = master
            return master

    self.log.warning("Could not get a working master for %s, make sure pbs is working on it, "
                     "will continue without working master" % self.name)
    self.master = masters[0]
    return masters[0]
def __init__(self, clustername=None, masternode=None, nodeid=None, timeout=None):
    """
    constructor

    starts with an empty collection of nodes and no running threads
    when no timeout is given, twice the COMMAND_TIMEOUT setting is used
    """
    # this is not a real node, so an id is optional
    Node.__init__(self, nodeid, clustername, masternode)
    self.nodes = {}
    self.threads = None

    # doubled, so the individual commands get to time out before we do
    self.timeout = int(get_config('COMMAND_TIMEOUT')) * 2 if timeout is None else timeout
def _getLocation(self):
    """
    Look up slot and chassis of this node in its quattor (JSON) profile

    The location string read from the profile is matched against the
    QUATTOR_LOCATION_STRING_REGEX setting, which has to provide the named
    groups 'chassis' and 'slot'.

    Returns (slot, chassisname) on success; when nothing usable is found the
    raw location and the string "None" are returned instead.
    """
    profile = self._getQuattorPath()
    location = self._getQuattorElementFromJSON(get_config("LOCATION_JSON"), profile)
    self.log.debug("location: %s" % location)

    match = re.search(get_config("QUATTOR_LOCATION_STRING_REGEX"), location)
    if len(location) >= 1 and match:
        groups = match.groupdict()
        chassis = int(groups['chassis'])
        slot = int(groups['slot'])
        chassis_tpl = get_config("CHASISNAME_TPL")
        return slot, chassis_tpl % {'chassisname': chassis, 'clustername': self.clustername}

    self.log.debug("No chassis and slot location found for node %s in %s" %
                   (self, get_config("QUATTOR_PATH")))
    return location, "None"
def _getNodeIds(self, regex):
    """
    return a sorted list of unique id's found in the quattor dir

    regex is a compiled pattern that describes a file name in the quattor dir;
    it must contain a named group 'id'.
    A missing quattor dir is reported through self.log.raiseException with a
    QuattorException.
    """
    if not os.path.exists(get_config("QUATTOR_PATH")):
        self.log.raiseException("Path %s not found, is this not a quattor server?" % get_config("QUATTOR_PATH"),
                                QuattorException)

    entries = os.listdir(get_config("QUATTOR_PATH"))
    self.log.debug("matching files for regex %s" % regex.pattern)

    # a set, since several files can yield the same id
    unique_ids = set()
    for entry in entries:
        hit = regex.match(entry)
        if not hit:
            continue
        unique_ids.add(hit.group('id'))
        self.log.debug("matched filename: %s" % entry)

    return sorted(unique_ids)
def __init__(self, nodeid, clustername, masternode, commands=None):
    """
    constructor

    all real nodes have an id

    Sets up the logger, the host and management names (from the HOST_TPL and
    IMM_TPL settings) and the soft poweroff / soft reboot commands.
    Status and reboot commands start out as None.
    """
    Worker.__init__(self, commands=commands)
    self.log = fancylogger.getLogger(self.__class__.__name__)
    self.log.debug("creating Node %s" % nodeid)

    self.nodeid = nodeid
    self.status = None
    self.clustername = clustername
    self.slot = None
    self.customcmd = None
    self.chassisname = None
    self.masternode = masternode

    # TODO: (medium) allow for initializing this with the commands thing above
    # so no overwriting is needed, these can be parsed from quattor
    # see ticket 469

    # defaults
    self.customCommandClass = SshCommand
    names = {"nodeid": nodeid, "clustername": clustername}
    self.hostname = get_config('HOST_TPL') % names
    self.immname = get_config('IMM_TPL') % names
    self.immmonitoring = None

    self.softpoweroffCommand = SoftPoweroffCommand(self.hostname)
    self.softrebootCommand = SoftRebootCommand(self.hostname)

    # Deliberately not set here, to be provided by extensions of this class:
    # poweronCommand, poweroffCommand, ledoffcommand and ledoncommand
    self.statusCommand = None
    self.rebootCommand = None
def __init__(self, nodeid, clustername, masternode):
    """
    constructor

    Creates the status command (host, management name and master) and the
    power commands, which are all given the management name (self.immname).
    Led commands are registered as not supported.
    """
    Node.__init__(self, nodeid, clustername, masternode)
    self.log.debug("creating ImmNode")

    imm = self.immname
    self.immmonitoring = get_config("ICINGA_IDPX_IMM_TPL") % {"nodeid": self.nodeid}
    self.statusCommand = FullImmStatusCommand(self.hostname, imm, self.getMaster())

    # all power commands take the management name as their only argument
    power_commands = (
        ("softpoweroffCommand", ImmSoftPoweroffCommand),
        ("poweronCommand", ImmPoweronCommand),
        ("poweroffCommand", ImmPoweroffCommand),
        ("rebootCommand", ImmRebootCommand),
        ("softrebootCommand", ImmSoftRebootCommand),
    )
    for attribute, command_class in power_commands:
        setattr(self, attribute, command_class(imm))

    for led in ("ledoff", "ledon"):
        setattr(self, led + "command", NotSupportedCommand(led))
def setonline(self):
    """
    run setonline on all nodes in this compositenode

    The node names (NODENAME_TPL filled in per node) are passed in one go to
    the setonline of the master; the result is returned in a one element list.
    Having no nodes at all is reported through self.log.raiseException with a
    NodeException.
    """
    names = [
        get_config('NODENAME_TPL') % {'nodeid': node.nodeid, 'clustername': node.clustername}
        for node in self.getNodes()
    ]
    if not names:
        self.log.raiseException("No nodes selected to set online", NodeException)

    statusses = [self.getMaster().setonline(names)]
    self.log.debug("setonline on compositenode returned %s" % statusses)
    return statusses
def fixdownonerror(self):
    """
    run fixdownonerror on all nodes in this compositenode
    - remove all healthscripts on the nodes
    - clear the message on the nodes now

    Returns a list with the result of fixdownonerror() of every node, in the
    order of self.getNodes().
    Having no nodes at all is reported through self.log.raiseException with a
    NodeException.
    """
    # one status per node, so an empty list means there were no nodes; the
    # list of node names that used to be built here was never used
    statusses = [node.fixdownonerror() for node in self.getNodes()]
    if not statusses:
        self.log.raiseException("No nodes selected to fix downonerror on", NodeException)

    self.log.debug("fixdownonerror on compositenode returned %s" % statusses)
    return statusses
def _doThreading(self, method, args=None, group_by_chassis=False, timeout=None):
    """
    give this method a methodname and optional arguments
    it will perform it threaded on all nodes in this compositenode

    If group_by_chassis is given only one thread per chassis is started
    (default False)

    timeout is the number of seconds to wait for each thread when joining it;
    when not given, the COMMAND_TIMEOUT setting plus 2 is used.

    Returns a list with the output list of every thread. A thread that is
    still alive after the timeout is ignored; if its output has fewer than
    2 elements, 'Command timed out' and 256 are appended to it.

    Only one threaded operation is allowed at a time; a second one is
    reported through self.log.raiseException.
    """
    if self.threads:
        # the message used to be split over two arguments, which passed its
        # second half as the exception class
        self.log.raiseException("Trying to do 2 threaded operations at the same time, this is not allowed!")

    self.threads = []
    outputs = []
    try:
        if not timeout:
            timeout = int(get_config('COMMAND_TIMEOUT')) + 2

        # creating threads and getting results as discussed here:
        # http://stackoverflow.com/questions/3239617/how-to-manage-python-threads-results
        if group_by_chassis:
            group = self.getNodesPerChassis()
        else:
            group = self

        for node in group.getNodes():
            # commands are ran in parallel, but serial on each node
            # TODO (high): group by chassis to avoid overloading!
            self.log.debug("running %s on %s with args: %s" % (method, node, args))
            t, out = _dothreading(node, method, args)
            # TODO: use a thread pool?
            self.threads.append([t, out])
            t.start()

        for t, out in self.threads:
            # TODO: (low) print progress? http://stackoverflow.com/questions/3160699/python-progress-bar
            t.join(timeout)
            if t.is_alive():
                self.log.warning("thread %s on node %s did not complete within timeout, ignoring it", t, str(out))
                if len(out) < 2:
                    out.extend(['Command timed out', 256])
                outputs.append(out)
                continue
            # get result from each thread and append it to the result here
            self.log.debug("thread %s on node %s completed, result: %s" % (t, out[0], out[1]))
            if out[2]:
                self.log.warning("thread %s on node %s completed with an error: %s" % (t, out[0], out[2]))
            outputs.append(out)
    finally:
        # always release the bookkeeping, otherwise one failure would make
        # every later call trip over the "2 threaded operations" check
        self.threads = None

    return outputs
# ## ''' Created on Oct 18, 2011 @author: Jens Timmerman ''' import os import traceback from unittest import TestCase, TestLoader from vsc.manage.config import Options, get_config from vsc.manage.manage import Manager from vsc.manage.clusters import Cluster, NoSuchClusterException from vsc.manage.nodes import NodeException QUATTOR_PATH = get_config("QUATTOR_PATH") class ManageTest(TestCase): def setUp(self): pass def tearDown(self): pass #TODO: add tests for options.pause options.resume and options.restart def testChedulerOptions(self): """ test the cheduler options