def __init__(self, options):
    """Build a manager from a parsed option object.

    :param options: options object as produced by OptionParser, e.g.
        {'debug': False, 'node': None, 'forced': False, 'hardreboot': False,
         'allForced': False, 'chassis': None, 'state': True, 'down': False,
         'all': False, 'ack': None, 'idle': False, 'runcmd': None,
         'cluster': None, 'poweroff': False, 'pbsmomcleanup': False,
         'pbsmomstop': False, 'pbsmomstatus': False, 'postpoweron': False,
         'offline': False, 'poweron': False, 'test_run': False,
         'pbsmomrestart': False, 'Master': None}

    Resolves the target cluster (falling back to the default one), selects
    the nodes to operate on, wires up Icinga monitoring for them and parses
    the requested actions.
    """
    self.log = fancylogger.getLogger(self.__class__.__name__)
    self.log.debug("manager created")
    # filled in later by status/comment handling
    self.status = None
    self.comment = None
    self.log.debug("options: %s" % options.__dict__)
    self.options = options
    # cluster selection: explicit --cluster wins, otherwise fall back to the default
    if options.cluster:
        self.cluster = Cluster.getCluster(options.cluster)
    else:
        self.cluster = Cluster.getDefaultCluster()
        self.log.warning("Selected cluster %s as default cluster" % self.cluster)
    self.log.debug("creating cluster: %s" % self.cluster)
    # some clusters want their nodes handled per chassis; honour that flag when present
    self.group_by_chassis = hasattr(self.cluster, "group_by_chassis") and self.cluster.group_by_chassis
    # node selection derived from the options
    self.nodes = self.getNodes()
    self.log.info("selected nodes on cluster %s: %s" % (self.cluster, self.nodes))
    # monitoring service covering exactly the selected nodes
    self.monitoring = Icinga(self.nodes.getNodes(), options.imms)
    # queue the requested action(s) on the selected nodes
    self.parseActions()
class Manager(object):
    '''
    This class is used to manage the UGent HPC clusters.

    Adding a new cluster involves creating a new extension of the Cluster
    class here and setting its NodeClass; you'll probably have to create a
    new extension of Node to implement what you want to do.
    '''

    def __init__(self, options):
        '''
        Constructor, requires an options object (as created by OptionParser), f.ex:
        {'debug':False, 'node': None, 'forced': False, 'hardreboot': False,
         'allForced': False, 'chassis': None, 'state': True, 'down': False,
         'all': False, 'ack': None, 'idle': False, 'runcmd': None,
         'cluster': None, 'poweroff': False, 'pbsmomcleanup': False,
         'pbsmomstop': False, 'pbsmomstatus': False, 'debug': False,
         'postpoweron': False, 'offline': False, 'poweron': False,
         'test_run': False, 'pbsmomrestart': False, 'Master':None,}
        '''
        self.log = fancylogger.getLogger(self.__class__.__name__)
        self.log.debug("manager created")
        # set later by status/comment handling
        self.status = None
        self.comment = None
        self.log.debug("options: %s" % options.__dict__)
        self.options = options
        # get cluster: explicit --cluster option wins, otherwise the default cluster
        if not options.cluster:
            self.cluster = Cluster.getDefaultCluster()
            self.log.warning("Selected cluster %s as default cluster" % self.cluster)
        else:
            self.cluster = Cluster.getCluster(options.cluster)
        self.log.debug("creating cluster: %s" % self.cluster)
        # group by chassis if the cluster opts in via a group_by_chassis attribute
        self.group_by_chassis = (hasattr(self.cluster, "group_by_chassis") and self.cluster.group_by_chassis)
        # get nodes from cluster, as selected by the options
        self.nodes = self.getNodes()
        self.log.info("selected nodes on cluster %s: %s" % (self.cluster, self.nodes))
        # monitoring service for the selected nodes
        self.monitoring = Icinga(self.nodes.getNodes(), options.imms)
        # parse action(s) and queue them on the nodes
        self.parseActions()

    def doit(self):
        """
        Do the actual actions.

        This will run all commands scheduled on the selected nodes AND this
        cluster's master node (only the things you scheduled on the master
        node).

        Output is an array of arrays of nodes, an array of their commands and
        an array of out,err, f.ex:
        [ [node001, [[command1,[out,err]], [command2,[out,err]]] ],
          [node002, [[command1,[out,err]]]] ]

        Returns False (and runs nothing) when special nodes or a scheduler
        restart are requested without --forced, or when --test_run is set.
        """
        # check for special nodes: refuse to touch them unless --forced is given
        if self.hasSpecials():
            self.log.info("Selected nodes include special nodes (storage, masters,...)")
            if not self.options.forced:
                msg = "You are selecting special nodes (storage, masters,...) "\
                "without the --forced option\nAborting"
                self.log.warning(msg)
                return False
        # TODO: check for special actions, just checking one right now
        # we might want to check for not doing anything special on a node
        # where the scheduler is running on.
        # NOTE(review): the two string pieces below concatenate without a
        # space ("schedulerwithout") — looks like a missing trailing space;
        # confirm and fix in a code change, not here.
        if self.options.restart and not self.options.forced:
            msg = "You trying to restart the scheduler"\
            "without the --forced option, do you know what you are doing?\nAborting"
            self.log.warning(msg)
            return False
        # add the master if it was not selected. this is ok, nothing special should have
        # been queued on it.
        if not self.nodes.contains(self.cluster.getMaster().nodeid):
            self.log.info("Master not in selected nodes, adding it")
            # this is ok, since only options regarding the master will be queued on it.
            self.nodes.add(self.cluster.getMaster())
        # collect the per-node commands plus the monitoring commands for display/logging
        commands = self.nodes.showCommands()
        commands.append(self.monitoring.showCommands())
        if self.options.test_run:
            # dry run: report what would have been executed, execute nothing
            msg = "was going to run %s\n" % commands
            self.log.info(msg)
            print msg
            return False
        else:
            self.log.info("Going to run: %s" % commands)
            # run monitoring actions first, then the node commands
            monout = self.monitoring.doIt()
            self.log.debug("monitoring output: %s " % (monout))
            # threaded execution unless --non_threaded; chassis grouping per cluster setting
            out = self.nodes.doIt(not self.options.non_threaded, group_by_chassis=self.group_by_chassis)
            out.append(monout)
            self.log.info("Done it")
            return out

    def getNodes(self):
        """
        Gets the nodes defined by this manager object.

        Builds a CompositeNode by unioning every node set requested through
        the options (--chassis, --down, --all, --worker, --idle, --offline,
        --master, --node); options combine additively.
        """
        options = self.options
        cluster = self.cluster
        nodes = CompositeNode()
        if options.chassis:
            self.log.debug("option chassis: %s" % options.chassis)
            nodes.union(cluster.getNodesFromChassis(options.chassis, options.quattor))
        if options.down:
            self.log.debug("option down")
            nodes.union(cluster.getDownNodes())
        if options.all_nodes:
            self.log.debug("option all")
            tnodes = cluster.getAllNodes(quattor=bool(options.quattor))
            self.log.debug("Selecting all nodes: %s" % tnodes)
            nodes.union(tnodes)
        if options.worker:
            self.log.debug("option worker nodes")
            nodes.union(cluster.getWorkerNodes(quattor=bool(options.quattor)))
        if options.idle:
            self.log.debug("option idle")
            nodes.union(cluster.getIdleNodes())
        if options.offline:
            self.log.debug("option offline")
            nodes.union(cluster.getOfflineNodes())
        if options.storage:
            # NOTE(review): this debug line formats options.master while
            # handling --storage — likely a copy/paste slip; verify.
            self.log.debug("found --storage option: %s" % options.master)
            self.log.warning("--storage not implemented yet")
        if options.master:
            # find master nodes named like "master<N>" in the --master argument
            self.log.debug("found --master option: %s" % options.master)
            tnodes = re.findall("master\d*", options.master)
            if tnodes:
                self.log.debug("found master specifier %s" % tnodes)
                for nodeid in tnodes:
                    try:
                        masters = cluster._getMasters()
                        self.log.debug('got masters %s' % masters)
                        self.log.debug('getting masters %s' % nodeid)
                        node = masters.get(nodeid)
                        self.log.debug("adding master %s" % node)
                        nodes.add(node)
                    except NodeException, ex:
                        # best effort: log and continue with the remaining masters
                        self.log.warning("could not add master node %s : %s" % (nodeid, ex))
        if options.node:
            # explicit node list: resolve each id, skipping unknown nodes
            self.log.debug("found --node option: %s" % options.node)
            for nodeid in self._parseNodes(options.node):
                try:
                    nodes.add(self._getNode(nodeid))
                    self.log.debug("added node %s" % nodeid)
                except NodeException, e:
                    self.log.warning("Could not find %s: %s" % (nodeid, e))