Ejemplo n.º 1
0
    def __init__(self, options):
        '''
        Set up the manager from parsed command line options.

        options is an options object (as created by OptionParser). This
        constructor reads options.cluster and options.imms itself and keeps the
        whole object around as self.options for getNodes() and parseActions().
        f.ex:
        {'debug':False, 'node': None, 'forced': False, 'hardreboot': False, 'allForced': False, 'chassis': None, 'state': True,
        'down': False, 'all': False, 'ack': None, 'idle': False, 'runcmd': None,
        'cluster': None, 'poweroff': False, 'pbsmomcleanup': False, 'pbsmomstop': False,
        'pbsmomstatus': False, 'postpoweron': False, 'offline': False,
        'poweron': False, 'test_run': False, 'pbsmomrestart': False, 'Master':None,}
        '''
        logger = fancylogger.getLogger(self.__class__.__name__)
        self.log = logger
        logger.debug("manager created")
        self.status = None
        self.comment = None
        logger.debug("options: %s" % options.__dict__)
        self.options = options

        # an explicitly requested cluster wins, otherwise use the default one
        if options.cluster:
            self.cluster = Cluster.getCluster(options.cluster)
        else:
            self.cluster = Cluster.getDefaultCluster()
            logger.warning("Selected cluster %s as default cluster" % self.cluster)
        logger.debug("creating cluster: %s" % self.cluster)

        # group by chassis only when the cluster defines the attribute
        self.group_by_chassis = False
        if hasattr(self.cluster, "group_by_chassis"):
            self.group_by_chassis = self.cluster.group_by_chassis

        # node selection needs self.options, self.cluster and self.log to be set
        self.nodes = self.getNodes()
        logger.info("selected nodes on cluster %s: %s" % (self.cluster, self.nodes))

        # monitoring service, fed with the selected nodes
        selected = self.nodes.getNodes()
        self.monitoring = Icinga(selected, options.imms)

        # queue the requested action(s)
        self.parseActions()
Ejemplo n.º 2
0
class Manager(object):
    '''
    This class is used to manage the UGent HPC clusters

    adding a new cluster involves creating a new extension of the Cluster class
    here, set the NodeClass
    you'll probably have to create a new extension of node to implement what you want to do here
    '''

    def __init__(self, options):
        '''
        Set up the manager from parsed command line options.

        options is an options object (as created by OptionParser). This
        constructor reads options.cluster and options.imms itself and keeps the
        whole object around as self.options for getNodes() and parseActions().
        f.ex:
        {'debug':False, 'node': None, 'forced': False, 'hardreboot': False, 'allForced': False, 'chassis': None, 'state': True,
        'down': False, 'all': False, 'ack': None, 'idle': False, 'runcmd': None,
        'cluster': None, 'poweroff': False, 'pbsmomcleanup': False, 'pbsmomstop': False,
        'pbsmomstatus': False, 'postpoweron': False, 'offline': False,
        'poweron': False, 'test_run': False, 'pbsmomrestart': False, 'Master':None,}
        '''
        logger = fancylogger.getLogger(self.__class__.__name__)
        self.log = logger
        logger.debug("manager created")
        self.status = None
        self.comment = None
        logger.debug("options: %s" % options.__dict__)
        self.options = options

        # an explicitly requested cluster wins, otherwise use the default one
        if options.cluster:
            self.cluster = Cluster.getCluster(options.cluster)
        else:
            self.cluster = Cluster.getDefaultCluster()
            logger.warning("Selected cluster %s as default cluster" % self.cluster)
        logger.debug("creating cluster: %s" % self.cluster)

        # group by chassis only when the cluster defines the attribute
        self.group_by_chassis = False
        if hasattr(self.cluster, "group_by_chassis"):
            self.group_by_chassis = self.cluster.group_by_chassis

        # node selection needs self.options, self.cluster and self.log to be set
        self.nodes = self.getNodes()
        logger.info("selected nodes on cluster %s: %s" % (self.cluster, self.nodes))

        # monitoring service, fed with the selected nodes
        selected = self.nodes.getNodes()
        self.monitoring = Icinga(selected, options.imms)

        # queue the requested action(s)
        self.parseActions()

    def doit(self):
        """
        do the actual actions
        This will run all commands scheduled on the selected nodes
        AND on this cluster's master node
        (only the things you scheduled on the master node),
        plus the commands queued on the monitoring service.

        Returns False without running anything when
         - special nodes (storage, masters,...) are selected without the forced option
         - a scheduler restart is requested without the forced option
         - the test_run option is set (the commands are only logged and printed)

        Otherwise the output is an array of arrays of nodes and an array of
        their commands and an array of out,err, with the output of the
        monitoring service appended as last element.

        f.ex
        [
        [node001,
            [[command1,[out,err]],
             [command2,[out,err]]]
        ],
        [node002,[[command1,[out,err]]]]
        ]
        """
        # check for special nodes
        if self.hasSpecials():
            self.log.info("Selected nodes include special nodes (storage, masters,...)")
            if not self.options.forced:
                msg = "You are selecting special nodes (storage, masters,...) "\
                      "without the --forced option\nAborting"
                self.log.warning(msg)
                return False

        # TODO: check for special actions, just checking one right now
        # we might want to check for not doing anything special on a node
        # where the scheduler is running on.
        if self.options.restart and not self.options.forced:
            # the trailing space keeps the two literals from running together
            msg = "You are trying to restart the scheduler "\
                  "without the --forced option, do you know what you are doing?\nAborting"
            self.log.warning(msg)
            return False

        # add the master if it was not selected. this is ok, nothing special should have
        # been queued on it.
        master = self.cluster.getMaster()
        if not self.nodes.contains(master.nodeid):
            self.log.info("Master not in selected nodes, adding it")
            # this is ok, since only options regarding the master will be queued on it.
            self.nodes.add(master)

        commands = self.nodes.showCommands()
        commands.append(self.monitoring.showCommands())
        if self.options.test_run:
            msg = "was going to run %s\n" % commands
            self.log.info(msg)
            # parenthesized form prints the same text as the python 2 print
            # statement and is also valid python 3 syntax
            print(msg)
            return False

        self.log.info("Going to run: %s" % commands)

        # monitoring commands go first, then the commands queued on the nodes
        monout = self.monitoring.doIt()
        self.log.debug("monitoring output: %s " % (monout))

        out = self.nodes.doIt(not self.options.non_threaded, group_by_chassis=self.group_by_chassis)
        out.append(monout)
        self.log.info("Done it")
        return out

    def getNodes(self):
        """
        gets the nodes defined by this manager object
        """
        options = self.options
        cluster = self.cluster
        nodes = CompositeNode()
        if options.chassis:
            self.log.debug("option chassis: %s" % options.chassis)
            nodes.union(cluster.getNodesFromChassis(options.chassis, options.quattor))
        if options.down:
            self.log.debug("option down")
            nodes.union(cluster.getDownNodes())
        if options.all_nodes:
            self.log.debug("option all")
            tnodes = cluster.getAllNodes(quattor=bool(options.quattor))
            self.log.debug("Selecting all nodes: %s" % tnodes)
            nodes.union(tnodes)

        if options.worker:
            self.log.debug("option worker nodes")
            nodes.union(cluster.getWorkerNodes(quattor=bool(options.quattor)))

        if options.idle:
            self.log.debug("option idle")
            nodes.union(cluster.getIdleNodes())
        if options.offline:
            self.log.debug("option offline")
            nodes.union(cluster.getOfflineNodes())

        if options.storage:
            self.log.debug("found --storage option: %s" % options.master)
            self.log.warning("--storage not implemented yet")

        if options.master:
            # find master
            self.log.debug("found --master option: %s" % options.master)
            tnodes = re.findall("master\d*", options.master)
            if tnodes:
                self.log.debug("found master specifier %s" % tnodes)
                for nodeid in tnodes:
                    try:
                        masters = cluster._getMasters()
                        self.log.debug('got masters %s' % masters)

                        self.log.debug('getting masters %s' % nodeid)
                        node = masters.get(nodeid)
                        self.log.debug("adding master %s" % node)
                        nodes.add(node)
                    except NodeException, ex:
                        self.log.warning("could not add master node %s : %s" % (nodeid, ex))
        if options.node:
            self.log.debug("found --node option: %s" % options.node)
            for nodeid in self._parseNodes(options.node):
                try:
                    nodes.add(self._getNode(nodeid))
                    self.log.debug("added node %s" % nodeid)
                except NodeException, e:
                    self.log.warning("Could not find %s: %s" % (nodeid, e))