Beispiel #1
0
class DatabaseTestCase(unittest.TestCase):
    def setUp(self):
        self.database = Database()
        
    def test_get_monitors(self):
        monitor = self.database.get_monitor('frank')
        monitors = self.database.get_monitors()
        self.assertTrue(monitor in monitors)
        
    def test_get_groups(self):
        group = self.database.get_group_by_name('quae-other')
        groups = self.database.get_groups()
        self.assertTrue(group in groups)
        
    def tearDown(self):
        self.database = None
Beispiel #2
0
class Controller:
    
    """ Controller object. Initializes various data structures used by object. Establishes connection with XMPP server,
    connects to poller, aggregator and logging Multi-User Chats and registers stanza handlers. """
    def __init__(self):

        self.db = Database()

        entity_prefix = 'controller'

        conn = Connection(entity_prefix, static=True)
        self.entity_name, self.entity_suffix = conn.get_entity_name()
        
        # List of nodes known to the controller
        self.poller_map = {}
        self.poller_pool = {}
        
        self.job_map = {}
        self.job_pool = []

        # Message scheduler
        self.sched = MessageScheduler(self.message_handler)
        
        self.log = Logging(conn)
        conn.join_muc('pollers')
        conn.join_muc('aggregators')

        self.parser = Parser()
        
        self.establish_jobs()
        
        self.conn = conn.get_conn()

        self.conn.RegisterHandler('iq',self.result_handler,'result')
        self.conn.RegisterHandler('iq',self.set_handler,'set')
        self.conn.RegisterHandler('presence',self.presence_handler)

        self.go_on()
        
    """ Called by the presence handler when an entity connects to the aggregator or poller MUCs.
    Used to inspect retreive service information from an entity, required in the XEP Jabber-RPC standard. """
    def disco_lookup(self, recipient):
        self.log.info('Performing discovery lookup.')
        message = Iq('get', queryNS=NS_DISCO_INFO, to=recipient)
        self.sched.add_message(message, self.disco_handler)
        
    """ Method passed and used as handler for messages by MessageScheduler.
    Sends messages and logs an error if message send is a retry """
    # Handler used by message scheduling class
    def message_handler(self, message, retry=False):
#        print 'Sending message.'
        if retry == True:
            self.log.error('Message timed out, resending.')
        self.conn.send(message)
#
#   MESSAGE HANDLERS
#
    """ Handler for presence stanzas recieved by the XMPP listener.
    If 
    """
    def presence_handler(self, conn, presence_node):
        sender = presence_node.getFrom()
        presence_type = presence_node.getAttr('type')
        # Ignore self and presence announcements from logging MUC
        if sender.getResource() != 'controller':
            if presence_type == 'unavailable':
                if sender.getNode() == 'aggregators' or sender.getNode() == 'pollers':
                    self.remove_entity(sender.getNode(), sender)
            elif sender.getNode() == 'aggregators' or sender.getNode() == 'pollers':
                # Check the service discovery details for a connecting node.
                self.disco_lookup(sender)
        raise NodeProcessed
        
    """ IQ set handler, runs RPC methods in whitelist """
    def set_handler(self, conn, iq_node):
        query_node = iq_node.getQueryChildren()
        for node in query_node:
            try:
                method = node.getTagData('methodName')
                method_whitelist = ['get_group', 'get_groups', 'create_group', 'update_group', 'remove_group',
                'get_monitor', 'get_monitors', 'create_monitor', 'update_monitor', 'remove_monitor', 'get_monitors_by_gid',
                'get_job', 'get_jobs', 'create_job', 'update_job', 'remove_job',
                'get_evaluation', 'get_evaluations', 'create_evaluation', 'update_evaluation', 'remove_evaluation',
                'get_results', 'get_results_day', 'get_results_week', 'get_results_hour',
                'poller_failure',
                'get_aggregator']
                if method in method_whitelist:
                    method = getattr(self, method)
                    try:
                        try:
                            params = node.getTag('params').getChildren()
                            args = self.parser.get_args(params, iq_node.getFrom())
                        except AttributeError:
                            args = []
                            
                        status, parameters  = apply(method, args)
                        message = self.parser.rpc_response(iq_node.getFrom(), iq_node.getID(), status, parameters)
                        self.conn.send(message)
                    except TypeError:
#                        print sys.exc_info()
                        conn.send(iq_node.buildReply('error'))
                else:
                    conn.send(iq_node.buildReply('error'))
                    self.log.error('Method not in whitelist')
            except AttributeError:
                traceback.print_exc()
                conn.send(iq_node.buildReply('error'))
        raise NodeProcessed

    def result_handler(self, conn, iq_node):
        # Check if the reponse is managed by scheduler
        if self.sched.is_managed(int(iq_node.getAttr('id'))):
            self.sched.received_response(iq_node)
        raise NodeProcessed
#   END MESSAGE HANDLERS

#
# BEGIN SCHEDULER RESPONSE HANDLERS
#
    def disco_handler(self, sender, query_node):
        if query_node.getNamespace() == NS_DISCO_INFO:
            entity_type = query_node.getTagAttr('identity', 'type')
            if entity_type == 'aggregator' or entity_type == 'poller':
                adjusted_jid = JID(sender.getResource() + '@quae.co.uk/skynet')
                category = query_node.getTagAttr('identity', 'category')
                self.log.info('Registering node %s' % adjusted_jid)
                self.add_entity(entity_type, category, adjusted_jid)
        else:
            self.log.error('Receieved iq message with incorrect namespace')

    def assign_job(self, sender, query_node):
        if query_node.getNamespace() == NS_RPC:
            poller, job_id = self.parser.get_args_no_sender(query_node.getTag('methodResponse').getTag('params').getChildren())
            poller_jid = JID(poller)
            job_id = int(job_id)
            job = None
            for i in range(len(self.job_pool)):
                if self.job_pool[i]['id'] == job_id:
                    job = self.job_pool.pop(i)
                    self.log.info('Removing job %s from the job pool' % job_id)
                    break
            if job != None:
                self.job_map[poller_jid].append(job)
                self.log.info('Job %s successfully assigned to %s' % (job_id, poller_jid))
        else:
            self.log.error('Receieved iq message with incorrect namespace')
            
    def poller_removed(self, sender, query_node):
        if query_node.getNamespace() == NS_RPC:
            args = self.parser.get_args_no_sender(query_node.getTag('methodResponse').getTag('params').getChildren())
            adjusted_jid = JID(args[0])
            unassigned_jobs = self.job_map.pop(adjusted_jid)
            for job in unassigned_jobs:
                self.log.info('Adding job %s to the job pool' % job['id'])
                self.job_pool.append(job)
            parent_poller = None
            for aggregator, pollers in self.poller_map.items():
                for poller, segment in pollers:
                    if poller == adjusted_jid:
                        parent_aggregator = aggregator
                        pollers.remove((poller, segment))
                        break
            self.log.info('Removed %s from %s' % (adjusted_jid, parent_aggregator))
            self.assign_pooled_jobs()            
        else:
            self.log.error('Receieved iq message with incorrect namespace')
# END SCHEDULER HANDLERS

#
#   BEGIN RPC METHODS
#
# Requested by an aggregator when an assigned poller has failed/disconnected.
    def poller_failure(self, sender, previous_poller):
        pollers = self.poller_map[JID(sender)]
        poller_jid = JID('[email protected]/' + JID(previous_poller).getNode())
        try:
            pollers.remove(poller_jid)
            message = 'Removed failed poller'
            try:
                self.rebalance_pollers()
            except:
                print sys.exc_info()
            return 'success', [message]
        except:
            return 'failure', ['Failed to remove poller']

# Group operations
    def get_group(self, sender, name):
        group = self.db.get_group_by_name(name)
        if group != None:
            return 'success', [group]
        return 'failure', ['No such group exists']
    
    def get_groups(self, sender):
        groups = self.db.get_groups()
        if groups != None:
            return 'success', [groups]
        return 'failure', ['Failed to retreieve groups']

    def create_group(self, sender, name, desc):
        existing = self.db.get_group_by_name(name)
        if existing == None:
            self.db.create_group(name, desc)
            return 'success', ['Sucessfully created group %s' % name]
        return 'failure', ['failure']
    
    def update_group(self, sender, id, name, desc):
        group = self.db.get_group_by_id(id)
        if group != None:
            self.db.update_group(id, name, desc)
            return 'success', ['Successfully update group %s' % name]
        return 'failure', ['Failed to update group']
        
    def remove_group(self, sender, name):
        self.db.remove_group_by_name(name)
        if self.db.get_group_by_name(name) == None:
            return 'success', ['Successfully remove group %s' % name]
        else:
            return 'failure', ['Failed to remove group %s' % name]
# Monitor operations

    def get_monitor(self, sender, name):
        try:
            monitor = self.db.get_monitor(name)
            if monitor != False:
                return 'success', [monitor]
        except TypeError:
            return 'failure', ['No such monitor exists']
        return 'failure', ['No such monitor exists']
        
    def get_monitors(self, sender, group=None):
        try:
            if group != None:
                monitors = self.db.get_monitors(group)
            else:
                monitors = self.db.get_monitors()
            return 'success', [monitors]
        except AttributeError:
            return 'failure', ['Failed to retrieve monitors']
            
    def get_monitors_by_gid(self, sender, group_id):
        try:
            if group_id != None:
                monitors = self.db.get_monitors_by_gid(group_id)
                return 'success', [monitors]
        except AttributeError:
            pass
        return 'failure', ['Failed to retrieve monitors']
    
    def create_monitor(self, sender, name, description, group):
        if self.db.create_monitor(name, description, group) == True:
            return 'success', ['Successfully create monitor %s' % name]
        return 'failure', ['Failed to create monitor']
        
    def update_monitor(self, sender, name, description, group):
        group = self.db.get_group_by_id(id)
        if group != None:
            self.db.update_group(id, name, desc)
            return 'success', ['Successfully update monitor %s' % name]
        return 'failure', ['Failed to update monitor']
        
    def remove_monitor(self, sender, name):
        self.db.remove_monitor_by_name(name)
        if self.db.get_monitor_by_name(name) == None:
            return 'success', ['Successfully removed monitor %s' % name]
        else:
            return 'failure', ['Failed to remove monitor %s' % name]
        
# job operations

    def get_job(self, sender, mon, id):
        try:
            job = self.db.get_job(id, mon)
            return 'success', [job]
        except TypeError:
            return 'failure', ['No such job exists']
            
    def get_jobs(self, sender, mon):
        try:
            jobs = self.db.get_jobs(mon)
            return 'success', [jobs]
        except AttributeError:
            return 'failure', ['Failed to retreieve jobs']

    def create_job(self, sender, mon, address, protocol, frequency, interface, resource):
        if self.db.get_monitor(mon) != None:
            if self.db.create_job(address, protocol, frequency, interface, resource, mon) == True:
                return 'success', ['Successfully created a job for %s' % mon]
        return 'failure', ['Failed to create job']

    def update_job(self, sender, mon, id, address, protocol, frequency, interface, resource):
        existing = self.db.get_job(id, mon)
        if existing != None:
            self.db.update_job(id, address, protocol, frequency, interface, resource)
            return 'success', ['Successfully updated job']
        else:
            return 'failure', ['failure']
            
    def remove_job(self, sender, mon, id):
        self.db.remove_job(id)
        if self.db.get_job(id) == None:
            return 'success', ['Successfully removed job']
        else:
            return 'failure', ['failure']
        
    # Result read operations
    
    def get_results(self, sender, monitor, job, start_datetime, end_datetime):
        results = self.db.get_results(monitor, job, start_datetime, end_datetime)
        if results == None:
            return 'failure', ['No such results exist']
        elif results != False:
            return 'success', results
        return 'failure', ['Failed to retreive specificied results']
        
    def get_results_day(self, sender, monitor, job, start_datetime):
        results = self.db.get_results_day(monitor, job, start_datetime)
        if results == []:
            return 'failure', ['No such results exist']
        elif results != False:
            job_details = self.db.get_job(job, monitor)
            return 'success', [job_details, results]
        return 'failure', ['Failed to retreive specificied results']
        
    def get_results_week(self, sender, monitor, job, start_datetime):
        pass
    
    def get_results_hour(self, sender, monitor, job, start_datetime):
        results = self.db.get_results_hour(monitor, job, start_datetime)
        if results == []:
            return 'failure', ['No such results exist']
        elif results != False:
            job_details = self.db.get_job(job, monitor)
            return 'success', [job_details, results]
        return 'failure', ['Failed to retreive specificied results']
#   END RPC METHODS

#
#   BEGIN PRIVATE METHODS
#
    """ Called on successful DISCO request.
    Will register Poller or Aggregator, send jobs or balance jobs. """
    def add_entity(self, entity_type, segment, entity):
        if entity_type == 'aggregator':
            self.poller_map[entity] = []
            # If pollers have been added, but there were no aggregators running
            self.assign_pooled_pollers()
            if len(self.poller_map) > 1:
                self.rebalance_pollers()
            self.assign_pooled_jobs()
                
        elif entity_type == 'poller':
            self.job_map[entity] = []
            
            self.poller_pool[entity] = segment
            self.assign_pooled_pollers()
            
            if len(self.poller_map) > 1:
                self.rebalance_pollers()
            
            self.assign_pooled_jobs()
            
            if len(self.job_map) > 1:
                self.rebalance_jobs()
            # Give poller to appropriate aggregator
            print 'Added %s to %ss' % (entity, entity_type)
        self.log.info('Node %s was successfully registered with the controller' % entity)

    """ Removing Poller or Aggregator """
    def remove_entity(self, entity_type, entity):
        try:
            if entity_type == 'aggregators':
                adjusted_jid = JID(JID(entity).getResource() + '@quae.co.uk/skynet')
                unassigned_pollers = self.poller_map.pop(adjusted_jid)
                for poller, segment in unassigned_pollers:
                    self.poller_pool[poller] = segment
                self.log.info('Removed %s' % adjusted_jid)
                if len(self.poller_pool) > 0:
                    # Try and assign pooled pollers
                    if not self.assign_pooled_pollers():
                        for poller, segment in unassigned_pollers:
                            message = self.parser.rpc_call(poller, 'aggregator_failure', [])
                            self.sched.add_message(message)
                    
            elif entity_type == 'pollers':
                adjusted_jid = JID(JID(entity).getResource() + '@quae.co.uk/skynet')
                if len(self.poller_map) > 0:
                    parent_aggregator = None
                    for aggregator, pollers in self.poller_map.items():
                        for poller, segment in pollers:
                            if adjusted_jid == poller:
                                parent_aggregator = aggregator
                                break
                    if parent_aggregator != None:
                        remove_call = self.parser.rpc_call(parent_aggregator, 'remove_poller', [str(adjusted_jid)])
                        self.sched.add_message(remove_call, self.poller_removed)
                else:
                    try:
                        self.job_map.pop(adjusted_jid)
                        self.poller_pool.pop(adjusted_jid)
                        self.log.info('Poller not assigned, sucessfully removed')
                    except:
                        self.log.error('Failed to remove poller')
                        traceback.print_exc()
        except ValueError:
            self.log.error('Failed to remove %s' % entity)

    """ Assign unassigned pollers """
    def assign_pooled_pollers(self):
        if len(self.poller_map) > 0:
            while len(self.poller_pool) > 0:
                unassigned_poller, segment = self.poller_pool.popitem()
                chosen_aggregator = None
                poller_comp = None
                for aggregator, pollers in self.poller_map.items():
                    # If first loop or number of pollers assigned to aggregator is less than comp, make this agg the comp
                    if (chosen_aggregator == None and poller_comp == None) or len(pollers) < poller_comp:
                        chosen_aggregator = aggregator
                        poller_comp = len(pollers)
                if chosen_aggregator != None:
                    # Assign Poller to the Aggregtor with least assigned Pollers
                    
                    message = self.parser.rpc_call(chosen_aggregator, 'add_poller', [str(unassigned_poller)])
                    self.sched.add_message(message)
                    
                    for job in self.job_map[unassigned_poller]:
                        message = self.parser.rpc_call(chosen_aggregator, 'move_job', [str(unassigned_poller), job['id'], job['address'], job['protocol'], job['frequency'], job['interface'], job['resource'], job['segment']])
                        self.sched.add_message(message)
                    self.poller_map[chosen_aggregator].append((unassigned_poller, segment))
            return True
        else:
            self.log.info('No aggregators available for poller assignment')
            return False
    
    """ Get Pollers for a given network segment """
    def get_segment_pollers(self, segment):
        segment_pollers = {}
        for aggregator, pollers in self.poller_map.items():
            for poller, poller_segment in pollers:
                if poller_segment == segment:
                    segment_pollers[poller] = self.job_map[poller]
        return segment_pollers
        
    """ Called to allocate unassigned jobs """
    def assign_pooled_jobs(self):
        if len(self.job_map) > 0 and len(self.poller_map) > 0:
            for job in self.job_pool:
                unassigned_job = job
                least_loaded = None
                job_comp = None
                pollers = self.get_segment_pollers(unassigned_job['segment'])
                for poller, jobs in pollers.items():
#                for poller, jobs in self.job_map.items():
                    if (least_loaded == None and job_comp == None) or len(jobs) < job_comp:
                        least_loaded = poller
                        job_comp = len(jobs)
                if least_loaded != None:
                    chosen_aggregator = None
                    for aggregator, pollers in self.poller_map.items():
                        for poller, segment in pollers:
                            if poller == least_loaded:
                                chosen_aggregator = aggregator
                                break
                    if chosen_aggregator != None:
                        self.send_job(unassigned_job, least_loaded, chosen_aggregator)
        else:
            self.log.info('No assigned pollers available for job assignment')
        #print 'Job map %s' % self.job_map
        #print 'Job pool %s' % self.job_pool
    
    """ Rebalance pollers, compares amount assigned to each Aggregator, and moves across to
    another Poller if there's at least 2 more than another Aggregator """
    def rebalance_pollers(self):
        self.log.info('Attempting to rebalance pollers')

        poller_comp = None
        least_pollers = None
        most_pollers = None
        # Retrieve aggregators with least and most pollers
        
        for aggregator, pollers in self.poller_map.items():
            if poller_comp == None:
                least_pollers = aggregator
                most_pollers = aggregator
            elif len(pollers) < poller_comp:
                least_pollers = aggregator
            elif len(pollers) > poller_comp:
                most_pollers = aggregator
            poller_comp = len(pollers)
        
        if least_pollers != None and most_pollers != None:
            # If the difference between the two pollers is worth balancing
            if (len(self.poller_map[most_pollers]) - len(self.poller_map[least_pollers])) > 1:
                poller, segment = self.poller_map[most_pollers].pop()
                self.poller_map[least_pollers].append((poller, segment))
                self.sched.add_message(self.parser.rpc_call(least_pollers, 'add_poller', [str(poller)]))
                self.sched.add_message(self.parser.rpc_call(most_pollers, 'remove_poller', [str(poller)]))
                self.rebalance_pollers()
            else:
                self.log.info('Pollers balanced')
                return True
               
    """ Similar to above, checks number of assigned jobs through the system, and will level them across all Pollers """ 
    def rebalance_jobs(self):
        self.log.info('Attempting to rebalance jobs')
        job_comp = None
        least_jobs = None
        most_jobs = None
        
        network_segment = 'skynet'
        pollers = self.get_segment_pollers(network_segment)
        
#        for poller, jobs in self.job_map.items():
        for poller, jobs in pollers.items():
            print poller
            if job_comp == None:
                least_jobs = poller
                most_jobs = poller
            elif len(jobs) < job_comp:
                least_jobs = poller
            elif len(jobs) > job_comp:
                most_jobs = poller
            job_comp = len(jobs)
            
        if least_jobs != None and most_jobs != None:
            if (len(self.job_map[most_jobs]) - len(self.job_map[least_jobs])) > 1:
                job = self.job_map[most_jobs].pop()
                self.job_map[least_jobs].append(job)
                least_parent = None
                most_parent = None
                for aggregator, pollers in self.poller_map.items():
                    for poller, node_segment in pollers:
                        if network_segment == node_segment:
                            if poller == least_jobs:
                                least_parent = aggregator
                            if poller == most_jobs:
                                most_parent = aggregator
                            if least_parent != None and most_parent != None:
                                break
                    
                if least_parent != None and most_parent != None:
                    self.log.info('Moving job %s to %s' % (job['id'], least_jobs))
                    self.sched.add_message(self.parser.rpc_call(most_parent, 'remove_job', [job['id']]))
                    self.sched.add_message(self.parser.rpc_call(least_parent, 'run_job', [str(least_jobs), job['id'], job['address'], job['protocol'], job['frequency'], job['interface'], job['resource']]), offset=True)
                self.rebalance_jobs()
            else:
                self.log.info('Jobs balanced')
                return True
                
    """ Retrieve jobs on startup """    
    def establish_jobs(self):
        monitors = self.db.get_monitors()
        self.log.info('Retrieving jobs')
        for monitor in monitors:
            jobs = self.db.get_jobs(monitor.name)
            for job in jobs:
                job = dict(job)
                segment_name = self.db.get_segment_name(job['segment'])
                job['segment'] = segment_name
                # Make the poll freq stored every minute minimum
                job['frequency'] = job['frequency'] * 60
                self.job_pool.append(job)
        self.log.info('%s jobs added to the pool' % len(self.job_pool))
        
    """ Send job to Aggregator to forward to Poller """
    def send_job(self, job, poller, aggregator):
        message = self.parser.rpc_call(aggregator, 'run_job', [str(poller), job['id'], job['address'], job['protocol'], job['frequency'], job['interface'], job['resource']])
        self.log.info('Sending job %s to %s' % (job['id'], aggregator))
        self.sched.add_message(message, self.assign_job, offset=True)
        
    def step_on(self):
        try:
            self.conn.Process(1)
        except KeyboardInterrupt: return 0
        return 1

    def go_on(self):
        while self.step_on(): pass