Code example #1
class Controller(object):
    
    @staticmethod
    def fail_cb(manager, entrypoint, exception):
        sandesh_global._logger.info("Load failed for %s with exception %s" % \
                                     (str(entrypoint),str(exception)))
        
    def __init__(self, conf):
        self._conf = conf
        module = Module.ALARM_GENERATOR
        self._moduleid = ModuleNames[module]
        node_type = Module2NodeType[module]
        self._node_type_name = NodeTypeNames[node_type]
        self._hostname = socket.gethostname()
        self._instance_id = self._conf.worker_id()
        sandesh_global.init_generator(self._moduleid, self._hostname,
                                      self._node_type_name, self._instance_id,
                                      self._conf.collectors(), 
                                      self._node_type_name,
                                      self._conf.http_port(),
                                      ['opserver.sandesh', 'sandesh'])
        sandesh_global.set_logging_params(
            enable_local_log=self._conf.log_local(),
            category=self._conf.log_category(),
            level=self._conf.log_level(),
            file=self._conf.log_file(),
            enable_syslog=self._conf.use_syslog(),
            syslog_facility=self._conf.syslog_facility())
        self._logger = sandesh_global._logger

        # Trace buffer list
        self.trace_buf = [
            {'name':'DiscoveryMsg', 'size':1000}
        ]
        # Create trace buffers 
        for buf in self.trace_buf:
            sandesh_global.trace_buffer_create(name=buf['name'], size=buf['size'])

        tables = [ "ObjectCollectorInfo",
                   "ObjectDatabaseInfo",
                   "ObjectVRouter",
                   "ObjectBgpRouter",
                   "ObjectConfigNode" ] 
        self.mgrs = {}
        self.tab_alarms = {}
        for table in tables:
            self.mgrs[table] = hook.HookManager(
                namespace='contrail.analytics.alarms',
                name=table,
                invoke_on_load=True,
                invoke_args=(),
                on_load_failure_callback=Controller.fail_cb
            )
            
            for extn in self.mgrs[table][table]:
                self._logger.info('Loaded extensions for %s: %s,%s' % \
                    (table, extn.name, extn.entry_point_target))

            self.tab_alarms[table] = {}

        ConnectionState.init(sandesh_global, self._hostname, self._moduleid,
            self._instance_id,
            staticmethod(ConnectionState.get_process_state_cb),
            NodeStatusUVE, NodeStatus)

        self._us = UVEServer(None, self._logger, self._conf.redis_password())

        self._workers = {}

        self.disc = None
        self._libpart_name = self._hostname + ":" + self._instance_id
        self._libpart = None
        self._partset = set()
        if self._conf.discovery()['server']:
            import discoveryclient.client as client 
            data = {
                'ip-address': self._hostname ,
                'port': self._instance_id
            }
            self.disc = client.DiscoveryClient(
                self._conf.discovery()['server'],
                self._conf.discovery()['port'],
                ModuleNames[Module.ALARM_GENERATOR])
            self._logger.info("Disc Publish to %s : %s"
                          % (str(self._conf.discovery()), str(data)))
            self.disc.publish(ALARM_GENERATOR_SERVICE_NAME, data)
        else:
            # If there is no discovery service, use fixed redis_uve list
            redis_uve_list = []
            try:
                for redis_uve in self._conf.redis_uve_list():
                    redis_ip_port = redis_uve.split(':')
                    redis_ip_port = (redis_ip_port[0], int(redis_ip_port[1]))
                    redis_uve_list.append(redis_ip_port)
            except Exception as e:
                self._logger.error('Failed to parse redis_uve_list: %s' % e)
            else:
                self._us.update_redis_uve_list(redis_uve_list)

            # If there is no discovery service, use fixed alarmgen list
            self._libpart = self.start_libpart(self._conf.alarmgen_list())

        PartitionOwnershipReq.handle_request = self.handle_PartitionOwnershipReq
        PartitionStatusReq.handle_request = self.handle_PartitionStatusReq
        UVETableAlarmReq.handle_request = self.handle_UVETableAlarmReq 

    def libpart_cb(self, part_list):

        newset = set(part_list)
        oldset = self._partset
        self._partset = newset

        self._logger.info('Partition List : new %s old %s' % \
            (str(newset),str(oldset)))
        
        for addpart in (newset-oldset):
            self._logger.info('Partition Add : %s' % addpart)
            self.partition_change(addpart, True)
        
        for delpart in (oldset-newset):
            self._logger.info('Partition Del : %s' % delpart)
            self.partition_change(delpart, False)

    def start_libpart(self, ag_list):
        if not self._conf.zk_list():
            self._logger.error('Could not import libpartition: No zookeeper')
            return None
        if not ag_list:
            self._logger.error('Could not import libpartition: No alarmgen list')
            return None
        try:
            from libpartition.libpartition import PartitionClient
            self._logger.error('Starting PC')
            pc = PartitionClient("alarmgen",
                    self._libpart_name, ag_list,
                    self._conf.partitions(), self.libpart_cb,
                    ','.join(self._conf.zk_list()))
            self._logger.error('Started PC')
            return pc
        except Exception as e:
            self._logger.error('Could not import libpartition: %s' % str(e))
            return None

    def handle_uve_notif(self, uves, remove = False):
        self._logger.debug("Changed UVEs : %s" % str(uves))
        no_handlers = set()
        for uv in uves:
            tab = uv.split(':',1)[0]
            uve_name = uv.split(':',1)[1]
            if not self.mgrs.has_key(tab):
                no_handlers.add(tab)
                continue
            if remove:
                uve_data = []
            else:
                filters = {'kfilt': [uve_name]}
                itr = self._us.multi_uve_get(tab, True, filters)
                uve_data = itr.next()['value']
            if len(uve_data) == 0:
                self._logger.info("UVE %s deleted" % uv)
                if self.tab_alarms[tab].has_key(uv):
                    del self.tab_alarms[tab][uv]
                    uname = uv.split(":",1)[1]
                    ustruct = UVEAlarms(name = uname, deleted = True)
                    alarm_msg = AlarmTrace(data=ustruct, table=tab)
                    self._logger.info('send del alarm: %s' % (alarm_msg.log()))
                    alarm_msg.send()
                continue
            results = self.mgrs[tab].map_method("__call__", uv, uve_data)
            new_uve_alarms = {}
            for res in results:
                nm, errs = res
                self._logger.debug("Alarm[%s] %s: %s" % (tab, nm, str(errs)))
                elems = []
                for ae in errs:
                    rule, val = ae
                    rv = AlarmElement(rule, val)
                    elems.append(rv)
                if len(elems):
                    new_uve_alarms[nm] = UVEAlarmInfo(type = nm,
                                           description = elems, ack = False)
            if (not self.tab_alarms[tab].has_key(uv)) or \
                       pprint.pformat(self.tab_alarms[tab][uv]) != \
                       pprint.pformat(new_uve_alarms):
                uname = uv.split(":", 1)[1]
                ustruct = UVEAlarms(name = uname, alarms = new_uve_alarms.values(),
                                    deleted = False)
                alarm_msg = AlarmTrace(data=ustruct, table=tab)
                self._logger.info('send alarm: %s' % (alarm_msg.log()))
                alarm_msg.send()
            self.tab_alarms[tab][uv] = new_uve_alarms
            
        if len(no_handlers):
            self._logger.debug('No Alarm Handlers for %s' % str(no_handlers))

    def handle_UVETableAlarmReq(self, req):
        status = False
        if req.table == "all":
            parts = self.tab_alarms.keys()
        else:
            parts = [req.table]
        self._logger.info("Got UVETableAlarmReq : %s" % str(parts))
        np = 1
        for pt in parts:
            resp = UVETableAlarmResp(table = pt)
            uves = []
            for uk,uv in self.tab_alarms[pt].iteritems():
                alms = []
                for ak,av in uv.iteritems():
                    alms.append(av)
                uves.append(UVEAlarms(name = uk, alarms = alms))
            resp.uves = uves 
            if np == len(parts):
                mr = False
            else:
                mr = True
            resp.response(req.context(), mr)
            np = np + 1

    
    def partition_change(self, partno, enl):
        """
        Call this function when getting or giving up
        ownership of a partition
        Args:
            partno : Partition Number
            enl    : True for acquiring, False for giving up
        Returns: 
            status of operation (True for success)
        """
        status = False
        if enl:
            if self._workers.has_key(partno):
                self._logger.info("Dup partition %d" % partno)
            else:
                #uvedb = self._us.get_part(partno)
                ph = UveStreamProc(','.join(self._conf.kafka_broker_list()),
                                   partno, "uve-" + str(partno),
                                   self._logger, self._us.get_part,
                                   self.handle_uve_notif)
                ph.start()
                self._workers[partno] = ph
                status = True
        else:
            if self._workers.has_key(partno):
                ph = self._workers[partno]
                gevent.kill(ph)
                res,db = ph.get()
                print "Returned " + str(res)
                print "State :"
                for k,v in db.iteritems():
                    print "%s -> %s" % (k,str(v)) 
                del self._workers[partno]
                status = True
            else:
                self._logger.info("No partition %d" % partno)

        return status
    
    def handle_PartitionOwnershipReq(self, req):
        self._logger.info("Got PartitionOwnershipReq: %s" % str(req))
        status = self.partition_change(req.partition, req.ownership)

        resp = PartitionOwnershipResp()
        resp.status = status
        resp.response(req.context())
               
    def process_stats(self):
        ''' Go through the UVEKey-Count stats collected over 
            the previous time period over all partitions
            and send it out
        '''
        for pk,pc in self._workers.iteritems():
            din, dout = pc.stats()
            for ktab,tab in dout.iteritems():
                au = AlarmgenUpdate()
                au.name = self._hostname
                au.instance =  self._instance_id
                au.table = ktab
                au.partition = pk
                au.keys = []
                for uk,uc in tab.iteritems():
                    ukc = UVEKeyInfo()
                    ukc.key = uk
                    ukc.count = uc
                    au.keys.append(ukc)
                au_trace = AlarmgenUpdateTrace(data=au)
                self._logger.debug('send key stats: %s' % (au_trace.log()))
                au_trace.send()

            for ktab,tab in din.iteritems():
                au = AlarmgenUpdate()
                au.name = self._hostname
                au.instance =  self._instance_id
                au.table = ktab
                au.partition = pk
                au.notifs = []
                for kcoll,coll in tab.iteritems():
                    for kgen,gen in coll.iteritems():
                        for tk,tc in gen.iteritems():
                            tkc = UVETypeInfo()
                            tkc.type= tk
                            tkc.count = tc
                            tkc.generator = kgen
                            tkc.collector = kcoll
                            au.notifs.append(tkc)
                au_trace = AlarmgenUpdateTrace(data=au)
                self._logger.debug('send notif stats: %s' % (au_trace.log()))
                au_trace.send()
         
    def handle_PartitionStatusReq(self, req):
        ''' Return the entire contents of the UVE DB for the 
            requested partitions
        '''
        if req.partition == -1:
            parts = self._workers.keys()
        else:
            parts = [req.partition]
        
        self._logger.info("Got PartitionStatusReq: %s" % str(parts))
        np = 1
        for pt in parts:
            resp = PartitionStatusResp()
            resp.partition = pt
            if self._workers.has_key(pt):
                resp.enabled = True
                resp.uves = []
                for kcoll,coll in self._workers[pt].contents().iteritems():
                    uci = UVECollInfo()
                    uci.collector = kcoll
                    uci.uves = []
                    for kgen,gen in coll.iteritems():
                        ugi = UVEGenInfo()
                        ugi.generator = kgen
                        ugi.uves = []
                        for uk,uc in gen.iteritems():
                            ukc = UVEKeyInfo()
                            ukc.key = uk
                            ukc.count = uc
                            ugi.uves.append(ukc)
                        uci.uves.append(ugi)
                    resp.uves.append(uci)
            else:
                resp.enabled = False
            if np == len(parts):
                mr = False
            else:
                mr = True
            resp.response(req.context(), mr)
            np = np + 1

    def disc_cb_coll(self, clist):
        '''
        Analytics node may be brought up/down any time. For UVE aggregation,
        alarmgen needs to know the list of all Analytics nodes (redis-uves).
        Periodically poll the Collector list [in lieu of
        redis-uve nodes] from the discovery service.
        '''
        newlist = []
        for elem in clist:
            (ipaddr,port) = elem
            newlist.append((ipaddr, self._conf.redis_server_port()))
        self._us.update_redis_uve_list(newlist)

    def disc_cb_ag(self, alist):
        '''
        Analytics node may be brought up/down any time. For partitioning,
        alarmgen needs to know the list of all Analytics nodes (alarmgens).
        Periodically poll the alarmgen list from the discovery service
        '''
        newlist = []
        for elem in alist:
            (ipaddr, inst) = elem
            newlist.append(ipaddr + ":" + inst)

        # We should always include ourselves in the list of members
        newset = set(newlist)
        newset.add(self._libpart_name)
        newlist = list(newset)
        if not self._libpart:
            self._libpart = self.start_libpart(newlist)
        else:
            self._libpart.update_cluster_list(newlist)

    def run(self):
        alarmgen_cpu_info = CpuInfoData()
        while True:
            before = time.time()
            mod_cpu_info = ModuleCpuInfo()
            mod_cpu_info.module_id = self._moduleid
            mod_cpu_info.instance_id = self._instance_id
            mod_cpu_info.cpu_info = alarmgen_cpu_info.get_cpu_info(
                system=False)
            mod_cpu_state = ModuleCpuState()
            mod_cpu_state.name = self._hostname

            mod_cpu_state.module_cpu_info = [mod_cpu_info]

            alarmgen_cpu_state_trace = ModuleCpuStateTrace(data=mod_cpu_state)
            alarmgen_cpu_state_trace.send()

            aly_cpu_state = AnalyticsCpuState()
            aly_cpu_state.name = self._hostname

            aly_cpu_info = ProcessCpuInfo()
            aly_cpu_info.module_id= self._moduleid
            aly_cpu_info.inst_id = self._instance_id
            aly_cpu_info.cpu_share = mod_cpu_info.cpu_info.cpu_share
            aly_cpu_info.mem_virt = mod_cpu_info.cpu_info.meminfo.virt
            aly_cpu_info.mem_res = mod_cpu_info.cpu_info.meminfo.res
            aly_cpu_state.cpu_info = [aly_cpu_info]

            aly_cpu_state_trace = AnalyticsCpuStateTrace(data=aly_cpu_state)
            aly_cpu_state_trace.send()

            # Send out the UVEKey-Count stats for this time period
            self.process_stats()

            duration = time.time() - before
            if duration < 60:
                gevent.sleep(60 - duration)
            else:
                self._logger.error("Periodic collection took %s sec" % duration)
Code example #2
class Controller(object):
    
    @staticmethod
    def fail_cb(manager, entrypoint, exception):
        sandesh_global._logger.info("Load failed for %s with exception %s" % \
                                     (str(entrypoint),str(exception)))
        
    def __init__(self, conf):
        self._conf = conf
        module = Module.ALARM_GENERATOR
        self._moduleid = ModuleNames[module]
        node_type = Module2NodeType[module]
        self._node_type_name = NodeTypeNames[node_type]
        self._hostname = socket.gethostname()
        self._instance_id = self._conf.worker_id()
        sandesh_global.init_generator(self._moduleid, self._hostname,
                                      self._node_type_name, self._instance_id,
                                      self._conf.collectors(), 
                                      self._node_type_name,
                                      self._conf.http_port(),
                                      ['opserver.sandesh', 'sandesh'],
                                      host_ip=self._conf.host_ip())
        sandesh_global.set_logging_params(
            enable_local_log=self._conf.log_local(),
            category=self._conf.log_category(),
            level=self._conf.log_level(),
            file=self._conf.log_file(),
            enable_syslog=self._conf.use_syslog(),
            syslog_facility=self._conf.syslog_facility())
        self._logger = sandesh_global._logger

        # Trace buffer list
        self.trace_buf = [
            {'name':'DiscoveryMsg', 'size':1000}
        ]
        # Create trace buffers 
        for buf in self.trace_buf:
            sandesh_global.trace_buffer_create(name=buf['name'], size=buf['size'])

        tables = [ "ObjectCollectorInfo",
                   "ObjectDatabaseInfo",
                   "ObjectVRouter",
                   "ObjectBgpRouter",
                   "ObjectConfigNode" ] 
        self.mgrs = {}
        self.tab_alarms = {}
        for table in tables:
            self.mgrs[table] = hook.HookManager(
                namespace='contrail.analytics.alarms',
                name=table,
                invoke_on_load=True,
                invoke_args=(),
                on_load_failure_callback=Controller.fail_cb
            )
            
            for extn in self.mgrs[table][table]:
                self._logger.info('Loaded extensions for %s: %s,%s doc %s' % \
                    (table, extn.name, extn.entry_point_target, extn.obj.__doc__))

            self.tab_alarms[table] = {}

        ConnectionState.init(sandesh_global, self._hostname, self._moduleid,
            self._instance_id,
            staticmethod(ConnectionState.get_process_state_cb),
            NodeStatusUVE, NodeStatus)

        self._us = UVEServer(None, self._logger, self._conf.redis_password())

        self._workers = {}

        self.disc = None
        self._libpart_name = self._hostname + ":" + self._instance_id
        self._libpart = None
        self._partset = set()
        if self._conf.discovery()['server']:
            import discoveryclient.client as client 
            data = {
                'ip-address': self._hostname ,
                'port': self._instance_id
            }
            self.disc = client.DiscoveryClient(
                self._conf.discovery()['server'],
                self._conf.discovery()['port'],
                ModuleNames[Module.ALARM_GENERATOR])
            self._logger.info("Disc Publish to %s : %s"
                          % (str(self._conf.discovery()), str(data)))
            self.disc.publish(ALARM_GENERATOR_SERVICE_NAME, data)
        else:
            # If there is no discovery service, use fixed redis_uve list
            redis_uve_list = []
            try:
                for redis_uve in self._conf.redis_uve_list():
                    redis_ip_port = redis_uve.split(':')
                    redis_ip_port = (redis_ip_port[0], int(redis_ip_port[1]))
                    redis_uve_list.append(redis_ip_port)
            except Exception as e:
                self._logger.error('Failed to parse redis_uve_list: %s' % e)
            else:
                self._us.update_redis_uve_list(redis_uve_list)

            # If there is no discovery service, use fixed alarmgen list
            self._libpart = self.start_libpart(self._conf.alarmgen_list())

        PartitionOwnershipReq.handle_request = self.handle_PartitionOwnershipReq
        PartitionStatusReq.handle_request = self.handle_PartitionStatusReq
        UVETableAlarmReq.handle_request = self.handle_UVETableAlarmReq 

    def libpart_cb(self, part_list):

        agpi = AlarmgenPartionInfo()
        agpi.instance = self._instance_id
        agpi.partitions = part_list

        agp = AlarmgenPartition()
        agp.name = self._hostname
        agp.inst_parts = [agpi]
       
        agp_trace = AlarmgenPartitionTrace(data=agp)
        agp_trace.send() 

        newset = set(part_list)
        oldset = self._partset
        self._partset = newset

        self._logger.error('Partition List : new %s old %s' % \
            (str(newset),str(oldset)))
        
        for addpart in (newset-oldset):
            self._logger.error('Partition Add : %s' % addpart)
            self.partition_change(addpart, True)
        
        for delpart in (oldset-newset):
            self._logger.error('Partition Del : %s' % delpart)
            self.partition_change(delpart, False)

    def start_libpart(self, ag_list):
        if not self._conf.zk_list():
            self._logger.error('Could not import libpartition: No zookeeper')
            return None
        if not ag_list:
            self._logger.error('Could not import libpartition: No alarmgen list')
            return None
        try:
            from libpartition.libpartition import PartitionClient
            self._logger.error('Starting PC')
            agpi = AlarmgenPartionInfo()
            agpi.instance = self._instance_id
            agpi.partitions = []

            agp = AlarmgenPartition()
            agp.name = self._hostname
            agp.inst_parts = [agpi]
           
            agp_trace = AlarmgenPartitionTrace(data=agp)
            agp_trace.send() 

            pc = PartitionClient("alarmgen",
                    self._libpart_name, ag_list,
                    self._conf.partitions(), self.libpart_cb,
                    ','.join(self._conf.zk_list()))
            self._logger.error('Started PC')
            return pc
        except Exception as e:
            self._logger.error('Could not import libpartition: %s' % str(e))
            return None

    def handle_uve_notif(self, uves, remove = False):
        self._logger.debug("Changed UVEs : %s" % str(uves))
        no_handlers = set()
        for uv in uves:
            tab = uv.split(':',1)[0]
            uve_name = uv.split(':',1)[1]
            if not self.mgrs.has_key(tab):
                no_handlers.add(tab)
                continue
            if remove:
                uve_data = []
            else:
                filters = {'kfilt': [uve_name]}
                itr = self._us.multi_uve_get(tab, True, filters)
                uve_data = itr.next()['value']
            if len(uve_data) == 0:
                self._logger.info("UVE %s deleted" % uv)
                if self.tab_alarms[tab].has_key(uv):
                    del self.tab_alarms[tab][uv]
                    ustruct = UVEAlarms(name = uve_name, deleted = True)
                    alarm_msg = AlarmTrace(data=ustruct, table=tab)
                    self._logger.info('send del alarm: %s' % (alarm_msg.log()))
                    alarm_msg.send()
                continue
            results = self.mgrs[tab].map_method("__call__", uv, uve_data)
            new_uve_alarms = {}
            for res in results:
                nm, sev, errs = res
                self._logger.debug("Alarm[%s] %s: %s" % (tab, nm, str(errs)))
                elems = []
                for ae in errs:
                    rule, val = ae
                    rv = AlarmElement(rule, val)
                    elems.append(rv)
                if len(elems):
                    new_uve_alarms[nm] = UVEAlarmInfo(type = nm, severity = sev,
                                           timestamp = 0,
                                           description = elems, ack = False)
            del_types = []
            if self.tab_alarms[tab].has_key(uv):
                for nm, uai in self.tab_alarms[tab][uv].iteritems():
                    uai2 = copy.deepcopy(uai)
                    uai2.timestamp = 0
                    # This type was present earlier, but is now gone
                    if not new_uve_alarms.has_key(nm):
                        del_types.append(nm)
                    else:
                        # This type has no new information
                        if pprint.pformat(uai2) == \
                                pprint.pformat(new_uve_alarms[nm]):
                            del new_uve_alarms[nm]
            if len(del_types) != 0  or \
                    len(new_uve_alarms) != 0:
                self._logger.debug("Alarm[%s] Deleted %s" % \
                        (tab, str(del_types))) 
                self._logger.debug("Alarm[%s] Updated %s" % \
                        (tab, str(new_uve_alarms))) 
                # These alarm types are new or updated
                for nm, uai2 in new_uve_alarms.iteritems():
                    uai = copy.deepcopy(uai2)
                    uai.timestamp = UTCTimestampUsec()
                    if not self.tab_alarms[tab].has_key(uv):
                        self.tab_alarms[tab][uv] = {}
                    self.tab_alarms[tab][uv][nm] = uai
                # These alarm types are now gone
                for dnm in del_types:
                    del self.tab_alarms[tab][uv][dnm]
                    
                ustruct = None
                if len(self.tab_alarms[tab][uv]) == 0:
                    ustruct = UVEAlarms(name = uve_name,
                            deleted = True)
                    del self.tab_alarms[tab][uv]
                else:
                    ustruct = UVEAlarms(name = uve_name,
                            alarms = self.tab_alarms[tab][uv].values(),
                            deleted = False)
                alarm_msg = AlarmTrace(data=ustruct, table=tab)
                self._logger.info('send alarm: %s' % (alarm_msg.log()))
                alarm_msg.send()
            
        if len(no_handlers):
            self._logger.debug('No Alarm Handlers for %s' % str(no_handlers))

    def handle_UVETableAlarmReq(self, req):
        status = False
        if req.table == "all":
            parts = self.tab_alarms.keys()
        else:
            parts = [req.table]
        self._logger.info("Got UVETableAlarmReq : %s" % str(parts))
        np = 1
        for pt in parts:
            resp = UVETableAlarmResp(table = pt)
            uves = []
            for uk,uv in self.tab_alarms[pt].iteritems():
                alms = []
                for ak,av in uv.iteritems():
                    alms.append(av)
                uves.append(UVEAlarms(name = uk, alarms = alms))
            resp.uves = uves 
            if np == len(parts):
                mr = False
            else:
                mr = True
            resp.response(req.context(), mr)
            np = np + 1

    
    def partition_change(self, partno, enl):
        """
        Call this function when getting or giving up
        ownership of a partition
        Args:
            partno : Partition Number
            enl    : True for acquiring, False for giving up
        Returns: 
            status of operation (True for success)
        """
        status = False
        if enl:
            if self._workers.has_key(partno):
                self._logger.info("Dup partition %d" % partno)
            else:
                #uvedb = self._us.get_part(partno)
                ph = UveStreamProc(','.join(self._conf.kafka_broker_list()),
                                   partno, "uve-" + str(partno),
                                   self._logger, self._us.get_part,
                                   self.handle_uve_notif)
                ph.start()
                self._workers[partno] = ph
                status = True
        else:
            if self._workers.has_key(partno):
                ph = self._workers[partno]
                gevent.kill(ph)
                res,db = ph.get()
                print "Returned " + str(res)
                print "State :"
                for k,v in db.iteritems():
                    print "%s -> %s" % (k,str(v)) 
                del self._workers[partno]
                status = True
            else:
                self._logger.info("No partition %d" % partno)

        return status
    
    def handle_PartitionOwnershipReq(self, req):
        self._logger.info("Got PartitionOwnershipReq: %s" % str(req))
        status = self.partition_change(req.partition, req.ownership)

        resp = PartitionOwnershipResp()
        resp.status = status
        resp.response(req.context())
               
    def process_stats(self):
        ''' Go through the UVEKey-Count stats collected over 
            the previous time period over all partitions
            and send it out
        '''
        s_partitions = set()
        s_keys = set()
        n_updates = 0
        for pk,pc in self._workers.iteritems():
            s_partitions.add(pk)
            din, dout = pc.stats()
            for ktab,tab in dout.iteritems():
                au_keys = []
                for uk,uc in tab.iteritems():
                    s_keys.add(uk)
                    n_updates += uc
                    ukc = UVEKeyInfo()
                    ukc.key = uk
                    ukc.count = uc
                    au_keys.append(ukc)
                au_obj = AlarmgenUpdate(name=sandesh_global._source + ':' + \
                        sandesh_global._node_type + ':' + \
                        sandesh_global._module + ':' + \
                        sandesh_global._instance_id,
                        partition = pk,
                        table = ktab,
                        keys = au_keys,
                        notifs = None)
                self._logger.debug('send key stats: %s' % (au_obj.log()))
                au_obj.send()

            for ktab,tab in din.iteritems():
                au_notifs = []
                for kcoll,coll in tab.iteritems():
                    for kgen,gen in coll.iteritems():
                        for tk,tc in gen.iteritems():
                            tkc = UVETypeInfo()
                            tkc.type= tk
                            tkc.count = tc
                            tkc.generator = kgen
                            tkc.collector = kcoll
                            au_notifs.append(tkc)
                au_obj = AlarmgenUpdate(name=sandesh_global._source + ':' + \
                        sandesh_global._node_type + ':' + \
                        sandesh_global._module + ':' + \
                        sandesh_global._instance_id,
                        partition = pk,
                        table = ktab,
                        keys = None,
                        notifs = au_notifs)
                self._logger.debug('send notif stats: %s' % (au_obj.log()))
                au_obj.send()

        au = AlarmgenStatus()
        au.name = self._hostname
        au.counters = []
        au.alarmgens = []
        ags = AlarmgenStats()
        ags.instance =  self._instance_id
        ags.partitions = len(s_partitions)
        ags.keys = len(s_keys)
        ags.updates = n_updates
        au.counters.append(ags)

        agname = sandesh_global._source + ':' + \
                        sandesh_global._node_type + ':' + \
                        sandesh_global._module + ':' + \
                        sandesh_global._instance_id
        au.alarmgens.append(agname)
 
        atrace = AlarmgenStatusTrace(data = au)
        self._logger.debug('send alarmgen status : %s' % (atrace.log()))
        atrace.send()
         
    def handle_PartitionStatusReq(self, req):
        ''' Return the entire contents of the UVE DB for the 
            requested partitions
        '''
        if req.partition == -1:
            parts = self._workers.keys()
        else:
            parts = [req.partition]
        
        self._logger.info("Got PartitionStatusReq: %s" % str(parts))
        np = 1
        for pt in parts:
            resp = PartitionStatusResp()
            resp.partition = pt
            if self._workers.has_key(pt):
                resp.enabled = True
                resp.uves = []
                for kcoll,coll in self._workers[pt].contents().iteritems():
                    uci = UVECollInfo()
                    uci.collector = kcoll
                    uci.uves = []
                    for kgen,gen in coll.iteritems():
                        ugi = UVEGenInfo()
                        ugi.generator = kgen
                        ugi.uves = []
                        for uk,uc in gen.iteritems():
                            ukc = UVEKeyInfo()
                            ukc.key = uk
                            ukc.count = uc
                            ugi.uves.append(ukc)
                        uci.uves.append(ugi)
                    resp.uves.append(uci)
            else:
                resp.enabled = False
            if np == len(parts):
                mr = False
            else:
                mr = True
            resp.response(req.context(), mr)
            np = np + 1

    def disc_cb_coll(self, clist):
        '''
        Analytics node may be brought up/down any time. For UVE aggregation,
        alarmgen needs to know the list of all Analytics nodes (redis-uves).
        Periodically poll the Collector list [in lieu of
        redis-uve nodes] from the discovery service.
        '''
        newlist = []
        for elem in clist:
            (ipaddr,port) = elem
            newlist.append((ipaddr, self._conf.redis_server_port()))
        self._us.update_redis_uve_list(newlist)

    def disc_cb_ag(self, alist):
        '''
        Analytics node may be brought up/down any time. For partitioning,
        alarmgen needs to know the list of all Analytics nodes (alarmgens).
        Periodically poll the alarmgen list from the discovery service
        '''
        newlist = []
        for elem in alist:
            (ipaddr, inst) = elem
            newlist.append(ipaddr + ":" + inst)

        # We should always include ourselves in the list of members
        newset = set(newlist)
        newset.add(self._libpart_name)
        newlist = list(newset)
        if not self._libpart:
            self._libpart = self.start_libpart(newlist)
        else:
            self._libpart.update_cluster_list(newlist)

    def run(self):
        alarmgen_cpu_info = CpuInfoData()
        while True:
            before = time.time()
            mod_cpu_info = ModuleCpuInfo()
            mod_cpu_info.module_id = self._moduleid
            mod_cpu_info.instance_id = self._instance_id
            mod_cpu_info.cpu_info = alarmgen_cpu_info.get_cpu_info(
                system=False)
            mod_cpu_state = ModuleCpuState()
            mod_cpu_state.name = self._hostname

            mod_cpu_state.module_cpu_info = [mod_cpu_info]

            alarmgen_cpu_state_trace = ModuleCpuStateTrace(data=mod_cpu_state)
            alarmgen_cpu_state_trace.send()

            aly_cpu_state = AnalyticsCpuState()
            aly_cpu_state.name = self._hostname

            aly_cpu_info = ProcessCpuInfo()
            aly_cpu_info.module_id= self._moduleid
            aly_cpu_info.inst_id = self._instance_id
            aly_cpu_info.cpu_share = mod_cpu_info.cpu_info.cpu_share
            aly_cpu_info.mem_virt = mod_cpu_info.cpu_info.meminfo.virt
            aly_cpu_info.mem_res = mod_cpu_info.cpu_info.meminfo.res
            aly_cpu_state.cpu_info = [aly_cpu_info]

            aly_cpu_state_trace = AnalyticsCpuStateTrace(data=aly_cpu_state)
            aly_cpu_state_trace.send()

            # Send out the UVEKey-Count stats for this time period
            self.process_stats()

            duration = time.time() - before
            if duration < 60:
                gevent.sleep(60 - duration)
            else:
                self._logger.error("Periodic collection took %s sec" % duration)
Code example #3
class Controller(object):
    
    @staticmethod
    def fail_cb(manager, entrypoint, exception):
        sandesh_global._logger.info("Load failed for %s with exception %s" % \
                                     (str(entrypoint),str(exception)))
        
    def __init__(self, conf):
        self._conf = conf
        module = Module.ALARM_GENERATOR
        self._moduleid = ModuleNames[module]
        node_type = Module2NodeType[module]
        self._node_type_name = NodeTypeNames[node_type]
        self._hostname = socket.gethostname()
        self._instance_id = self._conf.worker_id()
        sandesh_global.init_generator(self._moduleid, self._hostname,
                                      self._node_type_name, self._instance_id,
                                      self._conf.collectors(), 
                                      self._node_type_name,
                                      self._conf.http_port(),
                                      ['opserver.sandesh', 'sandesh'])
        sandesh_global.set_logging_params(
            enable_local_log=self._conf.log_local(),
            category=self._conf.log_category(),
            level=self._conf.log_level(),
            file=self._conf.log_file(),
            enable_syslog=self._conf.use_syslog(),
            syslog_facility=self._conf.syslog_facility())
        self._logger = sandesh_global._logger

        # Trace buffer list
        self.trace_buf = [
            {'name':'DiscoveryMsg', 'size':1000}
        ]
        # Create trace buffers 
        for buf in self.trace_buf:
            sandesh_global.trace_buffer_create(name=buf['name'], size=buf['size'])

        tables = [ "ObjectCollectorInfo",
                   "ObjectDatabaseInfo",
                   "ObjectVRouter",
                   "ObjectBgpRouter",
                   "ObjectConfigNode" ] 
        self.mgrs = {}
        self.tab_alarms = {}
        for table in tables:
            self.mgrs[table] = hook.HookManager(
                namespace='contrail.analytics.alarms',
                name=table,
                invoke_on_load=True,
                invoke_args=(),
                on_load_failure_callback=Controller.fail_cb
            )
            
            for extn in self.mgrs[table][table]:
                self._logger.info('Loaded extensions for %s: %s,%s' % \
                    (table, extn.name, extn.entry_point_target))

            self.tab_alarms[table] = {}

        ConnectionState.init(sandesh_global, self._hostname, self._moduleid,
            self._instance_id,
            staticmethod(ConnectionState.get_process_state_cb),
            NodeStatusUVE, NodeStatus)

        self._us = UVEServer(None, self._logger, self._conf.redis_password())

        self._workers = {}

        self.disc = None
        self._libpart_name = self._hostname + ":" + self._instance_id
        self._libpart = None
        self._partset = set()
        if self._conf.discovery()['server']:
            import discoveryclient.client as client 
            data = {
                'ip-address': self._hostname ,
                'port': self._instance_id
            }
            self.disc = client.DiscoveryClient(
                self._conf.discovery()['server'],
                self._conf.discovery()['port'],
                ModuleNames[Module.ALARM_GENERATOR])
            self._logger.info("Disc Publish to %s : %s"
                          % (str(self._conf.discovery()), str(data)))
            self.disc.publish(ALARM_GENERATOR_SERVICE_NAME, data)
        else:
            # If there is no discovery service, use fixed redis_uve list
            redis_uve_list = []
            try:
                for redis_uve in self._conf.redis_uve_list():
                    redis_ip_port = redis_uve.split(':')
                    redis_ip_port = (redis_ip_port[0], int(redis_ip_port[1]))
                    redis_uve_list.append(redis_ip_port)
            except Exception as e:
                self._logger.error('Failed to parse redis_uve_list: %s' % e)
            else:
                self._us.update_redis_uve_list(redis_uve_list)

            # If there is no discovery service, use fixed alarmgen list
            self._libpart = self.start_libpart(self._conf.alarmgen_list())

        PartitionOwnershipReq.handle_request = self.handle_PartitionOwnershipReq
        PartitionStatusReq.handle_request = self.handle_PartitionStatusReq
        UVETableAlarmReq.handle_request = self.handle_UVETableAlarmReq 


    def libpart_cb(self, part_list):

        newset = set(part_list)
        oldset = self._partset
        self._partset = newset

        self._logger.info('Partition List : new %s old %s' % \
            (str(newset),str(oldset)))
        
        for addpart in (newset-oldset):
            self._logger.info('Partition Add : %s' % addpart)
            self.partition_change(addpart, True)
        
        for delpart in (oldset-newset):
            self._logger.info('Partition Del : %s' % delpart)
            self.partition_change(delpart, False)

    def start_libpart(self, ag_list):
        if not self._conf.zk_list():
            self._logger.error('Could not import libpartition: No zookeeper')
            return None
        if not ag_list:
            self._logger.error('Could not import libpartition: No alarmgen list')
            return None
        try:
            from libpartition.libpartition import PartitionClient
            self._logger.error('Starting PC')

            pc = PartitionClient("alarmgen",
                    self._libpart_name, ag_list,
                    self._conf.partitions(), self.libpart_cb,
                    ','.join(self._conf.zk_list()))
            self._logger.error('Started PC')
            return pc
        except Exception as e:
            self._logger.error('Could not import libpartition: %s' % str(e))
            return None

    def handle_uve_notif(self, uves):
        self._logger.debug("Changed UVEs : %s" % str(uves))
        no_handlers = set()
        for uv in uves:
            tab = uv.split(':',1)[0]
            if not self.mgrs.has_key(tab):
                no_handlers.add(tab)
                continue
            itr = self._us.multi_uve_get(uv, True, None, None, None, None)
            uve_data = itr.next()['value']
            if len(uve_data) == 0:
                self._logger.info("UVE %s deleted" % uv)
                if self.tab_alarms[tab].has_key(uv):
                    del self.tab_alarms[tab][uv]
                    ustruct = UVEAlarms(name = uv, deleted = True)
                    alarm_msg = AlarmTrace(data=ustruct, table=tab)
                    self._logger.info('send del alarm: %s' % (alarm_msg.log()))
                    alarm_msg.send()
                continue
            results = self.mgrs[tab].map_method("__call__", uv, uve_data)
            new_uve_alarms = {}
            for res in results:
                nm, errs = res
                self._logger.info("Alarm[%s] %s: %s" % (tab, nm, str(errs)))
                elems = []
                for ae in errs:
                    rule, val = ae
                    rv = AlarmElement(rule, val)
                    elems.append(rv)
                if len(elems):
                    new_uve_alarms[nm] = UVEAlarmInfo(type = nm,
                                           description = elems, ack = False)
            if (not self.tab_alarms[tab].has_key(uv)) or \
                       pprint.pformat(self.tab_alarms[tab][uv]) != \
                       pprint.pformat(new_uve_alarms):
                ustruct = UVEAlarms(name = uv, alarms = new_uve_alarms.values(),
                                    deleted = False)
                alarm_msg = AlarmTrace(data=ustruct, table=tab)
                self._logger.info('send alarm: %s' % (alarm_msg.log()))
                alarm_msg.send()
            self.tab_alarms[tab][uv] = new_uve_alarms
            
        if len(no_handlers):
            self._logger.info('No Alarm Handlers for %s' % str(no_handlers))

    def handle_UVETableAlarmReq(self, req):
        status = False
        if req.table == "all":
            parts = self.tab_alarms.keys()
        else:
            parts = [req.table]
        self._logger.info("Got UVETableAlarmReq : %s" % str(parts))
        np = 1
        for pt in parts:
            resp = UVETableAlarmResp(table = pt)
            uves = []
            for uk,uv in self.tab_alarms[pt].iteritems():
                alms = []
                for ak,av in uv.iteritems():
                    alms.append(av)
                uves.append(UVEAlarms(name = uk, alarms = alms))
            resp.uves = uves 
            if np == len(parts):
                mr = False
            else:
                mr = True
            resp.response(req.context(), mr)
            np = np + 1

    
    def partition_change(self, partno, enl):
        """
        Call this function when getting or giving up
        ownership of a partition
        Args:
            partno : Partition Number
            enl    : True for acquiring, False for giving up
        Returns: 
            status of operation (True for success)
        """
        status = False
        if enl:
            if self._workers.has_key(partno):
                self._logger.info("Dup partition %d" % partno)
            else:
                uvedb = self._us.get_part(partno)
                ph = UveStreamProc(','.join(self._conf.kafka_broker_list()),
                                   partno, "uve-" + str(partno),
                                   self._logger, uvedb,
                                   self.handle_uve_notif)
                ph.start()
                self._workers[partno] = ph
                status = True
        else:
            if self._workers.has_key(partno):
                ph = self._workers[partno]
                gevent.kill(ph)
                res,db = ph.get()
                print "Returned " + str(res)
                print "State :"
                for k,v in db.iteritems():
                    print "%s -> %s" % (k,str(v)) 
                del self._workers[partno]
                status = True
            else:
                self._logger.info("No partition %d" % partno)

        return status
    
    def handle_PartitionOwnershipReq(self, req):
        self._logger.info("Got PartitionOwnershipReq: %s" % str(req))
        status = self.partition_change(req.partition, req.ownership)

        resp = PartitionOwnershipResp()
        resp.status = status
        resp.response(req.context())
                
    def handle_PartitionStatusReq(self, req):
        
        if req.partition == -1:
            parts = self._workers.keys()
        else:
            parts = [req.partition]
        
        self._logger.info("Got PartitionStatusReq: %s" % str(parts))
        np = 1
        for pt in parts:
            resp = PartitionStatusResp()
            resp.partition = pt
            if self._workers.has_key(pt):
                resp.enabled = True
                resp.uves = []
                for kcoll,coll in self._workers[pt].contents().iteritems():
                    uci = UVECollInfo()
                    uci.collector = kcoll
                    uci.uves = []
                    for kgen,gen in coll.iteritems():
                        ugi = UVEGenInfo()
                        ugi.generator = kgen
                        ugi.uves = []
                        for uk,uc in gen.iteritems():
                            ukc = UVEKeyInfo()
                            ukc.key = uk
                            ukc.count = uc
                            ugi.uves.append(ukc)
                        uci.uves.append(ugi)
                    resp.uves.append(uci)
            else:
                resp.enabled = False
            if np == len(parts):
                mr = False
            else:
                mr = True
            resp.response(req.context(), mr)
            np = np + 1

    def disc_cb_coll(self, clist):
        '''
        Analytics node may be brought up/down any time. For UVE aggregation,
        alarmgen needs to know the list of all Analytics nodes (redis-uves).
        Periodically poll the Collector list [in lieu of
        redis-uve nodes] from the discovery service.
        '''
        newlist = []
        for elem in clist:
            (ipaddr,port) = elem
            newlist.append((ipaddr, self._conf.redis_server_port()))
        self._us.update_redis_uve_list(newlist)

    def disc_cb_ag(self, alist):
        '''
        Analytics node may be brought up/down any time. For partitioning,
        alarmgen needs to know the list of all Analytics nodes (alarmgens).
        Periodically poll the alarmgen list from the discovery service
        '''
        newlist = []
        for elem in alist:
            (ipaddr, inst) = elem
            newlist.append(ipaddr + ":" + inst)

        # We should always include ourselves in the list of members
        newset = set(newlist)
        newset.add(self._libpart_name)
        newlist = list(newset)
        if not self._libpart:
            self._libpart = self.start_libpart(newlist)
        else:
            self._libpart.update_cluster_list(newlist)

    def run(self):
        while True:
            gevent.sleep(60)
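
Every example loads its alarm handlers through hook.HookManager (the stevedore hook manager, judging by the constructor arguments) and invokes them with map_method("__call__", uv, uve_data). A hypothetical plugin satisfying the two-tuple contract unpacked by examples #1 and #3 (example #2 expects an extra severity value); the class name and rule below are illustrative only:

class ProcessStatusAlarm(object):
    """Hypothetical alarm for the contrail.analytics.alarms namespace;
    a real plugin would be registered via a setup.py entry point."""

    def __call__(self, uve_key, uve_data):
        # Return (alarm_name, [(rule, value), ...]); an empty list
        # produces no elems, so no UVEAlarmInfo is created above.
        errors = []
        if uve_data.get('process_status') == 'error':
            errors.append(('process_status == error', 'error'))
        return (self.__class__.__name__, errors)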
Code example #4
class Controller(object):
    @staticmethod
    def fail_cb(manager, entrypoint, exception):
        sandesh_global._logger.info("Load failed for %s with exception %s" % \
                                     (str(entrypoint),str(exception)))

    def __init__(self, conf):
        self._conf = conf
        module = Module.ALARM_GENERATOR
        self._moduleid = ModuleNames[module]
        node_type = Module2NodeType[module]
        self._node_type_name = NodeTypeNames[node_type]
        self._hostname = socket.gethostname()
        self._instance_id = '0'
        sandesh_global.init_generator(self._moduleid, self._hostname,
                                      self._node_type_name, self._instance_id,
                                      self._conf.collectors(),
                                      self._node_type_name,
                                      self._conf.http_port(),
                                      ['opserver.sandesh', 'sandesh'])
        sandesh_global.set_logging_params(
            enable_local_log=self._conf.log_local(),
            category=self._conf.log_category(),
            level=self._conf.log_level(),
            file=self._conf.log_file(),
            enable_syslog=self._conf.use_syslog(),
            syslog_facility=self._conf.syslog_facility())
        self._logger = sandesh_global._logger

        # Trace buffer list
        self.trace_buf = [{'name': 'DiscoveryMsg', 'size': 1000}]
        # Create trace buffers
        for buf in self.trace_buf:
            sandesh_global.trace_buffer_create(name=buf['name'],
                                               size=buf['size'])

        tables = [
            "ObjectCollectorInfo", "ObjectDatabaseInfo", "ObjectVRouter",
            "ObjectBgpRouter", "ObjectConfigNode"
        ]
        self.mgrs = {}
        self.tab_alarms = {}
        for table in tables:
            self.mgrs[table] = hook.HookManager(
                namespace='contrail.analytics.alarms',
                name=table,
                invoke_on_load=True,
                invoke_args=(),
                on_load_failure_callback=Controller.fail_cb)

            for extn in self.mgrs[table][table]:
                self._logger.info('Loaded extensions for %s: %s,%s' % \
                    (table, extn.name, extn.entry_point_target))

            self.tab_alarms[table] = {}

        ConnectionState.init(
            sandesh_global, self._hostname, self._moduleid, self._instance_id,
            staticmethod(ConnectionState.get_process_state_cb), NodeStatusUVE,
            NodeStatus)

        self._us = UVEServer(None, self._logger, self._conf.redis_password())

        self.disc = None
        if self._conf.discovery()['server']:
            import discoveryclient.client as client
            data = {'ip-address': self._hostname, 'port': self._instance_id}
            self.disc = client.DiscoveryClient(
                self._conf.discovery()['server'],
                self._conf.discovery()['port'],
                ModuleNames[Module.ALARM_GENERATOR])
            self._logger.info("Disc Publish to %s : %s" %
                              (str(self._conf.discovery()), str(data)))
            self.disc.publish(ALARM_GENERATOR_SERVICE_NAME, data)
        else:
            redis_uve_list = []
            try:
                for redis_uve in self._conf.redis_uve_list():
                    redis_ip_port = redis_uve.split(':')
                    redis_ip_port = (redis_ip_port[0], int(redis_ip_port[1]))
                    redis_uve_list.append(redis_ip_port)
            except Exception as e:
                self._logger.error('Failed to parse redis_uve_list: %s' % e)
            else:
                self._us.update_redis_uve_list(redis_uve_list)

        PartitionOwnershipReq.handle_request = self.handle_PartitionOwnershipReq
        PartitionStatusReq.handle_request = self.handle_PartitionStatusReq
        UVETableAlarmReq.handle_request = self.handle_UVETableAlarmReq

        self._workers = {}

    def handle_uve_notif(self, uves):
        self._logger.debug("Changed UVEs : %s" % str(uves))
        no_handlers = set()
        for uv in uves:
            tab = uv.split(':', 1)[0]
            if not self.mgrs.has_key(tab):
                no_handlers.add(tab)
                continue
            itr = self._us.multi_uve_get(uv, True, None, None, None, None)
            uve_data = itr.next()['value']
            if len(uve_data) == 0:
                if self.tab_alarms[tab].has_key(uv):
                    del self.tab_alarms[tab][uv]
                self._logger.info("UVE %s deleted" % uv)
                continue
            results = self.mgrs[tab].map_method("__call__", uv, uve_data)
            new_uve_alarms = {}
            for res in results:
                nm, errs = res
                self._logger.info("Alarm[%s] %s: %s" % (tab, nm, str(errs)))
                elems = []
                for ae in errs:
                    rule, val = ae
                    rv = AlarmElement(rule, val)
                    elems.append(rv)
                if len(elems):
                    new_uve_alarms[nm] = UVEAlarmInfo(type=nm,
                                                      description=elems,
                                                      ack=False)
            self.tab_alarms[tab][uv] = new_uve_alarms

        if len(no_handlers):
            self._logger.info('No Alarm Handlers for %s' % str(no_handlers))

    def handle_UVETableAlarmReq(self, req):
        status = False
        if req.table == "all":
            parts = self.tab_alarms.keys()
        else:
            parts = [req.table]
        self._logger.info("Got UVETableAlarmReq : %s" % str(parts))
        np = 1
        for pt in parts:
            resp = UVETableAlarmResp(table=pt)
            uves = []
            for uk, uv in self.tab_alarms[pt].iteritems():
                alms = []
                for ak, av in uv.iteritems():
                    alms.append(av)
                uves.append(UVEAlarms(name=uk, alarms=alms))
            resp.uves = uves
            if np == len(parts):
                mr = False
            else:
                mr = True
            resp.response(req.context(), mr)
            np = np + 1

    def handle_PartitionOwnershipReq(self, req):
        self._logger.info("Got PartitionOwnershipReq: %s" % str(req))
        status = False
        if req.ownership:
            if req.partition in self._workers:
                self._logger.info("Dup partition %d" % req.partition)
            else:
                uvedb = self._us.get_part(req.partition)
                ph = UveStreamProc(','.join(self._conf.kafka_broker_list()),
                                   req.partition, "uve-" + str(req.partition),
                                   self._logger, uvedb, self.handle_uve_notif)
                ph.start()
                self._workers[req.partition] = ph
                status = True
        else:
            # Ownership withdrawn: kill the worker greenlet and collect its final state.
            if req.partition in self._workers:
                ph = self._workers[req.partition]
                gevent.kill(ph)
                res, db = ph.get()
                print "Returned " + str(res)
                print "State :"
                for k, v in db.iteritems():
                    print "%s -> %s" % (k, str(v))
                del self._workers[req.partition]
                status = True
            else:
                self._logger.info("No partition %d" % req.partition)

        resp = PartitionOwnershipResp()
        resp.status = status
        resp.response(req.context())

    def handle_PartitionStatusReq(self, req):

        if req.partition == -1:
            parts = self._workers.keys()
        else:
            parts = [req.partition]

        self._logger.info("Got PartitionStatusReq: %s" % str(parts))
        np = 1
        for pt in parts:
            resp = PartitionStatusResp()
            resp.partition = pt
            if pt in self._workers:
                resp.enabled = True
                resp.uves = []
                for kcoll, coll in self._workers[pt].contents().iteritems():
                    uci = UVECollInfo()
                    uci.collector = kcoll
                    uci.uves = []
                    for kgen, gen in coll.iteritems():
                        ugi = UVEGenInfo()
                        ugi.generator = kgen
                        ugi.uves = list(gen)
                        uci.uves.append(ugi)
                    resp.uves.append(uci)
            else:
                resp.enabled = False
            mr = (np < len(parts))
            resp.response(req.context(), mr)
            np = np + 1

    def disc_cb_coll(self, clist):
        '''
        Analytics nodes may be brought up or down at any time. For UVE
        aggregation, alarmgen needs to know the list of all Analytics nodes
        (redis-uves). Periodically poll the Collector list [in lieu of the
        redis-uve nodes] from the discovery service.
        '''
        newlist = []
        for elem in clist:
            (ipaddr, port) = elem
            newlist.append((ipaddr, self._conf.redis_server_port()))
        self._us.update_redis_uve_list(newlist)

    def disc_cb_ag(self, alist):
        '''
        Analytics nodes may be brought up or down at any time. For
        partitioning, alarmgen needs to know the list of all Analytics nodes
        (alarmgens). Periodically poll the alarmgen list from the discovery
        service.
        '''
        # TODO: Hook up with the partitioning library
        pass

    def run(self):
        while True:
            gevent.sleep(60)
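
A side note on the worker teardown in handle_PartitionOwnershipReq above:
gevent allows a killed greenlet to catch GreenletExit and still return a
value, which the killer then retrieves with get(). A minimal, self-contained
sketch of that pattern (the worker here is an illustrative stand-in, not the
UveStreamProc implementation):

import gevent

def worker(partition):
    # Accumulate state until killed, then hand the final state back
    # to whoever calls get() on this greenlet.
    state = {}
    try:
        while True:
            gevent.sleep(1)
    except gevent.GreenletExit:
        return (True, state)

ph = gevent.spawn(worker, 0)
gevent.sleep(0)      # let the worker start
gevent.kill(ph)      # raises GreenletExit inside the worker
res, db = ph.get()   # -> (True, {}) once the worker unwinds
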
Code Example #5
class Controller(object):
    
    @staticmethod
    def fail_cb(manager, entrypoint, exception):
        sandesh_global._logger.info("Load failed for %s with exception %s" % \
                                     (str(entrypoint),str(exception)))
        
    def __init__(self, conf):
        self._conf = conf
        module = Module.ALARM_GENERATOR
        self._moduleid = ModuleNames[module]
        node_type = Module2NodeType[module]
        self._node_type_name = NodeTypeNames[node_type]
        self._hostname = socket.gethostname()
        self._instance_id = '0'
        sandesh_global.init_generator(self._moduleid, self._hostname,
                                      self._node_type_name, self._instance_id,
                                      self._conf.collectors(), 
                                      self._node_type_name,
                                      self._conf.http_port(),
                                      ['opserver.sandesh', 'sandesh'])
        sandesh_global.set_logging_params(
            enable_local_log=self._conf.log_local(),
            category=self._conf.log_category(),
            level=self._conf.log_level(),
            file=self._conf.log_file(),
            enable_syslog=self._conf.use_syslog(),
            syslog_facility=self._conf.syslog_facility())
        self._logger = sandesh_global._logger

        # Trace buffer list
        self.trace_buf = [
            {'name':'DiscoveryMsg', 'size':1000}
        ]
        # Create trace buffers 
        for buf in self.trace_buf:
            sandesh_global.trace_buffer_create(name=buf['name'], size=buf['size'])

        tables = [ "ObjectCollectorInfo",
                   "ObjectDatabaseInfo",
                   "ObjectVRouter",
                   "ObjectBgpRouter",
                   "ObjectConfigNode" ] 
        self.mgrs = {}
        self.tab_alarms = {}
        for table in tables:
            self.mgrs[table] = hook.HookManager(
                namespace='contrail.analytics.alarms',
                name=table,
                invoke_on_load=True,
                invoke_args=(),
                on_load_failure_callback=Controller.fail_cb
            )
            
            for extn in self.mgrs[table][table]:
                self._logger.info('Loaded extensions for %s: %s,%s' % \
                    (table, extn.name, extn.entry_point_target))

            self.tab_alarms[table] = {}

        ConnectionState.init(sandesh_global, self._hostname, self._moduleid,
            self._instance_id,
            staticmethod(ConnectionState.get_process_state_cb),
            NodeStatusUVE, NodeStatus)

        self._us = UVEServer(None, self._logger, self._conf.redis_password())

        self.disc = None
        if self._conf.discovery()['server']:
            import discoveryclient.client as client
            data = {'ip-address': self._hostname, 'port': self._instance_id}
            self.disc = client.DiscoveryClient(
                self._conf.discovery()['server'],
                self._conf.discovery()['port'],
                ModuleNames[Module.ALARM_GENERATOR])
            self._logger.info("Disc Publish to %s : %s"
                          % (str(self._conf.discovery()), str(data)))
            self.disc.publish(ALARM_GENERATOR_SERVICE_NAME, data)
        else:
            redis_uve_list = []
            try:
                for redis_uve in self._conf.redis_uve_list():
                    redis_ip_port = redis_uve.split(':')
                    redis_ip_port = (redis_ip_port[0], int(redis_ip_port[1]))
                    redis_uve_list.append(redis_ip_port)
            except Exception as e:
                self._logger.error('Failed to parse redis_uve_list: %s' % e)
            else:
                self._us.update_redis_uve_list(redis_uve_list)

        PartitionOwnershipReq.handle_request = self.handle_PartitionOwnershipReq
        PartitionStatusReq.handle_request = self.handle_PartitionStatusReq
        UVETableAlarmReq.handle_request = self.handle_UVETableAlarmReq 

        self._workers = {}

    def handle_uve_notif(self, uves):
        self._logger.debug("Changed UVEs : %s" % str(uves))
        no_handlers = set()
        for uv in uves:
            tab = uv.split(':', 1)[0]
            if tab not in self.mgrs:
                no_handlers.add(tab)
                continue
            itr = self._us.multi_uve_get(uv, True, None, None, None, None)
            uve_data = itr.next()['value']
            if len(uve_data) == 0:
                # UVE deleted upstream; drop any alarms recorded for it.
                if uv in self.tab_alarms[tab]:
                    del self.tab_alarms[tab][uv]
                self._logger.info("UVE %s deleted" % uv)
                continue
            results = self.mgrs[tab].map_method("__call__", uv, uve_data)
            new_uve_alarms = {}
            for res in results:
                nm, errs = res
                self._logger.info("Alarm[%s] %s: %s" % (tab, nm, str(errs)))
                elems = []
                for ae in errs:
                    rule, val = ae
                    rv = AlarmElement(rule, val)
                    elems.append(rv)
                if len(elems):
                    new_uve_alarms[nm] = UVEAlarmInfo(type=nm,
                                                      description=elems,
                                                      ack=False)
            self.tab_alarms[tab][uv] = new_uve_alarms

        if len(no_handlers):
            self._logger.info('No Alarm Handlers for %s' % str(no_handlers))

    def handle_UVETableAlarmReq(self, req):
        if req.table == "all":
            parts = self.tab_alarms.keys()
        else:
            parts = [req.table]
        self._logger.info("Got UVETableAlarmReq : %s" % str(parts))
        np = 1
        for pt in parts:
            resp = UVETableAlarmResp(table=pt)
            uves = []
            for uk, uv in self.tab_alarms[pt].iteritems():
                alms = []
                for ak, av in uv.iteritems():
                    alms.append(av)
                uves.append(UVEAlarms(name=uk, alarms=alms))
            resp.uves = uves
            mr = (np < len(parts))
            resp.response(req.context(), mr)
            np = np + 1
        
    def handle_PartitionOwnershipReq(self, req):
        self._logger.info("Got PartitionOwnershipReq: %s" % str(req))
        status = False
        if req.ownership:
            if req.partition in self._workers:
                self._logger.info("Dup partition %d" % req.partition)
            else:
                uvedb = self._us.get_part(req.partition)
                ph = UveStreamProc(','.join(self._conf.kafka_broker_list()),
                                   req.partition, "uve-" + str(req.partition),
                                   self._logger, uvedb,
                                   self.handle_uve_notif)
                ph.start()
                self._workers[req.partition] = ph
                status = True
        else:
            # Ownership withdrawn: kill the worker greenlet and collect its final state.
            if req.partition in self._workers:
                ph = self._workers[req.partition]
                gevent.kill(ph)
                res, db = ph.get()
                print "Returned " + str(res)
                print "State :"
                for k, v in db.iteritems():
                    print "%s -> %s" % (k, str(v))
                del self._workers[req.partition]
                status = True
            else:
                self._logger.info("No partition %d" % req.partition)

        resp = PartitionOwnershipResp()
        resp.status = status
        resp.response(req.context())

    def handle_PartitionStatusReq(self, req):
        
        if req.partition == -1:
            parts = self._workers.keys()
        else:
            parts = [req.partition]
        
        self._logger.info("Got PartitionStatusReq: %s" % str(parts))
        np = 1
        for pt in parts:
            resp = PartitionStatusResp()
            resp.partition = pt
            if pt in self._workers:
                resp.enabled = True
                resp.uves = []
                for kcoll, coll in self._workers[pt].contents().iteritems():
                    uci = UVECollInfo()
                    uci.collector = kcoll
                    uci.uves = []
                    for kgen, gen in coll.iteritems():
                        ugi = UVEGenInfo()
                        ugi.generator = kgen
                        ugi.uves = list(gen)
                        uci.uves.append(ugi)
                    resp.uves.append(uci)
            else:
                resp.enabled = False
            mr = (np < len(parts))
            resp.response(req.context(), mr)
            np = np + 1

    def disc_cb_coll(self, clist):
        '''
        Analytics nodes may be brought up or down at any time. For UVE
        aggregation, alarmgen needs to know the list of all Analytics nodes
        (redis-uves). Periodically poll the Collector list [in lieu of the
        redis-uve nodes] from the discovery service.
        '''
        newlist = []
        for elem in clist:
            (ipaddr, port) = elem
            newlist.append((ipaddr, self._conf.redis_server_port()))
        self._us.update_redis_uve_list(newlist)

    def disc_cb_ag(self, alist):
        '''
        Analytics nodes may be brought up or down at any time. For
        partitioning, alarmgen needs to know the list of all Analytics nodes
        (alarmgens). Periodically poll the alarmgen list from the discovery
        service.
        '''
        # TODO: Hook up with the partitioning library
        pass

    def run(self):
        while True:
            gevent.sleep(60)
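
Both handle_UVETableAlarmReq and handle_PartitionStatusReq use the same
multi-response idiom: each response except the final one is sent with the
"more" flag set, telling the receiver whether further responses follow. A
distilled, self-contained sketch of the idiom (send is a hypothetical
stand-in for resp.response(req.context(), mr)):

def send_all(parts, send):
    # One response per part; "more" is True for every response except
    # the last, which marks the end of the stream.
    total = len(parts)
    for idx, part in enumerate(parts, 1):
        send(part, idx < total)

# Example: the final invocation of the callback receives more=False.
send_all(["ObjectVRouter", "ObjectBgpRouter"],
         lambda part, more: None)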