Example #1
0
class Monitor(object):
    def __init__(self, skip_rabbit):
        self.faucet_event = []
        self.m_queue = queue.Queue()
        self.skip_rabbit = skip_rabbit
        self.logger = logger
        self.rabbit_channel_connection_local = None
        self.rabbit_channel_connection_local_fa = None

        # get config options
        self.controller = Config().get_config()

        # timer class to call things periodically in its own thread
        self.schedule = schedule

        # setup prometheus
        self.prom = Prometheus()
        try:
            self.prom.initialize_metrics()
        except Exception as e:  # pragma: no cover
            self.logger.debug(
                'Prometheus metrics are already initialized: {0}'.format(
                    str(e)))
        Prometheus.start()

        # initialize sdnconnect
        self.s = SDNConnect(self.controller)

        # schedule periodic scan of endpoints thread
        self.schedule.every(self.controller['scan_frequency']).seconds.do(
            partial(schedule_job_kickurl, schedule_func=self))

        # schedule periodic reinvestigations thread
        self.schedule.every(
            self.controller['reinvestigation_frequency']).seconds.do(
                partial(schedule_job_reinvestigation, schedule_func=self))

        # schedule all threads
        self.schedule_thread = threading.Thread(target=partial(
            schedule_thread_worker, schedule=self.schedule),
                                                name='st_worker')

    def format_rabbit_message(self, item):
        '''
        read a message off the rabbit_q
        the message should be item = (routing_key,msg)
        '''
        ret_val = {}

        routing_key, my_obj = item
        self.logger.debug('rabbit_message:{0}'.format(my_obj))
        my_obj = json.loads(my_obj)
        self.logger.debug('routing_key:{0}'.format(routing_key))
        remove_list = []

        if routing_key == 'poseidon.algos.decider':
            self.logger.debug('decider value:{0}'.format(my_obj))
            for name, message in my_obj.items():
                endpoint = self.s.endpoints.get(name, None)
                if endpoint and message.get('plugin', None) == 'ncapture':
                    endpoint.trigger('unknown')
                    endpoint.p_next_state = None
                    endpoint.p_prev_states.append(
                        (endpoint.state, int(time.time())))
                    if message.get('valid', False):
                        ret_val.update(my_obj)
                    else:
                        ret_val = {}
                        break
        elif routing_key == 'poseidon.action.ignore':
            for name in my_obj:
                endpoint = self.s.endpoints.get(name, None)
                if endpoint:
                    endpoint.ignore = True
        elif routing_key == 'poseidon.action.clear.ignored':
            for name in my_obj:
                endpoint = self.s.endpoints.get(name, None)
                if endpoint:
                    endpoint.ignore = False
        elif routing_key == 'poseidon.action.change':
            for name, state in my_obj:
                endpoint = self.s.endpoints.get(name, None)
                if endpoint:
                    try:
                        if state != 'mirror' and state != 'reinvestigate' and (
                                endpoint.state == 'mirroring'
                                or endpoint.state == 'reinvestigating'):
                            status = Actions(endpoint,
                                             self.s.sdnc).unmirror_endpoint()
                            if not status:
                                self.logger.warning(
                                    'Unable to unmirror the endpoint: {0}'.
                                    format(endpoint.name))
                        endpoint.trigger(state)
                        endpoint.p_next_state = None
                        endpoint.p_prev_states.append(
                            (endpoint.state, int(time.time())))
                        if endpoint.state == 'mirroring' or endpoint.state == 'reinvestigating':
                            status = Actions(endpoint,
                                             self.s.sdnc).mirror_endpoint()
                            if status:
                                try:
                                    self.s.r.hincrby('vent_plugin_counts',
                                                     'ncapture')
                                except Exception as e:  # pragma: no cover
                                    self.logger.error(
                                        'Failed to update count of plugins because: {0}'
                                        .format(str(e)))
                            else:
                                self.logger.warning(
                                    'Unable to mirror the endpoint: {0}'.
                                    format(endpoint.name))
                    except Exception as e:  # pragma: no cover
                        self.logger.error(
                            'Unable to change endpoint {0} because: {1}'.
                            format(endpoint.name, str(e)))
        elif routing_key == 'poseidon.action.update_acls':
            for ip in my_obj:
                rules = my_obj[ip]
                endpoints = self.s.endpoints_by_ip(ip)
                if endpoints:
                    endpoint = endpoints[0]
                    try:
                        status = Actions(endpoint, self.s.sdnc).update_acls(
                            rules_file=self.controller['RULES_FILE'],
                            endpoints=endpoints,
                            force_apply_rules=rules)
                        if not status:
                            self.logger.warning(
                                'Unable to apply rules: {0} to endpoint: {1}'.
                                format(rules, endpoint.name))
                    except Exception as e:
                        self.logger.error(
                            'Unable to apply rules: {0} to endpoint: {1} because {2}'
                            .format(rules, endpoint.name, str(e)))
        elif routing_key == 'poseidon.action.remove':
            remove_list = [name for name in my_obj]
        elif routing_key == 'poseidon.action.remove.ignored':
            remove_list = [
                endpoint.name for endpoint in self.s.endpoints.values()
                if endpoint.ignore
            ]
        elif routing_key == 'poseidon.action.remove.inactives':
            remove_list = [
                endpoint.name for endpoint in self.s.endpoints.values()
                if endpoint.state == 'inactive'
            ]
        elif routing_key == self.controller['FA_RABBIT_ROUTING_KEY']:
            self.logger.debug('FAUCET Event:{0}'.format(my_obj))
            ret_val.update(my_obj)
        for endpoint_name in remove_list:
            if endpoint_name in self.s.endpoints:
                del self.s.endpoints[endpoint_name]
        return ret_val

    def process(self):
        global CTRL_C
        signal.signal(signal.SIGINT, partial(self.signal_handler))
        while not CTRL_C['STOP']:
            time.sleep(1)

            found_work, item = self.get_q_item()
            ml_returns = {}

            if found_work and item[0] == self.controller[
                    'FA_RABBIT_ROUTING_KEY']:
                self.faucet_event.append(self.format_rabbit_message(item))
                self.logger.debug('Faucet event: {0}'.format(
                    self.faucet_event))
            elif found_work:
                msg = self.format_rabbit_message(item)
                if 'data' in msg:
                    ml_returns = msg['data']
                if ml_returns:
                    self.logger.info('ML results: {0}'.format(ml_returns))
                extras = deepcopy(ml_returns)
                # process results from ml output and update impacted endpoints
                for ep in self.s.endpoints.values():
                    if ep.name in ml_returns:
                        del extras[ep.name]
                    if ep.name in ml_returns and 'valid' in ml_returns[
                            ep.name] and not ep.ignore:
                        if ep.state in ['mirroring', 'reinvestigating']:
                            status = Actions(ep,
                                             self.s.sdnc).unmirror_endpoint()
                            if not status:
                                self.logger.warning(
                                    'Unable to unmirror the endpoint: {0}'.
                                    format(ep.name))
                        if ml_returns[ep.name]['valid']:
                            ml_decision = None
                            if 'decisions' in ml_returns[
                                    ep.name] and 'behavior' in ml_returns[
                                        ep.name]['decisions']:
                                ml_decision = ml_returns[
                                    ep.name]['decisions']['behavior']
                            if ml_decision == 'normal':
                                ep.known()
                            else:
                                ep.abnormal()
                        else:
                            ep.unknown()
                        ep.p_prev_states.append((ep.state, int(time.time())))
                extra_machines = []
                self.logger.debug('extra devices: {0}'.format(extras))
                for device in extras:
                    if device['valid']:
                        extra_machine = {
                            'mac': device['source_mac'],
                            'segment': NO_DATA,
                            'port': NO_DATA,
                            'tenant': NO_DATA,
                            'active': 0,
                            'name': None
                        }
                        try:
                            source_ip = ipaddress.ip_address(
                                device['source_ip'])
                        except ValueError:
                            source_ip = None
                        if source_ip:
                            extra_machine['ipv%u' %
                                          source_ip.version] = str(source_ip)
                        extra_machines.append(extra_machine)
                self.s.find_new_machines(extra_machines)

            queued_endpoints = [
                endpoint for endpoint in self.s.endpoints.values()
                if not endpoint.ignore and endpoint.state == 'queued'
                and endpoint.p_next_state != 'inactive'
            ]
            self.s.investigations = len([
                endpoint for endpoint in self.s.endpoints.values()
                if endpoint.state in ['mirroring', 'reinvestigating']
            ])
            # mirror things in the order they got added to the queue
            queued_endpoints = sorted(queued_endpoints,
                                      key=lambda x: x.p_prev_states[-1][1])

            investigation_budget = max(
                self.controller['max_concurrent_reinvestigations'] -
                self.s.investigations, 0)
            self.logger.debug(
                'investigations {0}, budget {1}, queued {2}'.format(
                    str(self.s.investigations), str(investigation_budget),
                    str(len(queued_endpoints))))

            for endpoint in queued_endpoints[:investigation_budget]:
                endpoint.trigger(endpoint.p_next_state)
                endpoint.p_next_state = None
                endpoint.p_prev_states.append(
                    (endpoint.state, int(time.time())))
                status = Actions(endpoint, self.s.sdnc).mirror_endpoint()
                if status:
                    try:
                        if self.s.r:
                            self.s.r.hincrby('vent_plugin_counts', 'ncapture')
                    except Exception as e:  # pragma: no cover
                        self.logger.error(
                            'Failed to update count of plugins because: {0}'.
                            format(str(e)))
                else:
                    self.logger.warning(
                        'Unable to mirror the endpoint: {0}'.format(
                            endpoint.name))

            for endpoint in self.s.endpoints.values():
                if not endpoint.ignore:
                    if self.s.sdnc:
                        if endpoint.state == 'unknown':
                            endpoint.p_next_state = 'mirror'
                            endpoint.queue()
                            endpoint.p_prev_states.append(
                                (endpoint.state, int(time.time())))
                        elif endpoint.state in [
                                'mirroring', 'reinvestigating'
                        ]:
                            cur_time = int(time.time())
                            # timeout after 2 times the reinvestigation frequency
                            # in case something didn't report back, put back in an
                            # unknown state
                            if cur_time - endpoint.p_prev_states[-1][
                                    1] > 2 * self.controller[
                                        'reinvestigation_frequency']:
                                self.logger.debug(
                                    'timing out: {0} and setting to unknown'.
                                    format(endpoint.name))
                                status = Actions(
                                    endpoint, self.s.sdnc).unmirror_endpoint()
                                if not status:
                                    self.logger.warning(
                                        'Unable to unmirror the endpoint: {0}'.
                                        format(endpoint.name))
                                endpoint.unknown()
                                endpoint.p_prev_states.append(
                                    (endpoint.state, int(time.time())))
                    else:
                        if endpoint.state != 'known':
                            endpoint.known()
        self.s.store_endpoints()
        return

    def get_q_item(self):
        '''
        attempt to get a work item from the queue
        m_queue -> (routing_key, body)
        a read from get_q_item should be of the form
        (boolean,(routing_key, body))
        '''
        found_work = False
        item = None
        global CTRL_C

        if not CTRL_C['STOP']:
            try:
                item = self.m_queue.get(False)
                found_work = True
                self.m_queue.task_done()
            except queue.Empty:  # pragma: no cover
                pass

        return (found_work, item)

    def shutdown(self):
        ''' gracefully shut down. '''
        self.s.clear_filters()
        for job in self.schedule.jobs:
            self.logger.debug('shutdown :{0}'.format(job))
            self.schedule.cancel_job(job)
        if self.rabbit_channel_connection_local:
            self.rabbit_channel_connection_local.close()
        if self.rabbit_channel_connection_local_fa:
            self.rabbit_channel_connection_local_fa.close()
        self.logger.debug('SHUTTING DOWN')
        self.logger.debug('EXITING')
        sys.exit()

    def signal_handler(self, signal, frame):
        ''' hopefully eat a CTRL_C and signal system shutdown '''
        global CTRL_C
        CTRL_C['STOP'] = True
        self.logger.debug('CTRL-C: {0}'.format(CTRL_C))
        try:
            self.shutdown()
        except Exception as e:  # pragma: no cover
            self.logger.debug(
                'Failed to handle signal properly because: {0}'.format(str(e)))
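
The loop in process() above caps new mirroring work with an investigation budget: max_concurrent_reinvestigations minus the endpoints already being investigated, clamped at zero, then used to slice the time-sorted queue. A small, self-contained sketch of that arithmetic (the names and counts below are invented for illustration):

# Hedged sketch of the budget arithmetic in process(); values are made up.
max_concurrent_reinvestigations = 2
investigations = 1                            # already mirroring/reinvestigating
queued_endpoints = ['ep-a', 'ep-b', 'ep-c']   # sorted oldest-first
investigation_budget = max(max_concurrent_reinvestigations - investigations, 0)
print(queued_endpoints[:investigation_budget])  # -> ['ep-a']
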
Example #2
0
class Monitor(object):
    def __init__(self, skip_rabbit):
        self.faucet_event = []
        self.m_queue = queue.Queue()
        self.skip_rabbit = skip_rabbit
        self.logger = logger

        # get config options
        self.controller = Config().get_config()

        # timer class to call things periodically in its own thread
        self.schedule = schedule

        # setup prometheus
        self.prom = Prometheus()
        try:
            self.prom.initialize_metrics()
        except Exception as e:  # pragma: no cover
            self.logger.debug(
                'Prometheus metrics are already initialized: {0}'.format(
                    str(e)))
        Prometheus.start()

        # initialize sdnconnect
        self.s = SDNConnect()

        # cleanup any old filters
        if isinstance(self.s.sdnc, FaucetProxy):
            Parser().clear_mirrors(self.controller['CONFIG_FILE'])
        elif isinstance(self.s.sdnc, BcfProxy):
            self.s.sdnc.remove_filter_rules()

        # retrieve endpoints from redis
        self.s.get_stored_endpoints()
        # set all retrieved endpoints to inactive at the start
        for endpoint in self.s.endpoints:
            if not endpoint.ignore:
                if endpoint.state != 'inactive':
                    if endpoint.state == 'mirroring':
                        endpoint.p_next_state = 'mirror'
                    elif endpoint.state == 'reinvestigating':
                        endpoint.p_next_state = 'reinvestigate'
                    elif endpoint.state == 'queued':
                        endpoint.p_next_state = 'queue'
                    elif endpoint.state in ['known', 'abnormal']:
                        endpoint.p_next_state = endpoint.state
                    endpoint.endpoint_data['active'] = 0
                    endpoint.inactive()
                    endpoint.p_prev_states.append(
                        (endpoint.state, int(time.time())))
        # store changes to state
        self.s.store_endpoints()

        # schedule periodic scan of endpoints thread
        self.schedule.every(self.controller['scan_frequency']).seconds.do(
            partial(schedule_job_kickurl, func=self))

        # schedule periodic reinvestigations thread
        self.schedule.every(
            self.controller['reinvestigation_frequency']).seconds.do(
                partial(schedule_job_reinvestigation, func=self))

        # schedule all threads
        self.schedule_thread = threading.Thread(target=partial(
            schedule_thread_worker, schedule=self.schedule),
                                                name='st_worker')

    def format_rabbit_message(self, item):
        '''
        read a message off the rabbit_q
        the message should be item = (routing_key,msg)
        '''
        ret_val = {}

        routing_key, my_obj = item
        self.logger.debug('rabbit_message:{0}'.format(my_obj))
        my_obj = json.loads(my_obj)
        self.logger.debug('routing_key:{0}'.format(routing_key))
        if routing_key == 'poseidon.algos.decider':
            self.logger.debug('decider value:{0}'.format(my_obj))
            # TODO if valid response then send along otherwise nothing
            for key in my_obj:
                ret_val[key] = my_obj[key]
        elif routing_key == 'poseidon.action.ignore':
            for name in my_obj:
                for endpoint in self.s.endpoints:
                    if name == endpoint.name:
                        endpoint.ignore = True
        elif routing_key == 'poseidon.action.clear.ignored':
            for name in my_obj:
                for endpoint in self.s.endpoints:
                    if name == endpoint.name:
                        endpoint.ignore = False
        elif routing_key == 'poseidon.action.change':
            for name, state in my_obj:
                for endpoint in self.s.endpoints:
                    if name == endpoint.name:
                        try:
                            if state != 'mirror' and state != 'reinvestigate' and (
                                    endpoint.state == 'mirroring'
                                    or endpoint.state == 'reinvestigating'):
                                status = Actions(
                                    endpoint, self.s.sdnc).unmirror_endpoint()
                                if not status:
                                    self.logger.warning(
                                        'Unable to unmirror the endpoint: {0}'.
                                        format(endpoint.name))
                            endpoint.trigger(state)
                            endpoint.p_next_state = None
                            endpoint.p_prev_states.append(
                                (endpoint.state, int(time.time())))
                            if endpoint.state == 'mirroring' or endpoint.state == 'reinvestigating':
                                status = Actions(
                                    endpoint, self.s.sdnc).mirror_endpoint()
                                if status:
                                    try:
                                        self.s.r.hincrby(
                                            'vent_plugin_counts', 'ncapture')
                                    except Exception as e:  # pragma: no cover
                                        self.logger.error(
                                            'Failed to update count of plugins because: {0}'
                                            .format(str(e)))
                                else:
                                    self.logger.warning(
                                        'Unable to mirror the endpoint: {0}'.
                                        format(endpoint.name))
                        except Exception as e:  # pragma: no cover
                            self.logger.error(
                                'Unable to change endpoint {0} because: {1}'.
                                format(endpoint.name, str(e)))
        elif routing_key == 'poseidon.action.remove':
            remove_list = []
            for name in my_obj:
                for endpoint in self.s.endpoints:
                    if name == endpoint.name:
                        remove_list.append(endpoint)
            for endpoint in remove_list:
                self.s.endpoints.remove(endpoint)
        elif routing_key == 'poseidon.action.remove.ignored':
            remove_list = []
            for endpoint in self.s.endpoints:
                if endpoint.ignore:
                    remove_list.append(endpoint)
            for endpoint in remove_list:
                self.s.endpoints.remove(endpoint)
        elif routing_key == 'poseidon.action.remove.inactives':
            remove_list = []
            for endpoint in self.s.endpoints:
                if endpoint.state == 'inactive':
                    remove_list.append(endpoint)
            for endpoint in remove_list:
                self.s.endpoints.remove(endpoint)
        elif routing_key == self.controller['FA_RABBIT_ROUTING_KEY']:
            self.logger.debug('FAUCET Event:{0}'.format(my_obj))
            for key in my_obj:
                ret_val[key] = my_obj[key]
        return ret_val

    def process(self):
        global CTRL_C
        signal.signal(signal.SIGINT, partial(self.signal_handler))
        while not CTRL_C['STOP']:
            time.sleep(1)

            found_work, item = self.get_q_item()
            ml_returns = {}

            if found_work and item[0] == self.controller[
                    'FA_RABBIT_ROUTING_KEY']:
                self.faucet_event.append(self.format_rabbit_message(item))
                self.logger.debug('Faucet event: {0}'.format(
                    self.faucet_event))
            elif found_work:
                ml_returns = self.format_rabbit_message(item)
                if ml_returns:
                    self.logger.info('ML results: {0}'.format(ml_returns))
                extras = deepcopy(ml_returns)
                # process results from ml output and update impacted endpoints
                for ep in self.s.endpoints:
                    if ep.name in ml_returns:
                        del extras[ep.name]
                    if ep.name in ml_returns and 'valid' in ml_returns[
                            ep.name] and not ep.ignore:
                        if ep.state in ['mirroring', 'reinvestigating']:
                            status = Actions(ep,
                                             self.s.sdnc).unmirror_endpoint()
                            if not status:
                                self.logger.warning(
                                    'Unable to unmirror the endpoint: {0}'.
                                    format(ep.name))
                            self.s.investigations -= 1
                        if ml_returns[ep.name]['valid']:
                            ml_decision = None
                            if 'decisions' in ml_returns[
                                    ep.name] and 'behavior' in ml_returns[
                                        ep.name]['decisions']:
                                ml_decision = ml_returns[
                                    ep.name]['decisions']['behavior']
                            if ml_decision == 'normal':
                                ep.known()
                            else:
                                ep.abnormal()
                        else:
                            ep.unknown()
                        ep.p_prev_states.append((ep.state, int(time.time())))
                extra_machines = []
                for device in extras:
                    if extras[device]['valid']:
                        extra_machine = {
                            'mac': extras[device]['source_mac'],
                            'segment': 'NO DATA',
                            'port': 'NO DATA',
                            'tenant': 'NO DATA',
                            'active': 0,
                            'name': None
                        }
                        if ':' in extras[device]['source_ip']:
                            extra_machine['ipv6'] = extras[device]['source_ip']
                            extra_machine['ipv4'] = 0
                        else:
                            extra_machine['ipv4'] = extras[device]['source_ip']
                            extra_machine['ipv6'] = 0
                        extra_machines.append(extra_machine)
                self.s.find_new_machines(extra_machines)
            # mirror things in the order they got added to the queue
            queued_endpoints = []
            for endpoint in self.s.endpoints:
                if not endpoint.ignore:
                    if endpoint.state == 'queued':
                        queued_endpoints.append(
                            (endpoint.name, endpoint.p_prev_states[-1][1]))
            queued_endpoints = sorted(queued_endpoints, key=lambda x: x[1])
            for ep in queued_endpoints:
                for endpoint in self.s.endpoints:
                    if ep[0] == endpoint.name:
                        if self.s.investigations < self.controller[
                                'max_concurrent_reinvestigations']:
                            self.s.investigations += 1
                            endpoint.trigger(endpoint.p_next_state)
                            endpoint.p_next_state = None
                            endpoint.p_prev_states.append(
                                (endpoint.state, int(time.time())))
                            status = Actions(endpoint,
                                             self.s.sdnc).mirror_endpoint()
                            if status:
                                try:
                                    self.s.r.hincrby('vent_plugin_counts',
                                                     'ncapture')
                                except Exception as e:  # pragma: no cover
                                    self.logger.error(
                                        'Failed to update count of plugins because: {0}'
                                        .format(str(e)))
                            else:
                                self.logger.warning(
                                    'Unable to mirror the endpoint: {0}'.
                                    format(endpoint.name))

            for endpoint in self.s.endpoints:
                if not endpoint.ignore:
                    if self.s.sdnc:
                        if endpoint.state == 'unknown':
                            endpoint.p_next_state = 'mirror'
                            endpoint.queue()
                            endpoint.p_prev_states.append(
                                (endpoint.state, int(time.time())))
                        elif endpoint.state in [
                                'mirroring', 'reinvestigating'
                        ]:
                            cur_time = int(time.time())
                            # timeout after 2 times the reinvestigation frequency
                            # in case something didn't report back, put back in an
                            # unknown state
                            if cur_time - endpoint.p_prev_states[-1][
                                    1] > 2 * self.controller[
                                        'reinvestigation_frequency']:
                                status = Actions(
                                    endpoint, self.s.sdnc).unmirror_endpoint()
                                if not status:
                                    self.logger.warning(
                                        'Unable to unmirror the endpoint: {0}'.
                                        format(endpoint.name))
                                endpoint.unknown()
                                self.s.investigations -= 1
                                endpoint.p_prev_states.append(
                                    (endpoint.state, int(time.time())))
                    else:
                        if endpoint.state != 'known':
                            endpoint.known()
        return

    def get_q_item(self):
        '''
        attempt to get a work item from the queue
        m_queue -> (routing_key, body)
        a read from get_q_item should be of the form
        (boolean,(routing_key, body))
        '''
        found_work = False
        item = None
        global CTRL_C

        if not CTRL_C['STOP']:
            try:
                item = self.m_queue.get(False)
                found_work = True
                self.m_queue.task_done()
            except queue.Empty:  # pragma: no cover
                pass

        return (found_work, item)

    def signal_handler(self, signal, frame):
        ''' hopefully eat a CTRL_C and signal system shutdown '''
        global CTRL_C
        if isinstance(self.s.sdnc, FaucetProxy):
            Parser().clear_mirrors(self.controller['CONFIG_FILE'])
        elif isinstance(self.s.sdnc, BcfProxy):
            self.logger.debug('removing bcf filter rules')
            retval = self.s.sdnc.remove_filter_rules()
            self.logger.debug('removed filter rules: {0}'.format(retval))

        CTRL_C['STOP'] = True
        self.logger.debug('CTRL-C: {0}'.format(CTRL_C))
        try:
            for job in self.schedule.jobs:
                self.logger.debug('CTRLC:{0}'.format(job))
                self.schedule.cancel_job(job)
            self.rabbit_channel_connection_local.close()
            self.rabbit_channel_connection_local_fa.close()

            self.logger.debug('SHUTTING DOWN')
            self.logger.debug('EXITING')
            sys.exit()
        except Exception as e:  # pragma: no cover
            self.logger.debug(
                'Failed to handle signal properly because: {0}'.format(str(e)))
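
format_rabbit_message in the example above unpacks 'poseidon.action.change' bodies with "for name, state in my_obj" after json.loads, i.e. the body is a JSON list of [name, state] pairs. A minimal, runnable sketch of that message shape (the endpoint name and state are illustrative only):

import json

# Illustrative 'poseidon.action.change' payload as consumed above;
# 'endpoint-1' and 'mirror' are made-up values.
body = json.dumps([['endpoint-1', 'mirror']])
for name, state in json.loads(body):
    print(name, state)  # -> endpoint-1 mirror
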
Example #3
0
class Monitor:
    def __init__(self, skip_rabbit):
        self.faucet_event = []
        self.m_queue = queue.Queue()
        self.skip_rabbit = skip_rabbit
        self.logger = logger
        self.rabbit_channel_connection_local = None
        self.rabbit_channel_connection_local_fa = None

        # get config options
        self.controller = Config().get_config()

        # timer class to call things periodically in its own thread
        self.schedule = schedule

        # setup prometheus
        self.prom = Prometheus()
        try:
            self.prom.initialize_metrics()
        except Exception as e:  # pragma: no cover
            self.logger.debug(
                'Prometheus metrics are already initialized: {0}'.format(
                    str(e)))
        Prometheus.start()

        # initialize sdnconnect
        self.s = SDNConnect(self.controller)

        # schedule periodic scan of endpoints thread
        self.schedule.every(self.controller['scan_frequency']).seconds.do(
            partial(schedule_job_kickurl, schedule_func=self))

        # schedule periodic reinvestigations thread
        self.schedule.every(
            self.controller['reinvestigation_frequency']).seconds.do(
                partial(schedule_job_reinvestigation, schedule_func=self))

        # schedule all threads
        self.schedule_thread = threading.Thread(target=partial(
            schedule_thread_worker, schedule=self.schedule),
                                                name='st_worker')

    def update_routing_key_time(self, routing_key):
        self.prom.prom_metrics['last_rabbitmq_routing_key_time'].labels(
            routing_key=routing_key).set(time.time())

    def format_rabbit_message(self, item):
        '''
        read a message off the rabbit_q
        the message should be item = (routing_key,msg)
        '''
        routing_key, my_obj = item
        self.logger.debug('routing_key: {0} rabbit_message: {1}'.format(
            routing_key, my_obj))
        my_obj = json.loads(my_obj)

        def handler_algos_decider(my_obj):
            self.logger.debug('decider value:{0}'.format(my_obj))
            for name, message in my_obj.items():
                endpoint = self.s.endpoints.get(name, None)
                if endpoint and message.get('plugin', None) == 'ncapture':
                    endpoint.trigger('unknown')
                    endpoint.p_next_state = None
                    endpoint.p_prev_states.append(
                        (endpoint.state, int(time.time())))
                    if message.get('valid', False):
                        return (my_obj, None)
                    break
            return ({}, None)

        def handler_action_ignore(my_obj):
            for name in my_obj:
                endpoint = self.s.endpoints.get(name, None)
                if endpoint:
                    endpoint.ignore = True
            return ({}, None)

        def handler_action_clear_ignored(my_obj):
            for name in my_obj:
                endpoint = self.s.endpoints.get(name, None)
                if endpoint:
                    endpoint.ignore = False
            return ({}, None)

        def handler_action_change(my_obj):
            for name, state in my_obj:
                endpoint = self.s.endpoints.get(name, None)
                if endpoint:
                    try:
                        if (state != 'mirror' and state != 'reinvestigate'
                                and (endpoint.state == 'mirroring'
                                     or endpoint.state == 'reinvestigating')):
                            self.s.unmirror_endpoint(endpoint)
                        endpoint.trigger(state)
                        endpoint.p_next_state = None
                        endpoint.p_prev_states.append(
                            (endpoint.state, int(time.time())))
                        if endpoint.state == 'mirroring' or endpoint.state == 'reinvestigating':
                            self.s.mirror_endpoint(endpoint)
                    except Exception as e:  # pragma: no cover
                        self.logger.error(
                            'Unable to change endpoint {0} because: {1}'.
                            format(endpoint.name, str(e)))
            return ({}, None)

        def handler_action_update_acls(my_obj):
            for ip in my_obj:
                rules = my_obj[ip]
                endpoints = self.s.endpoints_by_ip(ip)
                if endpoints:
                    endpoint = endpoints[0]
                    try:
                        status = Actions(endpoint, self.s.sdnc).update_acls(
                            rules_file=self.controller['RULES_FILE'],
                            endpoints=endpoints,
                            force_apply_rules=rules)
                        if not status:
                            self.logger.warning(
                                'Unable to apply rules: {0} to endpoint: {1}'.
                                format(rules, endpoint.name))
                    except Exception as e:
                        self.logger.error(
                            'Unable to apply rules: {0} to endpoint: {1} because {2}'
                            .format(rules, endpoint.name, str(e)))
            return ({}, None)

        def handler_action_remove(my_obj):
            remove_list = [name for name in my_obj]
            return ({}, remove_list)

        def handler_action_remove_ignored(_my_obj):
            remove_list = [
                endpoint.name for endpoint in self.s.endpoints.values()
                if endpoint.ignore
            ]
            return ({}, remove_list)

        def handler_action_remove_inactives(_my_obj):
            remove_list = [
                endpoint.name for endpoint in self.s.endpoints.values()
                if endpoint.state == 'inactive'
            ]
            return ({}, remove_list)

        def handler_faucet_event(my_obj):
            self.logger.debug('FAUCET Event:{0}'.format(my_obj))
            self.faucet_event.append(my_obj)
            return (my_obj, None)

        handlers = {
            'poseidon.algos.decider': handler_algos_decider,
            'poseidon.action.ignore': handler_action_ignore,
            'poseidon.action.clear.ignored': handler_action_clear_ignored,
            'poseidon.action.change': handler_action_change,
            'poseidon.action.update_acls': handler_action_update_acls,
            'poseidon.action.remove': handler_action_remove,
            'poseidon.action.remove.ignored': handler_action_remove_ignored,
            'poseidon.action.remove.inactives':
            handler_action_remove_inactives,
            self.controller['FA_RABBIT_ROUTING_KEY']: handler_faucet_event,
        }

        handler = handlers.get(routing_key, None)
        if handler is None:
            self.logger.error(
                'no handler for routing_key {0}'.format(routing_key))
        else:
            ret_val, remove_list = handler(my_obj)
            self.update_routing_key_time(routing_key)
            if remove_list:
                for endpoint_name in remove_list:
                    if endpoint_name in self.s.endpoints:
                        del self.s.endpoints[endpoint_name]
            return (ret_val, True)

        return ({}, False)

    def schedule_mirroring(self):
        queued_endpoints = [
            endpoint for endpoint in self.s.endpoints.values()
            if not endpoint.ignore and endpoint.state == 'queued'
            and endpoint.p_next_state != 'inactive'
        ]
        self.s.investigations = len([
            endpoint for endpoint in self.s.endpoints.values()
            if endpoint.state in ['mirroring', 'reinvestigating']
        ])
        # mirror things in the order they got added to the queue
        queued_endpoints = sorted(queued_endpoints,
                                  key=lambda x: x.p_prev_states[-1][1])

        investigation_budget = max(
            self.controller['max_concurrent_reinvestigations'] -
            self.s.investigations, 0)
        self.logger.debug('investigations {0}, budget {1}, queued {2}'.format(
            str(self.s.investigations), str(investigation_budget),
            str(len(queued_endpoints))))

        for endpoint in queued_endpoints[:investigation_budget]:
            endpoint.trigger(endpoint.p_next_state)
            endpoint.p_next_state = None
            endpoint.p_prev_states.append((endpoint.state, int(time.time())))
            self.s.mirror_endpoint(endpoint)

        for endpoint in self.s.endpoints.values():
            if not endpoint.ignore:
                if self.s.sdnc:
                    if endpoint.state == 'unknown':
                        endpoint.p_next_state = 'mirror'
                        endpoint.queue()
                        endpoint.p_prev_states.append(
                            (endpoint.state, int(time.time())))
                    elif endpoint.state in ['mirroring', 'reinvestigating']:
                        cur_time = int(time.time())
                        # timeout after 2 times the reinvestigation frequency
                        # in case something didn't report back, put back in an
                        # unknown state
                        if cur_time - endpoint.p_prev_states[-1][
                                1] > 2 * self.controller[
                                    'reinvestigation_frequency']:
                            self.logger.debug(
                                'timing out: {0} and setting to unknown'.
                                format(endpoint.name))
                            self.s.unmirror_endpoint(endpoint)
                            endpoint.unknown()
                            endpoint.p_prev_states.append(
                                (endpoint.state, int(time.time())))
                else:
                    if endpoint.state != 'known':
                        endpoint.known()

    def schedule_coprocessing(self):
        queued_endpoints = [
            endpoint for endpoint in self.s.endpoints.values()
            if not endpoint.copro_ignore and endpoint.copro_state == 'queued'
        ]
        self.s.coprocessing = len([
            endpoint for endpoint in self.s.endpoints.values()
            if endpoint.copro_state in ['coprocessing']
        ])
        # coprocess things in the order they got added to the queue
        queued_endpoints = sorted(queued_endpoints,
                                  key=lambda x: x.p_prev_copro_states[-1][1])

        coprocessing_budget = max(
            self.controller['max_concurrent_coprocessing'] -
            self.s.coprocessing, 0)
        self.logger.debug('coprocessing {0}, budget {1}, queued {2}'.format(
            str(self.s.coprocessing), str(coprocessing_budget),
            str(len(queued_endpoints))))

        for endpoint in queued_endpoints[:coprocessing_budget]:
            endpoint.trigger(endpoint.p_next_copro_state)
            endpoint.p_next_copro_state = None
            endpoint.p_prev_copro_states.append(
                (endpoint.copro_state, int(time.time())))
            self.s.coprocess_endpoint(endpoint)

        for endpoint in self.s.endpoints.values():
            if not endpoint.copro_ignore:
                if self.s.sdnc:
                    if endpoint.copro_state == 'unknown':
                        endpoint.p_next_copro_state = 'coprocessing'
                        endpoint.queue()
                        endpoint.p_prev_copro_states.append(
                            (endpoint.copro_state, int(time.time())))
                    elif endpoint.copro_state in ['coprocessing']:
                        cur_time = int(time.time())
                        # timeout after 2 times the coprocessing frequency
                        # in case something didn't report back, put back in an
                        # unknown state
                        if cur_time - endpoint.p_prev_copro_states[-1][
                                1] > 2 * self.controller[
                                    'coprocessing_frequency']:
                            self.logger.debug(
                                'timing out: {0} and setting to unknown'.
                                format(endpoint.name))
                            self.s.unmirror_endpoint(endpoint)
                            endpoint.unknown()
                            endpoint.p_prev_copro_states.append(
                                (endpoint.copro_state, int(time.time())))
                else:
                    if endpoint.copro_state != 'nominal':
                        endpoint.nominal()

    def process(self):
        global CTRL_C
        signal.signal(signal.SIGINT, partial(self.signal_handler))
        while not CTRL_C['STOP']:
            time.sleep(1)

            found_work, item = self.get_q_item()

            if found_work:
                self.format_rabbit_message(item)

            self.schedule_mirroring()

        self.s.store_endpoints()

    def get_q_item(self):
        '''
        attempt to get a work item from the queue
        m_queue -> (routing_key, body)
        a read from get_q_item should be of the form
        (boolean,(routing_key, body))
        '''
        found_work = False
        item = None
        global CTRL_C

        if not CTRL_C['STOP']:
            try:
                item = self.m_queue.get(False)
                found_work = True
                self.m_queue.task_done()
            except queue.Empty:  # pragma: no cover
                pass

        return (found_work, item)

    def shutdown(self):
        ''' gracefully shut down. '''
        self.s.clear_filters()
        for job in self.schedule.jobs:
            self.logger.debug('shutdown :{0}'.format(job))
            self.schedule.cancel_job(job)
        if self.rabbit_channel_connection_local:
            self.rabbit_channel_connection_local.close()
        if self.rabbit_channel_connection_local_fa:
            self.rabbit_channel_connection_local_fa.close()
        self.logger.debug('SHUTTING DOWN')
        self.logger.debug('EXITING')
        sys.exit()

    def signal_handler(self, _signal, _frame):
        ''' hopefully eat a CTRL_C and signal system shutdown '''
        global CTRL_C
        CTRL_C['STOP'] = True
        self.logger.debug('CTRL-C: {0}'.format(CTRL_C))
        try:
            self.shutdown()
        except Exception as e:  # pragma: no cover
            self.logger.debug(
                'Failed to handle signal properly because: {0}'.format(str(e)))
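
A hedged sketch of how the Monitor in this example might be driven. It assumes the module-level names used above (logger, schedule, CTRL_C, Config, Prometheus, SDNConnect) are defined and that any RabbitMQ wiring happens elsewhere, so it is illustrative rather than a drop-in main():

# Hypothetical driver for the Monitor class above; assumes the module-level
# helpers it references (logger, schedule, CTRL_C, etc.) are available.
monitor = Monitor(skip_rabbit=True)
monitor.schedule_thread.start()  # run the scheduled jobs in their own thread
monitor.process()                # loops until SIGINT; signal_handler() then
                                 # sets CTRL_C['STOP'] and calls shutdown()
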
Example #4
0
class Monitor:
    def __init__(self, skip_rabbit, controller=None):
        self.faucet_event = []
        self.m_queue = queue.Queue()
        self.job_queue = queue.Queue()
        self.skip_rabbit = skip_rabbit
        self.logger = logger
        self.rabbit_channel_connection_local = None
        self.rabbit_channel_connection_local_fa = None

        # get config options
        if controller is None:
            self.controller = Config().get_config()
        else:
            self.controller = controller

        # timer class to call things periodically in its own thread
        self.schedule = schedule

        # setup prometheus
        self.prom = Prometheus()
        try:
            self.prom.initialize_metrics()
        except Exception as e:  # pragma: no cover
            self.logger.debug(
                'Prometheus metrics are already initialized: {0}'.format(
                    str(e)))
        Prometheus.start()

        # initialize sdnconnect
        self.s = SDNConnect(self.controller)

        # schedule periodic scan of endpoints thread
        self.schedule.every(self.controller['scan_frequency']).seconds.do(
            self.schedule_job_kickurl)

        # schedule periodic reinvestigations thread
        self.schedule.every(
            self.controller['reinvestigation_frequency']).seconds.do(
                self.schedule_job_reinvestigation)

        # schedule all threads
        self.schedule_thread = threading.Thread(target=partial(
            schedule_thread_worker, schedule=self.schedule),
                                                name='st_worker')

    def _update_metrics(self):
        self.logger.debug('updating metrics')
        try:
            # get current state
            req = requests.get('http://poseidon-api:8000/v1/network_full',
                               timeout=10)
            # send results to prometheus
            hosts = req.json()['dataset']
            self.prom.update_metrics(hosts)
        except (requests.exceptions.ConnectionError,
                Exception) as e:  # pragma: no cover
            self.logger.error(
                'Unable to get current state and send it to Prometheus because: {0}'
                .format(str(e)))

    def job_kickurl(self):
        self.s.check_endpoints(messages=self.faucet_event)
        del self.faucet_event[:]
        self._update_metrics()

    def job_reinvestigation(self):
        ''' put endpoints into the reinvestigation state if possible '''
        def trigger_reinvestigation(candidates):
            # get random order of things that are known
            for _ in range(self.controller['max_concurrent_reinvestigations'] -
                           self.s.investigations):
                if len(candidates) > 0:
                    chosen = candidates.pop()
                    self.logger.info(
                        'Starting reinvestigation on: {0} {1}'.format(
                            chosen.name, chosen.state))
                    chosen.reinvestigate()  # pytype: disable=attribute-error
                    chosen.p_prev_states.append(
                        (chosen.state, int(time.time())))
                    self.s.mirror_endpoint(chosen)

        candidates = [
            endpoint for endpoint in self.s.endpoints.values()
            if endpoint.state in ['queued']
        ]
        if len(candidates) == 0:
            # if no queued endpoints, then known and abnormal are candidates
            candidates = [
                endpoint for endpoint in self.s.endpoints.values()
                if endpoint.state in ['known', 'abnormal']
            ]
            if len(candidates) > 0:
                random.shuffle(candidates)
        if self.s.sdnc:
            trigger_reinvestigation(candidates)

    def queue_job(self, job):
        if self.job_queue.qsize() < 2:
            self.job_queue.put(job)

    def schedule_job_kickurl(self):
        self.queue_job(self.job_kickurl)

    def schedule_job_reinvestigation(self):
        self.queue_job(self.job_reinvestigation)

    def update_routing_key_time(self, routing_key):
        self.prom.prom_metrics['last_rabbitmq_routing_key_time'].labels(
            routing_key=routing_key).set(time.time())

    def format_rabbit_message(self, item):
        '''
        read a message off the rabbit_q
        the message should be item = (routing_key,msg)
        '''
        routing_key, my_obj = item
        self.logger.debug('routing_key: {0} rabbit_message: {1}'.format(
            routing_key, my_obj))
        my_obj = json.loads(my_obj)

        def handler_algos_decider(my_obj):
            self.logger.debug('decider value:{0}'.format(my_obj))
            data = my_obj.get('data', None)
            results = my_obj.get('results', {})
            tool = results.get('tool', None)
            if isinstance(data, dict):
                if tool == 'p0f':
                    if self.s.prc.store_p0f_result(data):
                        return (data, None)
                elif tool == 'networkml':
                    self.s.prc.store_tool_result(my_obj, 'networkml')
                    for name, message in data.items():
                        endpoint = self.s.endpoints.get(name, None)
                        if endpoint:
                            self.logger.debug(
                                'processing networkml results for %s', name)
                            self.s.unmirror_endpoint(endpoint)
                            # pytype: disable=attribute-error
                            endpoint.trigger('unknown')
                            endpoint.p_next_state = None
                            endpoint.p_prev_states.append(
                                (endpoint.state, int(time.time())))
                            if message.get('valid', False):
                                return (data, None)
                            break
                        else:
                            self.logger.debug(
                                'endpoint %s from networkml not found', name)
            return ({}, None)

        def handler_action_ignore(my_obj):
            for name in my_obj:
                endpoint = self.s.endpoints.get(name, None)
                if endpoint:
                    endpoint.ignore = True
            return ({}, None)

        def handler_action_clear_ignored(my_obj):
            for name in my_obj:
                endpoint = self.s.endpoints.get(name, None)
                if endpoint:
                    endpoint.ignore = False
            return ({}, None)

        def handler_action_change(my_obj):
            for name, state in my_obj:
                endpoint = self.s.endpoints.get(name, None)
                if endpoint:
                    try:
                        if (state != 'mirror' and state != 'reinvestigate'
                                and (endpoint.state == 'mirroring'
                                     or endpoint.state == 'reinvestigating')):
                            self.s.unmirror_endpoint(endpoint)
                        # pytype: disable=attribute-error
                        endpoint.trigger(state)
                        endpoint.p_next_state = None
                        endpoint.p_prev_states.append(
                            (endpoint.state, int(time.time())))
                        if endpoint.state == 'mirroring' or endpoint.state == 'reinvestigating':
                            self.s.mirror_endpoint(endpoint)
                    except Exception as e:  # pragma: no cover
                        self.logger.error(
                            'Unable to change endpoint {0} because: {1}'.
                            format(endpoint.name, str(e)))
            return ({}, None)

        def handler_action_update_acls(my_obj):
            for ip in my_obj:
                rules = my_obj[ip]
                endpoints = self.s.endpoints_by_ip(ip)
                if endpoints:
                    endpoint = endpoints[0]
                    try:
                        status = Actions(endpoint, self.s.sdnc).update_acls(
                            rules_file=self.controller['RULES_FILE'],
                            endpoints=endpoints,
                            force_apply_rules=rules)
                        if not status:
                            self.logger.warning(
                                'Unable to apply rules: {0} to endpoint: {1}'.
                                format(rules, endpoint.name))
                    except Exception as e:
                        self.logger.error(
                            'Unable to apply rules: {0} to endpoint: {1} because {2}'
                            .format(rules, endpoint.name, str(e)))
            return ({}, None)

        def handler_action_remove(my_obj):
            remove_list = [name for name in my_obj]
            return ({}, remove_list)

        def handler_action_remove_ignored(_my_obj):
            remove_list = [
                endpoint.name for endpoint in self.s.endpoints.values()
                if endpoint.ignore
            ]
            return ({}, remove_list)

        def handler_action_remove_inactives(_my_obj):
            remove_list = [
                endpoint.name for endpoint in self.s.endpoints.values()
                if endpoint.state == 'inactive'
            ]
            return ({}, remove_list)

        def handler_faucet_event(my_obj):
            if self.s and self.s.sdnc:
                if not self.s.sdnc.ignore_event(my_obj):
                    self.faucet_event.append(my_obj)
                    return (my_obj, None)
            return ({}, None)

        handlers = {
            'poseidon.algos.decider': handler_algos_decider,
            'poseidon.action.ignore': handler_action_ignore,
            'poseidon.action.clear.ignored': handler_action_clear_ignored,
            'poseidon.action.change': handler_action_change,
            'poseidon.action.update_acls': handler_action_update_acls,
            'poseidon.action.remove': handler_action_remove,
            'poseidon.action.remove.ignored': handler_action_remove_ignored,
            'poseidon.action.remove.inactives':
            handler_action_remove_inactives,
            self.controller['FA_RABBIT_ROUTING_KEY']: handler_faucet_event,
        }
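        # each handler takes the decoded message body and returns a
        # (ret_val, remove_list) pair: ret_val is handed back to the caller,
        # and remove_list names endpoints to drop from self.s.endpoints below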

        handler = handlers.get(routing_key, None)
        if handler is None:
            self.logger.error(
                'no handler for routing_key {0}'.format(routing_key))
        else:
            ret_val, remove_list = handler(my_obj)
            self.update_routing_key_time(routing_key)
            if remove_list:
                for endpoint_name in remove_list:
                    if endpoint_name in self.s.endpoints:
                        del self.s.endpoints[endpoint_name]
            return (ret_val, True)

        return ({}, False)
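
    # Usage sketch (an illustration, not from the original source; assumes a
    # Monitor instance named monitor): a rabbit item is a (routing_key,
    # json_body) tuple, so a caller might do
    #     monitor.format_rabbit_message(
    #         ('poseidon.action.ignore', json.dumps(['some-endpoint-name'])))
    # which marks the named endpoints as ignored and returns ({}, True);
    # an unknown routing key is logged as an error and returns ({}, False).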

    def schedule_mirroring(self):
        queued_endpoints = [
            endpoint for endpoint in self.s.endpoints.values()
            if not endpoint.ignore and endpoint.state == 'queued'
            and endpoint.p_next_state != 'inactive'
        ]
        self.s.investigations = len([
            endpoint for endpoint in self.s.endpoints.values()
            if endpoint.state in ['mirroring', 'reinvestigating']
        ])
        # mirror things in the order they got added to the queue
        queued_endpoints = sorted(queued_endpoints,
                                  key=lambda x: x.p_prev_states[-1][1])

        investigation_budget = max(
            self.controller['max_concurrent_reinvestigations'] -
            self.s.investigations, 0)
        self.logger.debug('investigations {0}, budget {1}, queued {2}'.format(
            str(self.s.investigations), str(investigation_budget),
            str(len(queued_endpoints))))
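        # e.g. with max_concurrent_reinvestigations set to 2 and one endpoint
        # already mirroring or reinvestigating, the budget is max(2 - 1, 0) = 1,
        # so only the single oldest queued endpoint is kicked off below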

        for endpoint in queued_endpoints[:investigation_budget]:
            # pytype: disable=attribute-error
            endpoint.trigger(endpoint.p_next_state)
            endpoint.p_next_state = None
            endpoint.p_prev_states.append((endpoint.state, int(time.time())))
            self.s.mirror_endpoint(endpoint)

        for endpoint in self.s.endpoints.values():
            if not endpoint.ignore:
                if self.s.sdnc:
                    if endpoint.state == 'unknown':
                        endpoint.p_next_state = 'mirror'
                        endpoint.queue()  # pytype: disable=attribute-error
                        endpoint.p_prev_states.append(
                            (endpoint.state, int(time.time())))
                    elif endpoint.state in ['mirroring', 'reinvestigating']:
                        cur_time = int(time.time())
                        # time out after twice the reinvestigation frequency;
                        # if nothing reported back, put the endpoint back into
                        # an unknown state
                        if cur_time - endpoint.p_prev_states[-1][
                                1] > 2 * self.controller[
                                    'reinvestigation_frequency']:
                            self.logger.debug(
                                'timing out: {0} and setting to unknown'.
                                format(endpoint.name))
                            self.s.unmirror_endpoint(endpoint)
                            endpoint.unknown()  # pytype: disable=attribute-error
                            endpoint.p_prev_states.append(
                                (endpoint.state, int(time.time())))
                else:
                    if endpoint.state != 'known':
                        endpoint.known()  # pytype: disable=attribute-error

    def schedule_coprocessing(self):
        queued_endpoints = [
            endpoint for endpoint in self.s.endpoints.values()
            if not endpoint.copro_ignore
            and endpoint.copro_state == 'copro_queued'
        ]  # pytype: disable=attribute-error
        self.s.coprocessing = len([
            endpoint for endpoint in self.s.endpoints.values()
            if endpoint.copro_state in ['copro_coprocessing']
        ])
        # coprocess things in the order they got added to the queue
        queued_endpoints = sorted(queued_endpoints,
                                  key=lambda x: x.p_prev_copro_states[-1][1])

        coprocessing_budget = max(
            self.controller['max_concurrent_coprocessing'] -
            self.s.coprocessing, 0)
        self.logger.debug('coprocessing {0}, budget {1}, queued {2}'.format(
            str(self.s.coprocessing), str(coprocessing_budget),
            str(len(queued_endpoints))))

        for endpoint in queued_endpoints[:coprocessing_budget]:
            # pytype: disable=attribute-error
            endpoint.trigger(endpoint.p_next_copro_state)
            endpoint.p_next_copro_state = None  # pytype: disable=attribute-error
            endpoint.p_prev_copro_states.append(  # pytype: disable=attribute-error
                (endpoint.copro_state, int(time.time())))
            self.s.coprocess_endpoint(endpoint)

        for endpoint in self.s.endpoints.values():
            if not endpoint.copro_ignore:  # pytype: disable=attribute-error
                if self.s.sdnc:
                    if endpoint.copro_state == 'copro_unknown':  # pytype: disable=attribute-error
                        endpoint.p_next_copro_state = 'copro_coprocessing'
                        endpoint.copro_queue()  # pytype: disable=attribute-error
                        endpoint.p_prev_copro_states.append(  # pytype: disable=attribute-error
                            (endpoint.copro_state, int(time.time())))
                    # pytype: disable=attribute-error
                    elif endpoint.copro_state in ['copro_coprocessing']:
                        cur_time = int(time.time())
                        # time out after twice the coprocessing frequency;
                        # if nothing reported back, put the endpoint back into
                        # an unknown state
                        # pytype: disable=attribute-error
                        if cur_time - endpoint.p_prev_copro_states[-1][
                                1] > 2 * self.controller[
                                    'coprocessing_frequency']:
                            self.logger.debug(
                                'timing out: {0} and setting to unknown'.
                                format(endpoint.name))
                            self.s.uncoprocess_endpoint(endpoint)
                            endpoint.copro_unknown()  # pytype: disable=attribute-error
                            endpoint.p_prev_copro_states.append(  # pytype: disable=attribute-error
                                (endpoint.copro_state, int(time.time())))  # pytype: disable=attribute-error
                else:
                    if endpoint.copro_state != 'copro_nominal':  # pytype: disable=attribute-error
                        endpoint.copro_nominal()  # pytype: disable=attribute-error
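
    # schedule_coprocessing parallels schedule_mirroring above, but drives the
    # copro_* state machine and times endpoints out against
    # 'coprocessing_frequency' rather than 'reinvestigation_frequency'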

    def process(self):
        global CTRL_C
        signal.signal(signal.SIGINT, partial(self.signal_handler))
        while not CTRL_C['STOP']:
            while True:
                found_work, rabbit_msg = self.get_q_item(self.m_queue,
                                                         timeout=0)
                if not found_work:
                    break
                self.format_rabbit_message(rabbit_msg)
            self.s.refresh_endpoints()
            found_work, schedule_func = self.get_q_item(self.job_queue)
            if found_work and callable(schedule_func):
                self.logger.info('calling %s', schedule_func)
                start_time = time.time()
                schedule_func()
                self.logger.debug('%s done (%.1f sec)' %
                                  (schedule_func, time.time() - start_time))
            self.schedule_mirroring()

        self.s.refresh_endpoints()

    def get_q_item(self, q, timeout=1):
        '''
        attempt to get a work item from the given queue
        m_queue items are (routing_key, body) tuples
        the return value is (found_work, item), i.e.
        (boolean, (routing_key, body)) or (False, None)
        '''
        global CTRL_C
        if not CTRL_C['STOP']:
            try:
                if timeout:
                    return (True, q.get(True, timeout=timeout))
                return (True, q.get_nowait())
            except queue.Empty:  # pragma: no cover
                pass

        return (False, None)
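
    # as used in process() above: get_q_item(self.m_queue, timeout=0) polls
    # without blocking via get_nowait(), while get_q_item(self.job_queue)
    # blocks for up to the default one-second timeout waiting for a job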

    def shutdown(self):
        ''' gracefully shut down. '''
        self.s.clear_filters()
        for job in self.schedule.jobs:
            self.logger.debug('shutdown :{0}'.format(job))
            self.schedule.cancel_job(job)
        if self.rabbit_channel_connection_local:
            self.rabbit_channel_connection_local.close()
        if self.rabbit_channel_connection_local_fa:
            self.rabbit_channel_connection_local_fa.close()
        self.logger.debug('SHUTTING DOWN')
        self.logger.debug('EXITING')
        sys.exit()

    def signal_handler(self, _signal, _frame):
        ''' hopefully eat a CTRL_C and signal system shutdown '''
        global CTRL_C
        CTRL_C['STOP'] = True
        self.logger.debug('CTRL-C: {0}'.format(CTRL_C))
        try:
            self.shutdown()
        except Exception as e:  # pragma: no cover
            self.logger.debug(
                'Failed to handle signal properly because: {0}'.format(str(e)))
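
# A minimal driver sketch (an assumption for illustration, not part of the
# module above): with the module's imports and the CTRL_C global in place,
# the class would be exercised roughly as
#     m = Monitor(skip_rabbit=False)
#     m.schedule_thread.start()
#     m.process()  # drains the rabbit queue until CTRL-C, then refreshes endpoints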
Example #5
0
class Monitor(object):
    def __init__(self, skip_rabbit):
        self.faucet_event = []
        self.m_queue = queue.Queue()
        self.skip_rabbit = skip_rabbit
        self.logger = logger

        # get config options
        self.controller = Config().get_config()

        # timer class to call things periodically in own thread
        self.schedule = schedule

        # setup prometheus
        self.prom = Prometheus()
        try:
            self.prom.initialize_metrics()
        except Exception as e:  # pragma: no cover
            self.logger.debug(
                'Prometheus metrics are already initialized: {0}'.format(
                    str(e)))
        Prometheus.start()

        # initialize sdnconnect
        self.s = SDNConnect()

        # retrieve endpoints from redis
        self.s.get_stored_endpoints()
        # set all retrieved endpoints to inactive at the start
        for endpoint in self.s.endpoints:
            if not endpoint.ignore:
                if endpoint.state != 'inactive':
                    if endpoint.state == 'mirroring':
                        endpoint.p_next_state = 'mirror'
                    elif endpoint.state == 'reinvestigating':
                        endpoint.p_next_state = 'reinvestigate'
                    elif endpoint.state == 'queued':
                        endpoint.p_next_state = 'queue'
                    elif endpoint.state in ['known', 'abnormal']:
                        endpoint.p_next_state = endpoint.state
                    endpoint.endpoint_data['active'] = 0
                    endpoint.inactive()
                    endpoint.p_prev_states.append(
                        (endpoint.state, int(time.time())))
        # store changes to state
        self.s.store_endpoints()

        # schedule periodic scan of endpoints thread
        self.schedule.every(self.controller['scan_frequency']).seconds.do(
            partial(schedule_job_kickurl, func=self))

        # schedule periodic reinvestigations thread
        self.schedule.every(
            self.controller['reinvestigation_frequency']).seconds.do(
                partial(schedule_job_reinvestigation, func=self))

        # schedule all threads
        self.schedule_thread = threading.Thread(target=partial(
            schedule_thread_worker, schedule=self.schedule),
                                                name='st_worker')
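
    # note: signal_handler below closes self.rabbit_channel_connection_local
    # and self.rabbit_channel_connection_local_fa, which are not assigned in
    # this __init__; presumably they are set by the rabbit setup code
    # elsewhere in the module (not shown here)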

    def format_rabbit_message(self, item):
        ''' read a message off the rabbit_q
        the message should be item = (routing_key,msg)
        '''
        ret_val = {}

        routing_key, my_obj = item
        self.logger.debug('rabbit_message:{0}'.format(my_obj))
        # my_obj: (hash,data)
        my_obj = json.loads(my_obj)
        self.logger.debug('routing_key:{0}'.format(routing_key))
        if routing_key == 'poseidon.algos.decider':
            self.logger.debug('decider value:{0}'.format(my_obj))
            # TODO if valid response then send along otherwise nothing
            for key in my_obj:
                ret_val[key] = my_obj[key]
        elif routing_key == self.controller['FA_RABBIT_ROUTING_KEY']:
            self.logger.debug('FAUCET Event:{0}'.format(my_obj))
            for key in my_obj:
                ret_val[key] = my_obj[key]
        return ret_val

    def process(self):
        global CTRL_C
        signal.signal(signal.SIGINT, partial(self.signal_handler))
        while not CTRL_C['STOP']:
            time.sleep(1)
            found_work, item = self.get_q_item()
            ml_returns = {}

            if found_work and item[0] == self.controller[
                    'FA_RABBIT_ROUTING_KEY']:
                self.faucet_event.append(self.format_rabbit_message(item))
                self.logger.debug('Faucet event: {0}'.format(
                    self.faucet_event))
            elif found_work:
                ml_returns = self.format_rabbit_message(item)
                self.logger.info('ML results: {0}'.format(ml_returns))
                # process results from ml output and update impacted endpoints
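                # ml_returns is keyed by endpoint name; shape inferred from
                # the lookups below, e.g.
                #   {'<endpoint name>': {'valid': True,
                #                        'decisions': {'behavior': 'normal'}}}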
                for ep in self.s.endpoints:
                    if ep.name in ml_returns and 'valid' in ml_returns[
                            ep.name] and not ep.ignore:
                        if ep.state in ['mirroring', 'reinvestigating']:
                            status = Actions(ep,
                                             self.s.sdnc).unmirror_endpoint()
                            if not status:
                                self.logger.warning(
                                    'Unable to unmirror the endpoint: {0}'.
                                    format(ep.name))
                            self.s.investigations -= 1
                        if ml_returns[ep.name]['valid']:
                            ml_decision = None
                            if 'decisions' in ml_returns[
                                    ep.name] and 'behavior' in ml_returns[
                                        ep.name]['decisions']:
                                ml_decision = ml_returns[
                                    ep.name]['decisions']['behavior']
                            if ml_decision == 'normal':
                                ep.known()
                            else:
                                ep.abnormal()
                        else:
                            ep.unknown()
                        ep.p_prev_states.append((ep.state, int(time.time())))

            for endpoint in self.s.endpoints:
                if not endpoint.ignore:
                    if endpoint.state == 'queued':
                        if self.s.investigations < self.controller[
                                'max_concurrent_reinvestigations']:
                            self.s.investigations += 1
                            endpoint.trigger(endpoint.p_next_state)
                            endpoint.p_next_state = None
                            endpoint.p_prev_states.append(
                                (endpoint.state, int(time.time())))
                            status = Actions(endpoint,
                                             self.s.sdnc).mirror_endpoint()
                            if not status:
                                self.logger.warning(
                                    'Unable to mirror the endpoint: {0}'.
                                    format(endpoint.name))
                    elif endpoint.state == 'unknown':
                        # move to mirroring state
                        if self.s.investigations < self.controller[
                                'max_concurrent_reinvestigations']:
                            self.s.investigations += 1
                            endpoint.mirror()
                            endpoint.p_prev_states.append(
                                (endpoint.state, int(time.time())))
                            status = Actions(endpoint,
                                             self.s.sdnc).mirror_endpoint()
                            if not status:
                                self.logger.warning(
                                    'Unable to mirror the endpoint: {0}'.
                                    format(endpoint.name))
                        else:
                            endpoint.p_next_state = 'mirror'
                            endpoint.queue()
                            endpoint.p_prev_states.append(
                                (endpoint.state, int(time.time())))
                    elif endpoint.state in ['mirroring', 'reinvestigating']:
                        cur_time = int(time.time())
                        # time out after twice the reinvestigation frequency;
                        # if nothing reported back, put the endpoint back into
                        # an unknown state
                        if cur_time - endpoint.p_prev_states[-1][
                                1] > 2 * self.controller[
                                    'reinvestigation_frequency']:
                            status = Actions(endpoint,
                                             self.s.sdnc).unmirror_endpoint()
                            if not status:
                                self.logger.warning(
                                    'Unable to unmirror the endpoint: {0}'.
                                    format(endpoint.name))
                            endpoint.unknown()
                            self.s.investigations -= 1
                            endpoint.p_prev_states.append(
                                (endpoint.state, int(time.time())))
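
    # summary of the loop above: 'queued' endpoints are promoted when there is
    # investigation budget, 'unknown' endpoints are mirrored (or queued when
    # the budget is exhausted), and endpoints stuck in 'mirroring' or
    # 'reinvestigating' for more than twice the reinvestigation frequency are
    # unmirrored and reset to 'unknown'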

    def get_q_item(self):
        '''
        attempt to get a work item from m_queue
        m_queue items are (routing_key, body) tuples
        the return value is (found_work, item), i.e.
        (boolean, (routing_key, body)) or (False, None)
        '''
        found_work = False
        item = None
        global CTRL_C

        if not CTRL_C['STOP']:
            try:
                item = self.m_queue.get(False)
                found_work = True
                self.m_queue.task_done()
            except queue.Empty:  # pragma: no cover
                pass

        return (found_work, item)

    def signal_handler(self, _signal, _frame):
        ''' hopefully eat a CTRL_C and signal system shutdown '''
        global CTRL_C
        CTRL_C['STOP'] = True
        self.logger.debug('=================CTRLC{0}'.format(CTRL_C))
        try:
            for job in self.schedule.jobs:
                self.logger.debug('CTRLC:{0}'.format(job))
                self.schedule.cancel_job(job)
            self.rabbit_channel_connection_local.close()
            self.rabbit_channel_connection_local_fa.close()
        except BaseException:  # pragma: no cover
            pass
        # call sys.exit() outside the try block so the SystemExit it raises
        # is not swallowed by the BaseException handler above
        sys.exit()