class RabbitMQ(Collector):
    parameters = {
        'uri'     : StringParameter(default='http://localhost:15672/api/overview'),
        'user'    : StringParameter(default='guest'),
        'password': StringParameter(default='guest'),
    }


    def launch(self):
        logger = self.logger
        logger.debug('getRabbitMQStatus: start')

        if not self.is_in_group('rabbitmq'):
            self.set_not_eligible('Please add the rabbitmq group to enable this collector.')
            return

        try:
            uri = self.get_parameter('uri')
            user = self.get_parameter('user')
            password = self.get_parameter('password')
            response = httper.get(uri, timeout=3, user=user, password=password)
        except get_http_exceptions() as e:
            self.set_error('Unable to get RabbitMQ status - HTTPError = %s' % e)
            return False
        except Exception:
            self.set_error('Unable to get RabbitMQ status - Exception = %s' % traceback.format_exc())
            return False

        try:
            status = jsoner.loads(response)
        except Exception as exp:
            self.set_error("Rabbitmq: parsing json: %s" % exp)
            return False

        return status
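# Example sketch (illustrative, not part of the collector above): the collector boils
# down to one authenticated GET against the RabbitMQ management API. This stdlib-only
# version assumes a local RabbitMQ with the management plugin and the default
# guest/guest account, mirroring the parameter defaults.
import base64
import json

try:
    from urllib.request import Request, urlopen  # Python 3
except ImportError:
    from urllib2 import Request, urlopen          # Python 2


def fetch_rabbitmq_overview(uri='http://localhost:15672/api/overview',
                            user='guest', password='guest'):
    auth = base64.b64encode(('%s:%s' % (user, password)).encode('ascii')).decode('ascii')
    req = Request(uri)
    req.add_header('Authorization', 'Basic %s' % auth)
    # The collector returns this parsed dict as-is; typical keys include
    # 'rabbitmq_version', 'queue_totals' and 'message_stats'.
    return json.loads(urlopen(req, timeout=3).read().decode('utf-8'))


if __name__ == '__main__':
    print(fetch_rabbitmq_overview().get('rabbitmq_version'))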
class WebSocketModule(ListenerModule):
    implement = 'websocket'

    parameters = {
        'enabled': BoolParameter(default=False),
        'port'   : IntParameter(default=6769),
        'address': StringParameter(default='0.0.0.0'),
    }


    def __init__(self):
        ListenerModule.__init__(self)
        self.websocket = {}
        self.webso = None


    def get_info(self):
        r = {'log': ''}
        r['configuration'] = self.websocket
        r['state'] = 'STARTED' if self.websocket['enabled'] else 'DISABLED'
        if not self.webso:
            r['websocket_info'] = None
        else:
            r['websocket_info'] = self.webso.get_info()
        return r


    def prepare(self):
        self.websocket['enabled'] = self.get_parameter('enabled')
        self.websocket['port'] = self.get_parameter('port')
        self.websocket['address'] = self.get_parameter('address')


    def launch(self):
        if not self.websocket['enabled']:
            self.logger.log('The websocket object defined in the configuration is disabled, skipping websocket launch')
            return
        threader.create_and_launch(self.do_launch, name='Websocket port:%d listening' % self.websocket.get('port'),
                                   essential=True, part='websocket')


    def do_launch(self):
        self.webso = WebSocketBackend(self.websocket)
        # Also load it in the websocket manager so other parts can easily forward messages
        websocketmgr.set(self.webso)
        self.webso.run()
class ShinkenModule(ConnectorModule): implement = 'shinken' parameters = { 'enabled': BoolParameter(default=False), 'cfg_path': StringParameter(default='/etc/shinken/agent'), 'reload_command': StringParameter(default='/etc/init.d/shinken reload'), 'monitoring_tool': StringParameter(default='shinken'), 'external_command_file': StringParameter(default='/var/lib/shinken/shinken.cmd'), } def __init__(self): ConnectorModule.__init__(self) self.regenerate_flag = False self.reload_flag = False self.cfg_path = None self.node_changes = [] self.reload_command = '' self.monitoring_tool = 'shinken' self.external_command_file = '/var/lib/shinken/shinken.cmd' self.enabled = False self.export_states_uuids = set() def prepare(self): self.logger.info('SHINKEN: prepare phase') self.cfg_path = os.path.abspath(self.get_parameter('cfg_path')) self.reload_command = self.get_parameter('reload_command') self.monitoring_tool = self.get_parameter('monitoring_tool') self.external_command_file = self.get_parameter( 'external_command_file') self.enabled = self.get_parameter('enabled') # Simulate that we are a new node, to always export our states at startup self.node_changes.append(('new-node', gossiper.uuid)) # register to node events pubsub.sub('new-node', self.new_node_callback) pubsub.sub('delete-node', self.delete_node_callback) pubsub.sub('change-node', self.change_node_callback) def get_info(self): state = 'STARTED' if self.enabled else 'DISABLED' log = '' return {'configuration': self.get_config(), 'state': state, 'log': log} def launch(self): self.shinken_thread = threader.create_and_launch( self.main_thread, name='Export nodes/checks and states to Shinken', essential=True, part='shinken') def new_node_callback(self, node_uuid=None): if not self.enabled: return self.node_changes.append(('new-node', node_uuid)) self.regenerate_flag = True def delete_node_callback(self, node_uuid=None): if not self.enabled: return self.node_changes.append(('delete-node', node_uuid)) self.regenerate_flag = True def change_node_callback(self, node_uuid=None): if not self.enabled: return self.node_changes.append(('change-node', node_uuid)) self.regenerate_flag = True def sanatize_check_name(self, cname): return 'Agent-%s' % cname.split('/')[-1] def export_all_states(self): p = self.external_command_file if not os.path.exists(p): self.logger.warning( 'Shinken command file %s is missing, skipping node information export' % p) return # Now the nagios is ready, we can export our states for nid in self.export_states_uuids: self.__export_states_into_shinken( nid) # update it's inner checks states self.export_states_uuids.clear() def __export_states_into_shinken(self, nuuid): p = self.external_command_file v = kvmgr.get_key('__health/%s' % nuuid) if v is None or v == '': self.logger.error('Cannot access to the checks list for', nuuid) return lst = jsoner.loads(v) for cname in lst: v = kvmgr.get_key('__health/%s/%s' % (nuuid, cname)) if v is None: # missing check entry? 
not a real problem continue check = jsoner.loads(v) self.logger.debug('CHECK VALUE %s' % check) try: mode = 'w' if PY3 else 'a' # codecs.open got issue with a in python 3 f = codecs.open(p, mode, encoding="utf-8") cmd = '[%s] PROCESS_SERVICE_CHECK_RESULT;%s;%s;%d;%s\n' % ( int(time.time()), nuuid, self.sanatize_check_name(cname), check['state_id'], check['output']) self.logger.debug('SAVING COMMAND %s' % cmd) f.write(cmd) f.flush() f.close() except Exception as exp: self.logger.error('Shinken command file write fail: %s' % exp) return def __get_node_cfg_sha_paths(self, nid): cfg_p = os.path.join(self.cfg_path, nid + '.cfg') sha_p = os.path.join(self.cfg_path, nid + '.sha1') return (cfg_p, sha_p) def generate_node_file(self, n): uuid = n.get('uuid') if not os.path.exists(self.cfg_path): try: os.mkdir(self.cfg_path) except Exception as exp: self.logger.error('Cannot create shinken directory at %s : %s', self.cfg_path, str(exp)) return self.logger.debug('Generating cfg/sha file for node %s' % n) p, shap = self.__get_node_cfg_sha_paths(uuid) # p = os.path.join(self.cfg_path, uuid + '.cfg') ptmp = p + '.tmp' # shap = os.path.join(self.cfg_path, uuid + '.sha1') shaptmp = shap + '.tmp' old_sha_value = '' if os.path.exists(shap): try: f = open(shap, 'r') old_sha_value = f.read().strip() f.close() except Exception as exp: self.logger.error('Cannot read old sha file value at %s: %s' % (shap, exp)) tpls = n.get('groups', [])[:] # make a copy, because we will modify it zone = n.get('zone', '') if zone: tpls.append(zone) tpls.insert(0, 'agent,opsbro') # get checks names and sort them so file il always the same cnames = list(n.get('checks', {}).keys()) # list() for python3 cnames.sort() # Services must be purely passive, and will only trigger once buf_service = '''define service{ host_name %s service_description %s use generic-service active_checks_enabled 0 passive_checks_enabled 1 check_command check-host-alive max_check_attempts 1 \n}\n ''' # NOTE: nagios is not liking templates that are not exiting, so only export with generic-host # shinken don't care, so we can give all we want here use_value = ','.join(tpls) if self.monitoring_tool == 'nagios': use_value = 'generic-host' buf = '''# Auto generated host, do not edit \ndefine host{ host_name %s display_name %s address %s use %s check_period 24x7 check_interval 1 retry_interval 1 max_check_attempts 2 \n}\n \n%s\n''' % (n['uuid'], n['name'], n['addr'], use_value, '\n'.join([ buf_service % (n['uuid'], self.sanatize_check_name(cname)) for cname in cnames ])) buf_sha = get_sha1_hash(buf) # if it the same as before? 
self.logger.debug('COMPARING OLD SHA/NEWSHA= %s %s' % (old_sha_value, buf_sha)) if buf_sha == old_sha_value: self.logger.debug('SAME SHA VALUE, SKIP IT') return self.logger.info('Will generate in path %s (sha1=%s): \n%s' % (p, buf_sha, buf)) try: # open both file, so if one goes wrong, will be consistent fcfg = open(ptmp, 'w') fsha = open(shaptmp, 'w') # save cfg file fcfg.write(buf) fcfg.close() shutil.move(ptmp, p) # and then sha one fsha.write(buf_sha) fsha.close() shutil.move(shaptmp, shap) except IOError as exp: try: fcfg.close() except: pass try: fsha.close() except: pass self.logger.error('Cannot create shinken node file at %s : %s' % (p, exp)) return self.logger.info('Generated file %s for node %s' % (p, uuid)) # We did change configuration, reload shinken self.reload_flag = True # A specific node id was detected as not need, try to clean it def clean_node_files(self, nid): cfgp, shap = self.__get_node_cfg_sha_paths(nid) if os.path.exists(cfgp): try: os.unlink(cfgp) # We did remove a file, reload shinken so self.reload_flag = True except IOError as exp: self.logger.error('Cannot remove deprecated file %s' % cfgp) if os.path.exists(shap): try: os.unlink(shap) except IOError as exp: self.logger.error('Cannot remove deprecated file %s' % shap) def clean_cfg_dir(self): if not self.cfg_path: # nothing to clean... return node_keys = gossiper.nodes.keys() self.logger.debug('Current nodes uuids: %s' % node_keys) # First look at cfg file that don't match our inner elements, based on their file name # Note: if the user did do something silly, no luck for him! cfgs = glob.glob('%s/*.cfg' % self.cfg_path) self.logger.info('Looking at files for cleaning %s' % cfgs) lpath = len(self.cfg_path) + 1 for cfg in cfgs: fuuid_ = cfg[ lpath:-len('.cfg')] # get only the uuid part of the file name self.logger.debug('Should we clean cfg file %s' % fuuid_) if fuuid_ not in node_keys: self.logger.info('We clean deprecated cfg file %s' % cfg) self.clean_node_files(fuuid_) # main method to export http interface. Must be in a method that got # a self entry def main_thread(self): # If the detector did not run, we are not sure about the groups of the local node # so wait for it to be run, so we can generate shinken file ok from start while detecter.did_run == False: time.sleep(1) self.enabled = self.get_parameter('enabled') while not self.enabled: self.enabled = self.get_parameter('enabled') time.sleep(1) if self.cfg_path is not None: self.clean_cfg_dir() # First look at all nodes in the gossip ring and regerate them node_keys = gossiper.nodes.keys() for nid in node_keys: n = gossiper.get(nid) if n is None: continue self.generate_node_file(n) while not stopper.is_stop(): self.logger.debug('Shinken loop, regenerate [%s]' % self.regenerate_flag) # If we can, export all states into the nagios/shinken daemon as passive checks self.export_all_states() time.sleep(1) # If not initialize, skip loop if self.cfg_path is None or gossiper is None: continue # If nothing to do in configuration, skip it too if not self.regenerate_flag: continue self.logger.info('Shinken callback raised, managing events: %s' % self.node_changes) # Set that we will manage all now self.regenerate_flag = False node_ids = self.node_changes self.node_changes = [] for (evt, nid) in node_ids: n = gossiper.get(nid) if evt == 'new-node': if n is None: # maybe someone just delete the node? 
continue self.logger.info('Manage new node %s' % n) self.generate_node_file(n) self.export_states_uuids.add(nid) elif evt == 'delete-node': self.logger.info('Removing deleted node %s' % nid) self.clean_node_files(nid) elif evt == 'change-node': self.logger.info( 'A node did change, updating its configuration. Node %s' % nid) self.generate_node_file(n) self.export_states_uuids.add(nid) # If we need to reload and have a reload commmand, do it if self.reload_flag and self.reload_command: self.reload_flag = False rc, stdout, stderr = exec_command(self.reload_command) stdout += stderr if rc != 0: self.logger.error('Cannot reload monitoring daemon: %s' % stdout) return self.logger.info('Monitoring daemon reload: OK') payload = {'type': 'shinken-restart'} gossiper.stack_event_broadcast(payload)
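# Example sketch (illustrative, not part of the module above): the passive check
# results exported by export_all_states() use the classic Nagios/Shinken
# PROCESS_SERVICE_CHECK_RESULT external command line. The uuid, check name and
# output below are made-up values.
import time


def passive_check_line(host_name, service_description, state_id, output):
    # state_id: 0=OK, 1=WARNING, 2=CRITICAL, 3=UNKNOWN
    return '[%d] PROCESS_SERVICE_CHECK_RESULT;%s;%s;%d;%s\n' % (
        int(time.time()), host_name, service_description, state_id, output)


# The module appends such lines to the daemon command file,
# e.g. /var/lib/shinken/shinken.cmd by default.
line = passive_check_line('a1b2c3d4-node-uuid', 'Agent-cpu', 0, 'CPU load is fine')
print(line.strip())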
class StatsdModule(ListenerModule): implement = 'statsd' parameters = { 'enabled_if_group': StringParameter(default='statsd-listener'), 'port': IntParameter(default=8125), 'interval': IntParameter(default=10), 'address': StringParameter(default='0.0.0.0'), } def __init__(self): ListenerModule.__init__(self) self.statsd = None self.enabled = False self.enabled_if_group = 'statsd-listener' self.port = 0 self.udp_sock = None self.addr = '0.0.0.0' self.last_write = time.time() self.nb_data = 0 # Do not step on your own foot... self.stats_lock = threading.RLock() # our main data structs self.gauges = {} self.timers = {} self.histograms = {} self.counters = {} # Numpy lib is heavy, don't load it unless we really need it self.np = None # if we never got any metrics, we do a large wait for thread # but as soon as we have one, go into small waits self.did_have_metrics = False def prepare(self): self.logger.debug('Statsd: prepare phase') self.statsd_port = self.get_parameter('port') self.stats_interval = self.get_parameter('interval') self.addr = self.get_parameter('address') # Prepare to open the UDP port def __open_socket(self): # We need the numpy if self.np is None: try: import numpy as np self.np = np except ImportError: self.logger.error('The numpy librairy is not installed') self.np = None return self.udp_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) # UDP self.udp_sock.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, 1048576) self.udp_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) self.logger.debug( self.udp_sock.getsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF)) self.udp_sock.bind((self.addr, self.statsd_port)) self.logger.info("TS UDP port open", self.statsd_port) self.logger.debug( "UDP RCVBUF", self.udp_sock.getsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF)) def __close_socket(self): if self.udp_sock: self.udp_sock.close() self.udp_sock = None def get_info(self): state = 'STARTED' if self.enabled else 'DISABLED' log = '' if self.enabled and self.np is None: log = 'ERROR: cannot start the module: missing python-numpy package' state = 'ERROR' return {'configuration': self.get_config(), 'state': state, 'log': log} def launch(self): threader.create_and_launch(self.launch_statsd_udp_listener, name='UDP port:%d listening' % self.statsd_port, essential=True, part='statsd') threader.create_and_launch(self.launch_compute_stats_thread, name='Stats computing', essential=True, part='statsd') # The compute stats thread compute the STATSD values each X # seconds and push them into the classic TS part def launch_compute_stats_thread(self): while not stopper.is_stop(): now = time.time() if now > self.last_write + self.stats_interval: self.compute_stats() self.last_write = now if self.did_have_metrics: # small wait time.sleep(0.1) else: time.sleep(5) # can wait a bit for the first run def compute_stats(self): now = int(time.time()) self.logger.debug("Computing stats") # First gauges, we take the data and put a void dict instead so the other thread can work now with self.stats_lock: gauges = self.gauges self.gauges = {} for mname in gauges: _sum, nb, _min, _max = gauges[mname] _avg = _sum / float(nb) key = 'stats.gauges.' + mname tsmgr.tsb.add_value(now, key, _avg) key = 'stats.gauges.' + mname + '.min' tsmgr.tsb.add_value(now, key, _min) key = 'stats.gauges.' + mname + '.max' tsmgr.tsb.add_value(now, key, _max) # Now counters with self.stats_lock: counters = self.counters self.counters = {} for mname in counters: cvalue, ccount = counters[mname] # count key = 'stats.gauges.' 
+ mname + '.count' tsmgr.tsb.add_value(now, key, cvalue) # rate key = 'stats.gauges.' + mname + '.rate' tsmgr.tsb.add_value(now, key, cvalue / self.stats_interval) # Now timers, lot of funs :) with self.stats_lock: timers = self.timers self.timers = {} _t = time.time() for (mname, timer) in timers.items(): # We will need to compute the mean_99, count_99, upper_99, sum_99, sum_quares_99 # but also std, upper, lower, count, count_ps, sum, sum_square, mean, median _t = time.time() npvalues = self.np.array(timer) # Mean mean = self.np.mean(npvalues) key = 'stats.timers.' + mname + '.mean' tsmgr.tsb.add_value(now, key, mean) # Upper 99th, percentile upper_99 = self.np.percentile(npvalues, 99) key = 'stats.timers.' + mname + '.upper_99' tsmgr.tsb.add_value(now, key, upper_99) # Sum 99 sum_99 = npvalues[:(npvalues < upper_99).argmin()].sum() key = 'stats.timers.' + mname + '.sum_99' tsmgr.tsb.add_value(now, key, sum_99) # Standard deviation std = self.np.std(npvalues) key = 'stats.timers.' + mname + '.std' tsmgr.tsb.add_value(now, key, std) # Simple count count = len(timer) key = 'stats.timers.' + mname + '.count' tsmgr.tsb.add_value(now, key, count) # Sum of all _sum = self.np.sum(npvalues) key = 'stats.timers.' + mname + '.sum' tsmgr.tsb.add_value(now, key, _sum) # Median of all median = self.np.percentile(npvalues, 50) key = 'stats.timers.' + mname + '.median' tsmgr.tsb.add_value(now, key, median) # Upper of all upper = self.np.max(npvalues) key = 'stats.timers.' + mname + '.upper' tsmgr.tsb.add_value(now, key, upper) # Lower of all lower = self.np.min(npvalues) key = 'stats.timers.' + mname + '.lower' tsmgr.tsb.add_value(now, key, lower) # This is ht main STATSD UDP listener thread. Should not block and # be as fast as possible def launch_statsd_udp_listener(self): while not stopper.is_stop(): if_group = self.get_parameter('enabled_if_group') self.enabled = gossiper.is_in_group(if_group) # Ok, if we are not enabled, so not even talk to statsd if not self.enabled: self.__close_socket() time.sleep(1) continue # maybe we were enabled, then not, then again, if so re-prepare if self.udp_sock is None: self.__open_socket() # Maybe we f**k on the socket or the numpy lib (maybe installation in progress) if self.udp_sock is None: self.logger.error( 'Seems that the socket or numpy are not realy, postpone the module initialiation' ) time.sleep(1) continue try: data, addr = self.udp_sock.recvfrom( 65535) # buffer size is 1024 bytes except socket.timeout: # loop until we got something continue self.logger.debug("UDP: received message:", data, addr) # No data? bail out :) if len(data) == 0: continue self.logger.debug("GETDATA", data) for line in data.splitlines(): # avoid invalid lines if '|' not in line: continue elts = line.split('|', 1) # invalid, no type in the right part if len(elts) == 1: continue _name_value = elts[0].strip() # maybe it's an invalid name... if ':' not in _name_value: continue _nvs = _name_value.split(':') if len(_nvs) != 2: continue mname = _nvs[0].strip() # We have a ral value, so we will allow now smaller wait time self.did_have_metrics = True # Two cases: it's for me or not hkey = hashlib.sha1(mname).hexdigest() ts_node_manager = gossiper.find_group_node('ts', hkey) # if it's me that manage this key, I add it in my backend if ts_node_manager != gossiper.uuid: node = gossiper.get(ts_node_manager) # threads are dangerous things... 
if node is None: continue # TODO: do bulk send of this, like for graphite sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) # do NOT use the node['port'], it's the internal communication, not the graphite one! sock.sendto(line, (node['addr'], self.statsd_port)) sock.close() continue # Here we are sure it's really for us, so manage it :) value = to_best_int_float(_nvs[1].strip()) if not mname or value is None: continue # Look at the type of the data _type = elts[1].strip() if len(_type) == 0: continue ## Gauge: <metric name>:<value>|g elif _type == 'g': self.nb_data += 1 self.logger.log('GAUGE', mname, value) with self.stats_lock: gentry = self.gauges.get(mname, None) if gentry is None: # sum, nb, min, max gentry = (0.0, 0, None, None) _sum, nb, _min, _max = gentry _sum += value nb += 1 if _min is None or value < _min: _min = value if _max is None or value > _max: _max = value self.gauges[mname] = (_sum, nb, _min, _max) self.logger.debug('NEW GAUGE', mname, self.gauges[mname]) ## Timers: <metric name>:<value>|ms ## But also ## Histograms: <metric name>:<value>|h elif _type == 'ms' or _type == 'h': self.logger.debug('timers', mname, value) # TODO: avoid the SET each time timer = self.timers.get(mname, []) timer.append(value) self.timers[mname] = timer ## Counters: <metric name>:<value>|c[|@<sample rate>] elif _type == 'c': self.nb_data += 1 self.logger.info('COUNTER', mname, value, "rate", 1) with self.stats_lock: cvalue, ccount = self.counters.get(mname, (0, 0)) self.counters[mname] = (cvalue + value, ccount + 1) self.logger.debug('NEW COUNTER', mname, self.counters[mname]) ## Meters: <metric name>:<value>|m elif _type == 'm': self.logger.debug('METERs', mname, value) else: # unknow type, maybe a c[|@<sample rate>] if _type[0] == 'c': self.nb_data += 1 if not '|' in _type: continue srate = _type.split('|')[1].strip() if len(srate) == 0 or srate[0] != '@': continue try: rate = float(srate[1:]) except ValueError: continue # Invalid rate, 0.0 is invalid too ;) if rate <= 0.0 or rate > 1.0: continue self.logger.debug('COUNTER', mname, value, "rate", rate) with self.stats_lock: cvalue, ccount = self.counters.get(mname, (0, 0)) self.logger.debug('INCR counter', (value / rate)) self.counters[mname] = (cvalue + (value / rate), ccount + 1 / rate) self.logger.debug('NEW COUNTER', mname, self.counters[mname])
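# Example sketch (illustrative, not part of the module above): the UDP payload format
# handled by launch_statsd_udp_listener() is '<metric name>:<value>|<type>[|@<sample rate>]',
# and counters received with a sample rate are scaled back by 1/rate, as in the module.
# Metric names below are made up.
def parse_statsd_line(line):
    name_value, _type = line.split('|', 1)
    name, raw_value = name_value.split(':', 1)
    value = float(raw_value)
    if _type.startswith('c') and '|@' in _type:
        rate = float(_type.split('|@', 1)[1])
        return name, value / rate, 'c'   # one hit sampled at 0.1 counts as 10
    return name, value, _type


print(parse_statsd_line('web.hits:1|c|@0.1'))   # ('web.hits', 10.0, 'c')
print(parse_statsd_line('web.latency:320|ms'))  # ('web.latency', 320.0, 'ms')
print(parse_statsd_line('web.users:42|g'))      # ('web.users', 42.0, 'g')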
class DNSModule(ListenerModule): implement = 'dns' parameters = { 'enabled_if_group': StringParameter(default='dns-listener'), 'port': IntParameter(default=6766), 'domain': StringParameter(default='.opsbro'), } def __init__(self): super(DNSModule, self).__init__() self.enabled = False self.port = 0 self.domain = '' self.sock = None # Let my logger to the sub class DNSQuery.logger = self.logger def get_my_parameters(self): if_group = self.get_parameter('enabled_if_group') enabled = gossiper.is_in_group(if_group) self.logger.debug('Looking if the group %s is matching: %s' % (if_group, enabled)) port = self.get_parameter('port') domain = self.get_parameter('domain') # assume that domain is like .foo. if not domain.endswith('.'): domain += '.' if not domain.startswith('.'): domain = '.' + domain return enabled, port, domain def get_info(self): state = 'STARTED' if self.enabled else 'DISABLED' log = '' return {'configuration': self.get_config(), 'state': state, 'log': log} def launch(self): threader.create_and_launch(self.do_launch, name='UDP port:%d listening' % self.port, essential=True, part='dns') def close_socket(self): if self.sock is None: return try: self.sock.close() except Exception as exp: self.logger.error('Cannot close DNS socket: %s' % exp) self.sock = None def bind(self): # Always be sure to close our socket if binding a new self.close_socket() self.logger.info('Opening UDP port') # Prepare the socket in the prepare phase because it's mandatory self.sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) self.logger.info('DNS launched server port %d' % self.port) try: self.sock.bind(('', self.port)) except Exception as exp: self.logger.error('Cannot open the DNS port %s : %s' % (self.port, exp)) self.sock = None def do_launch(self): # If the detector did not run, we are not sure about the groups of the local node # so wait for it to be run while not detecter.did_run: time.sleep(1) while not stopper.is_stop(): # Note: domain is dynamic in analysis, don't need to look at differences was_enabled, prev_port = self.enabled, self.port self.enabled, self.port, self.domain = self.get_my_parameters() # Manage stop or skip loop if not self.enabled: # If we are going to stop, close our socket and wait for new enabled if was_enabled: self.close_socket() # Ok wait a bit time.sleep(1) continue # Multiple cases will need us to open/reopen the socket # but we want to do it only once reopen = False # We are enabled, maybe we were not just before # if so we must bind our port if not was_enabled: reopen = True # Maybe just the port did change if self.port != prev_port: reopen = True # Maybe we fail to open it before (port already open ?) if self.sock is None: reopen = True # Ok if we need to reopen, do it if reopen: self.bind() # But maybe we did miss the bind # so skip this turn if self.sock is None: time.sleep(1) continue # Ok we are good :) try: data, addr = self.sock.recvfrom(1024) except socket.timeout: continue # loop until we got some data :) try: p = DNSQuery(data) r = p.lookup_for_nodes(self.domain) self.logger.debug("DNS lookup nodes response:", r) self.sock.sendto(p.response(r), addr) except Exception: self.logger.error('Module got issue: %s' % (str(traceback.format_exc())))
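# Example sketch (illustrative, not part of the module above): get_my_parameters()
# normalizes the configured DNS suffix so lookups can always assume a domain of the
# form '.something.'.
def normalize_domain(domain):
    if not domain.endswith('.'):
        domain += '.'
    if not domain.startswith('.'):
        domain = '.' + domain
    return domain


print(normalize_domain('opsbro'))    # .opsbro.
print(normalize_domain('.opsbro'))   # .opsbro.
print(normalize_domain('.opsbro.'))  # .opsbro.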
class Mongodb(Collector): parameters = { 'uri': StringParameter(default='mongodb://localhost'), 'user': StringParameter(default=''), 'password': StringParameter(default=''), 'replicat_set': BoolParameter(default=False), } def __init__(self): super(Mongodb, self).__init__() self.pymongo = None self.mongoDBStore = None def _clean_struct(self, e): to_del = [] if isinstance(e, dict): for (k, v) in e.items(): if isinstance(v, dict): self._clean_struct(v) continue if isinstance(v, list) or isinstance(v, tuple): for sub_e in v: self._clean_struct(sub_e) continue if not isinstance(v, Number) and not isinstance(v, basestring): self.logger.debug('CLEANING bad entry type: %s %s %s' % (k, v, type(v))) to_del.append(k) continue for k in to_del: del e[k] def launch(self): logger = self.logger logger.debug('getMongoDBStatus: start') if not self.is_in_group('mongodb'): self.set_not_eligible( 'Please add the mongodb group to enable this collector.') return # Try to import pymongo from system (will be the best choice) # but if no available, switch to the embedded one # NOTE: the embedded is a 2.9.2 with centos 7.so file, but in other ditro only the c # extension will not load, but it's not a real problem as we don't care about the lib perf here if self.pymongo is None: try: import pymongo self.pymongo = pymongo except ImportError: my_dir = os.path.abspath(os.path.dirname(__file__)) sys.path.insert(0, my_dir) try: import pymongo self.pymongo = pymongo except ImportError as exp: self.set_error( 'Unable to import pymongo library, even the embedded one (%s)' % exp) return False finally: try: sys.path.remove(my_dir) except: pass try: mongoURI = '' parsed = urlparse(self.get_parameter('uri')) # Can't use attributes on Python 2.4 if parsed[0] != 'mongodb': mongoURI = 'mongodb://' if parsed[2]: if parsed[0]: mongoURI = mongoURI + parsed[0] + ':' + parsed[2] else: mongoURI = mongoURI + parsed[2] else: mongoURI = self.get_parameter('uri') logger.debug('-- mongoURI: %s', mongoURI) if hasattr(self.pymongo, 'Connection'): # Old pymongo conn = self.pymongo.Connection(mongoURI, slave_okay=True) else: # new pymongo (> 2.9.5) conn = self.pymongo.MongoClient(mongoURI) logger.debug('Connected to MongoDB') except self.pymongo.errors.ConnectionFailure as exp: self.set_error( 'Unable to connect to MongoDB server %s - Exception = %s' % (mongoURI, exp)) return False # Older versions of pymongo did not support the command() # method below. 
try: db = conn['local'] # Server status statusOutput = db.command( 'serverStatus') # Shorthand for {'serverStatus': 1} logger.debug('getMongoDBStatus: executed serverStatus') # Setup status = {'available': True} self._clean_struct( statusOutput) # remove objects type we do not want status.update(statusOutput) # Version try: status['version'] = statusOutput['version'] logger.debug('getMongoDBStatus: version %s', statusOutput['version']) except KeyError as ex: logger.error( 'getMongoDBStatus: version KeyError exception = %s', ex) pass # Global locks try: logger.debug('getMongoDBStatus: globalLock') status['globalLock'] = {} status['globalLock']['ratio'] = statusOutput['globalLock'][ 'ratio'] status['globalLock']['currentQueue'] = {} status['globalLock']['currentQueue']['total'] = statusOutput[ 'globalLock']['currentQueue']['total'] status['globalLock']['currentQueue']['readers'] = statusOutput[ 'globalLock']['currentQueue']['readers'] status['globalLock']['currentQueue']['writers'] = statusOutput[ 'globalLock']['currentQueue']['writers'] except KeyError as ex: logger.debug( 'getMongoDBStatus: globalLock KeyError exception = %s' % ex) pass # Memory try: logger.debug('getMongoDBStatus: memory') status['mem'] = {} status['mem']['resident'] = statusOutput['mem']['resident'] status['mem']['virtual'] = statusOutput['mem']['virtual'] status['mem']['mapped'] = statusOutput['mem']['mapped'] except KeyError as ex: logger.debug( 'getMongoDBStatus: memory KeyError exception = %s', ex) pass # Connections try: logger.debug('getMongoDBStatus: connections') status['connections'] = {} status['connections']['current'] = statusOutput['connections'][ 'current'] status['connections']['available'] = statusOutput[ 'connections']['available'] except KeyError as ex: logger.debug( 'getMongoDBStatus: connections KeyError exception = %s', ex) pass # Extra info (Linux only) try: logger.debug('getMongoDBStatus: extra info') status['extraInfo'] = {} status['extraInfo']['heapUsage'] = statusOutput['extra_info'][ 'heap_usage_bytes'] status['extraInfo']['pageFaults'] = statusOutput['extra_info'][ 'page_faults'] except KeyError as ex: logger.debug( 'getMongoDBStatus: extra info KeyError exception = %s', ex) pass # Background flushing try: logger.debug('getMongoDBStatus: backgroundFlushing') status['backgroundFlushing'] = {} delta = datetime.datetime.utcnow( ) - statusOutput['backgroundFlushing']['last_finished'] status['backgroundFlushing'][ 'secondsSinceLastFlush'] = delta.seconds status['backgroundFlushing']['lastFlushLength'] = statusOutput[ 'backgroundFlushing']['last_ms'] status['backgroundFlushing']['flushLengthAvrg'] = statusOutput[ 'backgroundFlushing']['average_ms'] except KeyError as ex: logger.debug( 'getMongoDBStatus: backgroundFlushing KeyError exception = %s', ex) pass # Per second metric calculations (opcounts and asserts) try: if self.mongoDBStore is None: logger.debug( 'getMongoDBStatus: per second metrics no cached data, so storing for first time' ) self.setMongoDBStore(statusOutput) else: logger.debug( 'getMongoDBStatus: per second metrics cached data exists' ) accessesPS = float( statusOutput['indexCounters']['btree']['accesses'] - self.mongoDBStore['indexCounters']['btree'] ['accessesPS']) / 60 if accessesPS >= 0: status['indexCounters'] = {} status['indexCounters']['btree'] = {} status['indexCounters']['btree'][ 'accessesPS'] = accessesPS status['indexCounters']['btree']['hitsPS'] = float( statusOutput['indexCounters']['btree']['hits'] - self.mongoDBStore['indexCounters']['btree'] ['hitsPS']) / 60 
status['indexCounters']['btree']['missesPS'] = float( statusOutput['indexCounters']['btree']['misses'] - self.mongoDBStore['indexCounters']['btree'] ['missesPS']) / 60 status['indexCounters']['btree']['missRatioPS'] = float( statusOutput['indexCounters']['btree']['missRatio'] - self.mongoDBStore['indexCounters']['btree'] ['missRatioPS']) / 60 status['opcounters'] = {} status['opcounters']['insertPS'] = float( statusOutput['opcounters']['insert'] - self.mongoDBStore['opcounters']['insertPS']) / 60 status['opcounters']['queryPS'] = float( statusOutput['opcounters']['query'] - self.mongoDBStore['opcounters']['queryPS']) / 60 status['opcounters']['updatePS'] = float( statusOutput['opcounters']['update'] - self.mongoDBStore['opcounters']['updatePS']) / 60 status['opcounters']['deletePS'] = float( statusOutput['opcounters']['delete'] - self.mongoDBStore['opcounters']['deletePS']) / 60 status['opcounters']['getmorePS'] = float( statusOutput['opcounters']['getmore'] - self.mongoDBStore['opcounters']['getmorePS']) / 60 status['opcounters']['commandPS'] = float( statusOutput['opcounters']['command'] - self.mongoDBStore['opcounters']['commandPS']) / 60 status['asserts'] = {} status['asserts']['regularPS'] = float( statusOutput['asserts']['regular'] - self.mongoDBStore['asserts']['regularPS']) / 60 status['asserts']['warningPS'] = float( statusOutput['asserts']['warning'] - self.mongoDBStore['asserts']['warningPS']) / 60 status['asserts']['msgPS'] = float( statusOutput['asserts']['msg'] - self.mongoDBStore['asserts']['msgPS']) / 60 status['asserts']['userPS'] = float( statusOutput['asserts']['user'] - self.mongoDBStore['asserts']['userPS']) / 60 status['asserts']['rolloversPS'] = float( statusOutput['asserts']['rollovers'] - self.mongoDBStore['asserts']['rolloversPS']) / 60 self.setMongoDBStore(statusOutput) else: logger.debug( 'getMongoDBStatus: per second metrics negative value calculated, mongod likely restarted, so clearing cache' ) self.setMongoDBStore(statusOutput) except KeyError as ex: logger.debug( 'getMongoDBStatus: per second metrics KeyError exception = %s' % ex) pass # Cursors try: logger.debug('getMongoDBStatus: cursors') status['cursors'] = {} status['cursors']['totalOpen'] = statusOutput['cursors'][ 'totalOpen'] except KeyError as ex: logger.debug( 'getMongoDBStatus: cursors KeyError exception = %s' % ex) pass # Replica set status if self.get_parameter('replicat_set'): logger.debug('getMongoDBStatus: get replset status too') # isMaster (to get state isMaster = db.command('isMaster') logger.debug('getMongoDBStatus: executed isMaster') status['replSet'] = {} status['replSet']['setName'] = isMaster['setName'] status['replSet']['isMaster'] = isMaster['ismaster'] status['replSet']['isSecondary'] = isMaster['secondary'] if 'arbiterOnly' in isMaster: status['replSet']['isArbiter'] = isMaster['arbiterOnly'] logger.debug('getMongoDBStatus: finished isMaster') # rs.status() db = conn['admin'] replSet = db.command('replSetGetStatus') logger.debug('getMongoDBStatus: executed replSetGetStatus') status['replSet']['myState'] = replSet['myState'] status['replSet']['members'] = {} for member in replSet['members']: logger.debug( 'getMongoDBStatus: replSetGetStatus looping %s', member['name']) status['replSet']['members'][str(member['_id'])] = {} status['replSet']['members'][str( member['_id'])]['name'] = member['name'] status['replSet']['members'][str( member['_id'])]['state'] = member['state'] # Optime delta (only available from not self) # Calculation is from 
http://docs.python.org/library/datetime.html#datetime.timedelta.total_seconds if 'optimeDate' in member: # Only available as of 1.7.2 deltaOptime = datetime.datetime.utcnow( ) - member['optimeDate'] status['replSet']['members'][str( member['_id'])]['optimeDate'] = ( deltaOptime.microseconds + (deltaOptime.seconds + deltaOptime.days * 24 * 3600) * 10**6) / 10**6 if 'self' in member: status['replSet']['myId'] = member['_id'] # Have to do it manually because total_seconds() is only available as of Python 2.7 else: if 'lastHeartbeat' in member: deltaHeartbeat = datetime.datetime.utcnow( ) - member['lastHeartbeat'] status['replSet']['members'][str( member['_id'])]['lastHeartbeat'] = ( deltaHeartbeat.microseconds + (deltaHeartbeat.seconds + deltaHeartbeat.days * 24 * 3600) * 10**6) / 10**6 if 'errmsg' in member: status['replSet']['members'][str( member['_id'])]['error'] = member['errmsg'] # db.stats() logger.debug('getMongoDBStatus: db.stats() too') status['dbStats'] = {} for database in conn.database_names(): if database != 'config' and database != 'local' and database != 'admin' and database != 'test': logger.debug( 'getMongoDBStatus: executing db.stats() for %s', database) status['dbStats'][database] = conn[database].command( 'dbstats') status['dbStats'][database]['namespaces'] = conn[database][ 'system']['namespaces'].count() # Ensure all strings to prevent JSON parse errors. We typecast on the server for key in status['dbStats'][database].keys(): status['dbStats'][database][key] = str( status['dbStats'][database][key]) # try a float/int cast v = to_best_int_float(status['dbStats'][database][key]) if v is not None: status['dbStats'][database][key] = v except Exception: logger.error('Unable to get MongoDB status - Exception = %s', traceback.format_exc()) return False logger.debug('getMongoDBStatus: completed, returning') return status def setMongoDBStore(self, statusOutput): self.mongoDBStore = {} self.mongoDBStore['indexCounters'] = {} self.mongoDBStore['indexCounters']['btree'] = {} self.mongoDBStore['indexCounters']['btree'][ 'accessesPS'] = statusOutput['indexCounters']['btree']['accesses'] self.mongoDBStore['indexCounters']['btree']['hitsPS'] = statusOutput[ 'indexCounters']['btree']['hits'] self.mongoDBStore['indexCounters']['btree']['missesPS'] = statusOutput[ 'indexCounters']['btree']['misses'] self.mongoDBStore['indexCounters']['btree'][ 'missRatioPS'] = statusOutput['indexCounters']['btree'][ 'missRatio'] self.mongoDBStore['opcounters'] = {} self.mongoDBStore['opcounters']['insertPS'] = statusOutput[ 'opcounters']['insert'] self.mongoDBStore['opcounters']['queryPS'] = statusOutput[ 'opcounters']['query'] self.mongoDBStore['opcounters']['updatePS'] = statusOutput[ 'opcounters']['update'] self.mongoDBStore['opcounters']['deletePS'] = statusOutput[ 'opcounters']['delete'] self.mongoDBStore['opcounters']['getmorePS'] = statusOutput[ 'opcounters']['getmore'] self.mongoDBStore['opcounters']['commandPS'] = statusOutput[ 'opcounters']['command'] self.mongoDBStore['asserts'] = {} self.mongoDBStore['asserts']['regularPS'] = statusOutput['asserts'][ 'regular'] self.mongoDBStore['asserts']['warningPS'] = statusOutput['asserts'][ 'warning'] self.mongoDBStore['asserts']['msgPS'] = statusOutput['asserts']['msg'] self.mongoDBStore['asserts']['userPS'] = statusOutput['asserts'][ 'user'] self.mongoDBStore['asserts']['rolloversPS'] = statusOutput['asserts'][ 'rollovers']
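# Example sketch (illustrative, not part of the collector above): the *PS metrics are
# the delta between the current cumulative server counters and the ones cached on the
# previous run, divided by 60 (the assumed collection interval). A negative delta means
# mongod restarted, in which case the cache is simply reset instead. Numbers below are
# made up.
previous = {'insert': 12000, 'query': 50000}   # cached from the previous run
current = {'insert': 12600, 'query': 53000}    # fresh serverStatus opcounters

per_second = dict((key + 'PS', float(current[key] - previous[key]) / 60)
                  for key in current)
print(per_second)  # {'insertPS': 10.0, 'queryPS': 50.0}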
class ShinkenEnterpriseModule(ConnectorModule): implement = 'shinken-enterprise' parameters = { 'enabled': BoolParameter(default=False), 'file_result': StringParameter(default=''), } # We only work at the stopping phase, when all is finish, to get back our discovery def stopping_agent(self): enabled = self.get_parameter('enabled') if not enabled: return groups = gossiper.groups # no need to copy, the group pointer is in read only self.logger.info( 'Pushing back ours groups and discovery informations to Shinken Enterprise' ) collectors_data = {} for (ccls, e) in collectormgr.collectors.items(): cname, c = collectormgr.get_collector_json_extract(e) collectors_data[cname] = c # In groups=> templates, we do not want : and . in the names _mapping = {':': '--', '.': '--'} use_value = ','.join(groups) for (k, v) in _mapping.items(): use_value = use_value.replace(k, v) payload = { '_AGENT_UUID': gossiper.uuid, 'use': use_value, } # System info system_results = collectors_data.get('system', {}).get('results', {}) hostname = system_results.get('hostname', '') payload['host_name'] = hostname fqdn = system_results.get('fqdn', '') if fqdn: payload['_FQDN'] = fqdn publicip = system_results.get('publicip', '') if publicip: payload['_PUBLIC_IP'] = publicip # which address to use in fact? # how to choose: fqdn > public_ip > hostname if fqdn: payload['address'] = fqdn elif publicip: payload['address'] = publicip else: payload['address'] = hostname # Timezone timezone = collectors_data.get('timezone', {}).get('results', {}).get('timezone', '') if timezone: payload['_TIMEZONE'] = bytes_to_unicode(timezone) cpucount = system_results.get('cpucount', '') if cpucount: payload['_CPU_COUNT'] = str(cpucount) # data must be string linux_distribution = system_results.get('os', {}).get('linux', {}).get( 'distribution', '') if linux_distribution: payload['_LINUX_DISTRIBUTION'] = linux_distribution # Memory physical_memory = collectors_data.get('timezone', {}).get( 'results', {}).get('phys_total', '') if physical_memory: payload['_PHYSICAL_MEMORY'] = physical_memory # Network try: network_interfaces = ','.join( collectors_data.get('interfaces', {}).get('results', {}).keys()) except AttributeError: # was without interfaces network_interfaces = '' if network_interfaces: payload['_NETWORK_INTERFACES'] = network_interfaces # Geoloc (lat and long) try: geoloc = collectors_data.get('geoloc', {}).get('results', {}).get('loc', '') except AttributeError: # was without interfaces geoloc = '' if geoloc and geoloc.count(',') == 1: lat, long = geoloc.split(',', 1) payload['_LAT'] = lat payload['_LONG'] = long # disks try: volumes = ','.join( collectors_data.get('diskusage', {}).get('results', {}).keys()) except AttributeError: volumes = '' if volumes: payload['_VOLUMES'] = volumes file_result = self.get_parameter('file_result') self.logger.info('Writing file result to : %s' % file_result) if file_result: f = open(file_result, 'w') f.write(jsoner.dumps(payload, indent=4)) f.close()
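# Example sketch (illustrative, not part of the module above): group names become the
# Shinken 'use' templates, and ':' / '.' are both rewritten to '--' because they are not
# wanted in template names. Group names below are made up.
groups = ['linux', 'mysql:5.7', 'zone.eu-west']

use_value = ','.join(groups)
for bad_char in (':', '.'):
    use_value = use_value.replace(bad_char, '--')

print(use_value)  # linux,mysql--5--7,zone--eu-west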
class MailHandlerModule(HandlerModule): implement = 'mail' parameters = { 'enabled': BoolParameter(default=False), 'severities': StringListParameter(default=['ok', 'warning', 'critical', 'unknown']), 'contacts': StringListParameter(default=['*****@*****.**']), 'addr_from': StringParameter(default='*****@*****.**'), 'smtp_server': StringParameter(default='localhost'), 'smtps': BoolParameter(default=False), 'check_subject_template': StringParameter(default='mail-check-subject.tpl'), 'check_text_template': StringParameter(default='mail-check-text.tpl'), 'group_subject_template': StringParameter(default='mail-group-subject.tpl'), 'group_text_template': StringParameter(default='mail-group-text.tpl'), 'compliance_subject_template': StringParameter(default='mail-compliance-subject.tpl'), 'compliance_text_template': StringParameter(default='mail-compliance-text.tpl'), } def __init__(self): super(MailHandlerModule, self).__init__() self.jinja2 = libstore.get_jinja2() self.smtplib = None # Check templates, to load them only once self.__computed_templates = { 'check': { 'subject': None, 'text': None }, 'group': { 'subject': None, 'text': None }, 'compliance': { 'subject': None, 'text': None }, } def __send_email(self, addr_from, msg, about_what): # Lazy load smtplib if self.smtplib is None: import smtplib self.smtplib = smtplib smtp_server = self.get_parameter("smtp_server") smtps = self.get_parameter("smtps") contacts = self.get_parameter('contacts') try: self.logger.debug("Handler: MAIL connection to %s" % smtp_server) s = self.smtplib.SMTP(smtp_server, timeout=30) r = s.sendmail(addr_from, contacts, msg.as_string()) s.quit() self.logger.info('Did send an email to %d contacts (%s) about %s' % (len(contacts), ','.join(contacts), about_what)) except Exception: self.logger.error('Cannot send mail: %s' % traceback.format_exc()) def __get_msg(self, addr_from, subject_m, text_m): from email.mime.text import MIMEText from email.header import Header msg = MIMEText(text_m, 'plain', 'utf-8') msg['From'] = addr_from msg['Subject'] = Header(subject_m, 'utf-8') return msg def __get_computed_template(self, for_what, which_template): what_entry = self.__computed_templates[for_what] return what_entry[which_template] def __load_and_compute_one_template(self, for_what, which_template): templates_dir = os.path.join(self.pack_directory, 'templates') pth = self.get_parameter('%s_%s_template' % (for_what, which_template)) full_pth = os.path.join(templates_dir, pth) if not os.path.exists(full_pth): self.logger.error('Missing template file %s_%s_template: %s' % (for_what, which_template, full_pth)) return False try: with codecs.open(full_pth, 'r', 'utf8') as f: buf = f.read() except Exception as exp: self.logger.error( 'Cannot load template file %s_%s_template (%s) : %s' % (for_what, which_template, full_pth, exp)) return False try: tpl = self.jinja2.Template(buf) except Exception as exp: self.logger.error( 'The template %s_%s_template (%s) did raised an error when parsing: %s' % (for_what, which_template, full_pth, exp)) return False # Ok we can save it what_entry = self.__computed_templates[for_what] what_entry[which_template] = tpl return True def __compute_templates(self, for_what): # Maybe it's already computed subject_tpl = self.__get_computed_template(for_what, 'subject') text_tpl = self.__get_computed_template(for_what, 'text') if subject_tpl is not None and text_tpl is not None: return True success = True success &= self.__load_and_compute_one_template(for_what, 'subject') success &= 
self.__load_and_compute_one_template(for_what, 'text') subject_tpl = self.__get_computed_template(for_what, 'subject') text_tpl = self.__get_computed_template(for_what, 'text') return subject_tpl is not None and text_tpl is not None def send_mail_check(self, check): have_templates = self.__compute_templates('check') if not have_templates: self.logger.error( 'We do not have templates available, skiping the email sending' ) return subject_tpl = self.__get_computed_template('check', 'subject') text_tpl = self.__get_computed_template('check', 'text') try: _time = datetime.datetime.fromtimestamp(int( time.time())).strftime('%Y-%m-%d %H:%M:%S') subject_m = subject_tpl.render(check=check, _time=_time) text_m = text_tpl.render(check=check, _time=_time) addr_from = self.get_parameter('addr_from') msg = self.__get_msg(addr_from, subject_m, text_m) self.__send_email(addr_from, msg, 'check state change') except: self.logger.error('Cannot send mail for check: %s' % traceback.format_exc()) def send_mail_group(self, group, group_modification): have_templates = self.__compute_templates('group') if not have_templates: self.logger.error( 'We do not have templates available, skiping the email sending' ) return subject_tpl = self.__get_computed_template('group', 'subject') text_tpl = self.__get_computed_template('group', 'text') try: _time = datetime.datetime.fromtimestamp(int( time.time())).strftime('%Y-%m-%d %H:%M:%S') subject_m = subject_tpl.render( group=group, group_modification=group_modification) text_m = text_tpl.render(group=group, group_modification=group_modification) addr_from = self.get_parameter('addr_from') msg = self.__get_msg(addr_from, subject_m, text_m) self.__send_email(addr_from, msg, 'group modification') except: self.logger.error('Cannot send mail for group modification: %s' % traceback.format_exc()) def send_mail_compliance(self, compliance): have_templates = self.__compute_templates('compliance') if not have_templates: self.logger.error( 'We do not have templates available, skiping the email sending' ) return subject_tpl = self.__get_computed_template('compliance', 'subject') text_tpl = self.__get_computed_template('compliance', 'text') try: _time = datetime.datetime.fromtimestamp(int( time.time())).strftime('%Y-%m-%d %H:%M:%S') subject_m = subject_tpl.render(compliance=compliance, _time=_time) text_m = text_tpl.render(compliance=compliance, _time=_time) addr_from = self.get_parameter('addr_from') msg = self.__get_msg(addr_from, subject_m, text_m) self.__send_email(addr_from, msg, 'compliance rule state change') except: self.logger.error( 'Cannot send mail for compliance modification: %s' % traceback.format_exc()) def handle(self, obj, event): enabled = self.get_parameter('enabled') if not enabled: self.logger.debug( 'Mail module is not enabled, skipping check alert sent') return self.logger.debug('Manage an obj event: %s (event=%s)' % (obj, event)) evt_type = event['evt_type'] # Checks: only notify about changes if evt_type == 'check_execution': evt_data = event['evt_data'] check_did_change = evt_data['check_did_change'] if check_did_change: self.send_mail_check(obj) # We are launched only if the group did change if evt_type == 'group_change': evt_data = event['evt_data'] group_modification = evt_data['modification'] self.send_mail_group(obj, group_modification) # Compliance: only when change, and only some switch cases should be # notify (drop useless changes) if evt_type == 'compliance_execution': evt_data = event['evt_data'] compliance_did_change = 
evt_data['compliance_did_change'] if compliance_did_change: self.send_mail_compliance(obj)
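# Example sketch (illustrative, not part of the module above): the alert itself is a
# plain UTF-8 MIMEText built from the rendered templates and pushed through smtplib.
# Addresses and SMTP server below are placeholders.
import smtplib
from email.mime.text import MIMEText
from email.header import Header


def send_alert_mail(subject_m, text_m,
                    addr_from='opsbro@example.com',
                    contacts=('admin@example.com',),
                    smtp_server='localhost'):
    msg = MIMEText(text_m, 'plain', 'utf-8')
    msg['From'] = addr_from
    msg['Subject'] = Header(subject_m, 'utf-8')
    s = smtplib.SMTP(smtp_server, timeout=30)
    s.sendmail(addr_from, list(contacts), msg.as_string())
    s.quit()


if __name__ == '__main__':
    send_alert_mail('[opsbro] cpu is CRITICAL', 'Check cpu went CRITICAL on node web-1')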
class Nginx(Collector): parameters = { 'uri': StringParameter(default='http://localhost/nginx_status'), } def __init__(self): super(Nginx, self).__init__() self.nginxRequestsStore = None def launch(self): logger = self.logger if not self.is_in_group('nginx'): self.set_not_eligible('Please add the nginx group to enable this collector.') return logger.debug('getNginxStatus: start') logger.debug('getNginxStatus: config set') try: response = httper.get(self.get_parameter('uri'), timeout=3) except get_http_exceptions() as exp: self.set_error('Unable to get Nginx status - HTTPError = %s' % exp) return False logger.debug('getNginxStatus: urlopen success, start parsing') # Thanks to http://hostingfu.com/files/nginx/nginxstats.py for this code logger.debug('getNginxStatus: parsing connections') try: # Connections parsed = re.search(r'Active connections:\s+(\d+)', response) connections = int(parsed.group(1)) logger.debug('getNginxStatus: parsed connections') logger.debug('getNginxStatus: parsing reqs') # Requests per second parsed = re.search(r'\s*(\d+)\s+(\d+)\s+(\d+)', response) if not parsed: logger.debug('getNginxStatus: could not parse response') return False requests = int(parsed.group(3)) logger.debug('getNginxStatus: parsed reqs') if self.nginxRequestsStore == None or self.nginxRequestsStore < 0: logger.debug('getNginxStatus: no reqs so storing for first time') self.nginxRequestsStore = requests requestsPerSecond = 0 else: logger.debug('getNginxStatus: reqs stored so calculating') logger.debug('getNginxStatus: self.nginxRequestsStore = %s', self.nginxRequestsStore) logger.debug('getNginxStatus: requests = %s', requests) requestsPerSecond = float(requests - self.nginxRequestsStore) / 60 logger.debug('getNginxStatus: requestsPerSecond = %s', requestsPerSecond) self.nginxRequestsStore = requests if connections != None and requestsPerSecond != None: logger.debug('getNginxStatus: returning with data') return {'connections': connections, 'reqPerSec': requestsPerSecond} else: logger.debug('getNginxStatus: returning without data') return False except Exception: self.set_error('Unable to get Nginx status - %s - Exception = %s' % (response, traceback.format_exc())) return False
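# Example sketch (illustrative, not part of the collector above): what a typical nginx
# stub_status payload looks like, and how the two regular expressions used above extract
# the active connection count and the cumulative request counter. The sample text is
# made up.
import re

sample = ('Active connections: 291\n'
          'server accepts handled requests\n'
          ' 16630948 16630948 31070465\n'
          'Reading: 6 Writing: 179 Waiting: 106\n')

connections = int(re.search(r'Active connections:\s+(\d+)', sample).group(1))
requests = int(re.search(r'\s*(\d+)\s+(\d+)\s+(\d+)', sample).group(3))

print(connections)  # 291
print(requests)     # 31070465 (cumulative; the collector turns the delta into req/s)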
class ImraneModule(ListenerModule): implement = 'imrane' parameters = { 'enabled': BoolParameter(default=False), 'collector-group': StringParameter(default='imrane-collector'), 'agregator-group': StringParameter(default='imrane-agregator'), 'database-path': StringParameter(default='/tmp/agregator.db'), } def __init__(self): ListenerModule.__init__(self) # reaping queue self.queue = [] self.enabled = False self.database = None self.cursor = None # Prepare to open the UDP port def prepare(self): self.logger.debug('IMRANE: prepare phase') self.enabled = self.get_parameter('enabled') if self.enabled: self.logger.info("IMRANE: starting") else: self.logger.info('IMRANE is not enabled, skipping it') def get_info(self): state = 'STARTED' if self.enabled else 'DISABLED' log = '' return {'configuration': self.get_config(), 'state': state, 'log': log} def launch(self): threader.create_and_launch(self.launch_database_thread, name='Database thread', essential=True, part='imrane') threader.create_and_launch(self.launch_collector_thread, name='Collector thread', essential=True, part='imrane') def _import_data(self, data): results = data['results'] from_name = data['from'] self.queue.append((from_name, results)) def stopping_agent(self): if self.database: self.logger.info('Closing database') self.database.commit() self.database.close() # Same but for the TCP connections # TODO: use a real daemon part for this, this is not ok for fast receive def launch_database_thread(self): while not stopper.is_stop(): agregator_group = self.get_parameter('agregator-group') database_enabled = gossiper.is_in_group(agregator_group) if not database_enabled: self.logger.debug('IMRANE: not a database thread') time.sleep(1) continue if self.database is None: database_path = self.get_parameter('database-path') self.database = sqlite3.connect(database_path) self.cursor = self.database.cursor() # Create data # TODO: check if not already exists tb_exists = "SELECT name FROM sqlite_master WHERE type='table' AND name='Data'" if not self.cursor.execute(tb_exists).fetchone(): self.cursor.execute( "CREATE TABLE Data(id INTEGER PRIMARY KEY, Epoch INTEGER, HostName TEXT, KeyName TEXT, Value TEXT)" ) self.logger.info('IMRANE: database loop') self.logger.info('IMRANE: manage: %s' % self.queue) # Switch to avoid locking queue = self.queue self.queue = [] now = int(time.time()) for (from_name, results) in queue: self.logger.info('SAVING INTO DATABASE: %s => %s' % (from_name, results)) # TODO: database code for (key, value) in results.items(): q = '''INSERT INTO Data(Epoch, HostName, KeyName, Value) VALUES (%s,'%s','%s','%s')''' % ( now, from_name, key, value) self.logger.info('EXECUTING: %s' % q) self.cursor.execute(q) self.database.commit() time.sleep(1) # Same but for the TCP connections # TODO: use a real daemon part for this, this is not ok for fast receive def launch_collector_thread(self): last_collector_check = 0 while not stopper.is_stop(): collector_group = self.get_parameter('collector-group') collector_enabled = gossiper.is_in_group(collector_group) if not collector_enabled: self.logger.debug('IMRANE: not a collector thread') time.sleep(1) continue self.logger.debug('IMRANE: collector loop') self.logger.debug('IMRANE: manage: %s' % self.queue) imrane_collector = None for collector in collectormgr.collectors.values(): name = collector['name'] if name == 'imrane': imrane_collector = collector break if imrane_collector is None: self.logger.error( 'IMRANE: cannot find the imrane collector, skiping this loop' ) time.sleep(1) continue # 
Maybe this collector did not run since we last look at it, if so, skip it last_check = imrane_collector['last_check'] if last_check == last_collector_check: self.logger.debug( 'IMRANE: the collector did not run since the last loop, skiping this turn' ) time.sleep(1) continue last_collector_check = last_check results = imrane_collector['results'] self.logger.info('IMRANE: collector result: %s' % results) our_node = gossiper.get(gossiper.uuid) our_node_name = our_node['name'] agregator_group = self.get_parameter('agregator-group') agregator_nodes = gossiper.find_group_nodes(agregator_group) if len(agregator_nodes) == 0: self.logger.error( 'IMRANE ERROR: there are no agregator nodes, skiping data sending' ) time.sleep(1) continue agregator_node_uuid = random.choice(agregator_nodes) agregator_node = gossiper.get(agregator_node_uuid) if agregator_node is None: # oups: thread race bug time.sleep(1) continue address = agregator_node['addr'] port = agregator_node['port'] display_name = agregator_node['display_name'] self.logger.info('IMRANE: did choose %s (%s:%s) for sending' % (display_name, address, port)) uri = 'http://%s:%s/imrane' % (address, port) try: r = httper.post( uri, params={ 'results': results, 'from': our_node_name }, headers={'Content-Type': 'application/json;charset=UTF-8'}) self.logger.debug("Result insert", r) except get_http_exceptions() as exp: self.logger.error('Cannot connect to agregator: %s' % exp) # always sleep to not hammer the CPU time.sleep(1) # Export end points to get/list TimeSeries def export_http(self): @http_export('/imrane', method='POST') @http_export('/imrane/', method='POST') def get_ts_values(): self.logger.info('CALLING /imrane POST') try: data_raw = request.body.getvalue() self.logger.info('POST: get body value: %s' % data_raw) data = jsoner.loads(data_raw) self.logger.info('POST: get results: %s' % data) self._import_data(data) except: self.logger.error('IMRANE: ERROR %s' % traceback.format_exc()) return None
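# Example sketch (illustrative, not part of the module above): the same INSERT as
# launch_database_thread(), but using sqlite3 parameter binding instead of string
# interpolation, which avoids quoting issues if a key or value ever contains a quote.
# Table layout is the module's; host name and values below are made up.
import sqlite3
import time

db = sqlite3.connect(':memory:')
cur = db.cursor()
cur.execute("CREATE TABLE Data(id INTEGER PRIMARY KEY, Epoch INTEGER, "
            "HostName TEXT, KeyName TEXT, Value TEXT)")

now = int(time.time())
results = {'cpu': '12', 'load': '0.4'}
for key, value in results.items():
    cur.execute("INSERT INTO Data(Epoch, HostName, KeyName, Value) VALUES (?, ?, ?, ?)",
                (now, 'node-1', key, str(value)))
db.commit()
print(cur.execute("SELECT COUNT(*) FROM Data").fetchone()[0])  # 2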
class SlackHandlerModule(HandlerModule): implement = 'slack' parameters = { 'enabled_if_group': StringParameter(default='slack'), 'severities': StringListParameter(default=['ok', 'warning', 'critical', 'unknown']), 'token': StringParameter(default=''), 'channel': StringParameter(default='#alerts'), } def __init__(self): super(SlackHandlerModule, self).__init__() self.enabled = False def prepare(self): if_group = self.get_parameter('enabled_if_group') self.enabled = gossiper.is_in_group(if_group) def get_info(self): state = 'STARTED' if self.enabled else 'DISABLED' log = '' return {'configuration': self.get_config(), 'state': state, 'log': log} def __try_to_send_message(self, slack, attachments, channel): r = slack.chat.post_message(channel=channel, text='', as_user=True, attachments=attachments) self.logger.debug('[SLACK] return of the send: %s %s %s' % (r.successful, r.__dict__['body']['channel'], r.__dict__['body']['ts'])) def __get_token(self): token = self.get_parameter('token') if not token: token = os.environ.get('SLACK_TOKEN', '') return token def __send_slack_check(self, check): token = self.__get_token() if not token: self.logger.error( '[SLACK] token is not configured on the slack module. skipping slack messages.' ) return slack = Slacker(token) # title = '{date_num} {time_secs} [node:`%s`][addr:`%s`] Check `%s` is going %s' % (gossiper.display_name, gossiper.addr, check['name'], check['state']) content = check['output'] channel = self.get_parameter('channel') colors = {'ok': 'good', 'warning': 'warning', 'critical': 'danger'} node_name = '%s (%s)' % (gossiper.name, gossiper.addr) if gossiper.display_name: node_name = '%s [%s]' % (node_name, gossiper.display_name) attachment = { "pretext": ' ', "text": content, 'color': colors.get(check['state'], '#764FA5'), 'author_name': node_name, 'footer': 'Send by OpsBro on %s' % node_name, 'ts': int(time.time()) } fields = [ { "title": "Node", "value": node_name, "short": True }, { "title": "Check", "value": check['name'], "short": True }, ] attachment['fields'] = fields attachments = [attachment] self.__do_send_message(slack, attachments, channel) def __send_slack_group(self, group, group_modification): token = self.__get_token() if not token: self.logger.error( '[SLACK] token is not configured on the slack module. skipping slack messages.' ) return slack = Slacker(token) # title = '{date_num} {time_secs} [node:`%s`][addr:`%s`] Check `%s` is going %s' % (gossiper.display_name, gossiper.addr, check['name'], check['state']) content = 'The group %s was %s' % (group, group_modification) channel = self.get_parameter('channel') colors = {'remove': 'danger', 'add': 'good'} node_name = '%s (%s)' % (gossiper.name, gossiper.addr) if gossiper.display_name: node_name = '%s [%s]' % (node_name, gossiper.display_name) attachment = { "pretext": ' ', "text": content, 'color': colors.get(group_modification, '#764FA5'), 'author_name': node_name, 'footer': 'Send by OpsBro on %s' % node_name, 'ts': int(time.time()) } fields = [ { "title": "Node", "value": node_name, "short": True }, { "title": "Group:%s" % group_modification, "value": group, "short": True }, ] attachment['fields'] = fields attachments = [attachment] self.__do_send_message(slack, attachments, channel) def __send_slack_compliance(self, compliance): token = self.__get_token() if not token: self.logger.error( '[SLACK] token is not configured on the slack module. skipping slack messages.' 
) return slack = Slacker(token) # title = '{date_num} {time_secs} [node:`%s`][addr:`%s`] Check `%s` is going %s' % (gossiper.display_name, gossiper.addr, check['name'], check['state']) content = 'The compliance %s changed from %s to %s' % ( compliance.get_name(), compliance.get_state(), compliance.get_old_state()) channel = self.get_parameter('channel') state_color = COMPLIANCE_STATE_COLORS.get(compliance.get_state()) color = { 'magenta': '#221220', 'green': 'good', 'cyan': '#cde6ff', 'red': 'danger', 'grey': '#cccccc' }.get(state_color, '#cccccc') node_name = '%s (%s)' % (gossiper.name, gossiper.addr) if gossiper.display_name: node_name = '%s [%s]' % (node_name, gossiper.display_name) attachment = { "pretext": ' ', "text": content, 'color': color, 'author_name': node_name, 'footer': 'Send by OpsBro on %s' % node_name, 'ts': int(time.time()) } fields = [ { "title": "Node", "value": node_name, "short": True }, { "title": "Compliance:%s" % compliance.get_name(), "value": compliance.get_state(), "short": True }, ] attachment['fields'] = fields attachments = [attachment] self.__do_send_message(slack, attachments, channel) def __do_send_message(self, slack, attachments, channel): try: self.__try_to_send_message(slack, attachments, channel) except Exception as exp: self.logger.error('[SLACK] Cannot send alert: %s (%s) %s %s %s' % (exp, type(exp), str(exp), str(exp) == 'channel_not_found', exp.__dict__)) # If it's just that the channel do not exists, try to create it if str(exp) == 'channel_not_found': try: self.logger.info( '[SLACK] Channel %s do no exists. Trying to create it.' % channel) slack.channels.create(channel) except Exception as exp: self.logger.error('[SLACK] Cannot create channel %s: %s' % (channel, exp)) return # Now try to resend the message try: self.__try_to_send_message(slack, attachments, channel) except Exception as exp: self.logger.error( '[SLACK] Did create channel %s but we still cannot send the message: %s' % (channel, exp)) def handle(self, obj, event): if_group = self.get_parameter('enabled_if_group') self.enabled = gossiper.is_in_group(if_group) if not self.enabled: self.logger.debug( 'Slack module is not enabled, skipping check alert sent') return self.logger.debug('Manage an obj event: %s (event=%s)' % (obj, event)) evt_type = event['evt_type'] if evt_type == 'check_execution': evt_data = event['evt_data'] check_did_change = evt_data['check_did_change'] if check_did_change: self.__send_slack_check(obj) if evt_type == 'group_change': evt_data = event['evt_data'] group_modification = evt_data['modification'] self.__send_slack_group(obj, group_modification) # Compliance: only when change, and only some switch cases should be # notify (drop useless changes) if evt_type == 'compliance_execution': evt_data = event['evt_data'] compliance_did_change = evt_data['compliance_did_change'] if compliance_did_change: self.__send_slack_compliance(obj)
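
# A standalone sketch of the retry pattern used by __do_send_message above:
# try to send, and if Slack answers 'channel_not_found', create the channel
# once and retry. The send/create callables are injected so this runs without
# the slacker dependency; with the module's client they would correspond to
# slack.chat.post_message and slack.channels.create. The fake client below is
# purely illustrative.

def send_with_channel_retry(send, create_channel, channel, attachments, logger=print):
    try:
        send(channel=channel, attachments=attachments)
        return True
    except Exception as exp:
        if str(exp) != 'channel_not_found':
            logger('[SLACK] Cannot send alert: %s' % exp)
            return False
    # The channel is missing: create it, then retry exactly once
    try:
        create_channel(channel)
    except Exception as exp:
        logger('[SLACK] Cannot create channel %s: %s' % (channel, exp))
        return False
    try:
        send(channel=channel, attachments=attachments)
        return True
    except Exception as exp:
        logger('[SLACK] Created channel %s but sending still fails: %s' % (channel, exp))
        return False


if __name__ == '__main__':
    existing = set()

    def fake_send(channel, attachments):
        if channel not in existing:
            raise Exception('channel_not_found')

    def fake_create(channel):
        existing.add(channel)

    # First send fails, the channel gets created, the retry succeeds -> True
    print(send_with_channel_retry(fake_send, fake_create, '#alerts', [{'text': 'test'}]))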
class SynologyModule(ListenerModule):
    implement = 'synology'

    parameters = {
        'enabled'         : BoolParameter(default=False),
        'export_uri'      : StringParameter(default='http://92.222.35.193:8080/synology'),
        'customer_key'    : StringParameter(default=''),
        'inventory_number': StringParameter(default=''),
    }

    def __init__(self):
        ListenerModule.__init__(self)
        self.graphite_queue = []  # leftover from the graphite listener module, unused here
        self.enabled = False
        self.export_uri = ''
        self.customer_key = ''
        self.inventory_number = ''

    # Read the configuration parameters
    def prepare(self):
        self.logger.debug('Synology: prepare phase')
        self.enabled = self.get_parameter('enabled')
        self.export_uri = self.get_parameter('export_uri')

    def get_info(self):
        state = 'STARTED' if self.enabled else 'DISABLED'
        log = ''
        return {'configuration': self.get_config(), 'state': state, 'log': log}

    def launch(self):
        threader.create_and_launch(self.launch_main, name='Synology', essential=True, part='synology')

    # Main thread: periodically export the synology collector results to the export uri
    def launch_main(self):
        while not stopper.is_stop():
            self.enabled = self.get_parameter('enabled')
            if not self.enabled:
                time.sleep(1)
                continue
            self.export_uri = self.get_parameter('export_uri')
            self.customer_key = self.get_parameter('customer_key')
            self.inventory_number = self.get_parameter('inventory_number')
            if not self.customer_key:
                self.logger.warning('You must have a customer key')
                time.sleep(1)
                continue
            syno_collector = collectormgr.collectors.get('synology', None)
            if syno_collector is None:
                self.logger.error('The synology collector is missing')
                time.sleep(1)
                continue
            results = syno_collector.get('results', None)
            if results is None:
                self.logger.warning('The synology collector did not run')
                time.sleep(1)
                continue
            try:
                r = httper.post(self.export_uri,
                                params={'uuid'            : gossiper.uuid,
                                        'customer_key'    : self.customer_key,
                                        'inventory_number': self.inventory_number,
                                        'results'         : results},
                                headers={})
                self.logger.debug("Result insert", r)
            except get_http_exceptions() as exp:
                self.logger.error('Cannot connect to export uri datasources: %s' % exp)
            time.sleep(1)
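
# A sketch of the export payload and the guard logic from launch_main above,
# with plain dictionaries standing in for the collector manager and gossiper
# (both hypothetical stand-ins); only the payload keys match the module.

def build_synology_payload(node_uuid, customer_key, inventory_number, collectors):
    if not customer_key:
        raise ValueError('You must have a customer key')
    syno_collector = collectors.get('synology')
    if syno_collector is None:
        raise LookupError('The synology collector is missing')
    results = syno_collector.get('results')
    if results is None:
        raise LookupError('The synology collector did not run yet')
    return {
        'uuid'            : node_uuid,
        'customer_key'    : customer_key,
        'inventory_number': inventory_number,
        'results'         : results,
    }


if __name__ == '__main__':
    fake_collectors = {'synology': {'results': {'disk_status': 'ok'}}}
    print(build_synology_payload('abcd-1234', 'KEY-42', 'INV-7', fake_collectors))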
class GrafanaModule(ConnectorModule): implement = 'grafana' parameters = { 'enabled_if_group': StringParameter(default='grafana-connector'), 'uri': StringParameter(default='http://localhost:3000'), 'api_key': StringParameter(default=''), } def __init__(self): super(GrafanaModule, self).__init__() self.enabled = False self.enabled_if_group = 'grafana-connector' self.uri = 'http://localhost:3000' self.api_key = '' def prepare(self): self.logger.info('Grafana: prepare phase') self.uri = self.get_parameter('uri') self.api_key = self.get_parameter('api_key') def __get_headers(self): return { 'Content-Type': 'application/json;charset=UTF-8', 'Authorization': 'Bearer %s' % self.api_key } def insert_node_into_grafana(self, nuuid): node = gossiper.get(nuuid) if node is None: return name = node['name'] addr = node['addr'] port = node['port'] data_source_name = "%s--opsbro--%s" % (name, nuuid) entry = { "name": data_source_name, "type": "graphite", "url": "http://%s:%d" % (addr, port), "access": "proxy" } uri = '%s/api/datasources' % (self.uri) try: r = httper.post(uri, params=entry, headers=self.__get_headers()) self.logger.debug("Result insert", r) except get_http_exceptions() as exp: self.logger.error('Cannot connect to grafana datasources: %s' % exp) return def remove_data_source(self, data_source_id): self.logger.info( 'Cleaning data source %d from grafana because the node is no more' % data_source_id) uri = '%s/api/datasources/%d' % (self.uri, data_source_id) try: r = httper.delete(uri, headers=self.__get_headers()) self.logger.debug("Result delete", r) except get_http_exceptions() as exp: self.logger.error('Cannot connect to grafana datasources: %s' % exp) return def get_data_sources_from_grafana(self): uri = '%s/api/datasources' % (self.uri) our_data_sources = {} try: api_return = httper.get(uri, headers=self.__get_headers()) try: all_data_sources = jsoner.loads(api_return) except (ValueError, TypeError) as exp: self.logger.error( 'Cannot load json from grafana datasources: %s' % exp) return None except get_http_exceptions() as exp: self.logger.error('Cannot connect to grafana datasources: %s' % exp) return None self.logger.debug("All data sources") self.logger.debug(str(all_data_sources)) # Error message is a dict with just a key: message if isinstance(all_data_sources, dict): error_message = all_data_sources.get('message', '') if error_message: if error_message == 'Unauthorized': self.logger.error( 'Your API key is not autorized to list data sources.') return None self.logger.error('Unknown error from grafana API: %s' % error_message) return None # A data source will look like this: # [{u'name' : u'SuperBla', ## u'database': u'', # u'url': u'http://super:6768', # u'basicAuth': False, # u'jsonData': {}, # u'access': u'proxy', # u'typeLogoUrl': u'public/app/plugins/datasource/graphite/img/graphite_logo.png', # u'orgId': 1, # u'user': u'', # u'password': u'', # u'type': u'graphite', # u'id': 1, # u'isDefault': False}] for data_source in all_data_sources: if data_source.get('type', '') != 'graphite': continue src_name = data_source.get('name', '') if '--opsbro--' in src_name: elts = src_name.split('--opsbro--') if len(elts) == 2: nuuid = elts[1] our_data_sources[nuuid] = data_source return our_data_sources def launch(self): threader.create_and_launch( self.do_launch, name='Grafana module data sources synchronizer', essential=True, part='grafana') def do_launch(self): while not stopper.is_stop(): self.logger.debug('Grafana loop') # We go in enabled when, and only when our group is matching what 
we do expect if_group = self.get_parameter('enabled_if_group') self.enabled = gossiper.is_in_group(if_group) # Ok, if we are not enabled, so not even talk to grafana if not self.enabled: time.sleep(1) continue # Ok now time to work nodes_in_grafana = self.get_data_sources_from_grafana() # If we have an issue to grafana, skip this loop if nodes_in_grafana is None: time.sleep(1) continue nodes_in_grafana_set = set(nodes_in_grafana.keys()) gossip_nodes_uuids = gossiper.nodes.keys( ) # note: nodes is a static dict, no need to lock it gossip_nodes_uuids = set(gossip_nodes_uuids) self.logger.debug("Nodes in grafana", nodes_in_grafana_set) self.logger.debug("Nodes in gossip", gossip_nodes_uuids) nodes_that_must_be_clean = nodes_in_grafana_set - gossip_nodes_uuids nodes_to_insert = gossip_nodes_uuids - nodes_in_grafana_set self.logger.debug("Nodes that must be clean", nodes_that_must_be_clean) self.logger.debug("Nodes to insert into grafana", nodes_to_insert) for nuuid in nodes_to_insert: self.logger.debug("Nodes", nuuid, "must be inserted into grafana") self.insert_node_into_grafana(nuuid) for nuuid in nodes_that_must_be_clean: node_data_source_id = nodes_in_grafana[nuuid]['id'] self.logger.debug( "Node ", nuuid, "is no more need in grafana. Removing its data source") self.remove_data_source(node_data_source_id) # Do not hammer the cpu time.sleep(1)
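
# The reconciliation at the heart of the do_launch loop above, isolated as a
# pure function: compare the node UUIDs known by the gossip layer with the
# data sources already present in Grafana (keyed by UUID, as returned by
# get_data_sources_from_grafana) and compute what to insert and what to
# delete. No HTTP involved; the demo values are hypothetical.

def plan_grafana_sync(gossip_uuids, grafana_data_sources):
    # gossip_uuids: iterable of node UUIDs known by the gossiper
    # grafana_data_sources: dict uuid -> data source dict (must carry 'id')
    in_grafana = set(grafana_data_sources)
    in_gossip = set(gossip_uuids)
    to_insert = in_gossip - in_grafana
    to_clean = [grafana_data_sources[u]['id'] for u in (in_grafana - in_gossip)]
    return to_insert, to_clean


if __name__ == '__main__':
    gossip = ['uuid-a', 'uuid-b']
    grafana = {'uuid-b': {'id': 1}, 'uuid-dead': {'id': 2}}
    print(plan_grafana_sync(gossip, grafana))  # ({'uuid-a'}, [2])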
class Mysql(Collector): parameters = { 'server' : StringParameter(default='127.0.0.1'), 'user' : StringParameter(default='root'), 'password' : StringParameter(default=''), 'port' : IntParameter(default=3306), 'socket' : StringParameter(default='/var/lib/mysql/mysql.sock'), 'replication_enabled': BoolParameter(default=False) } def __init__(self): super(Mysql, self).__init__() self.MySQLdb = None self.mysqlVersion = None self.mysqlConnectionsStore = None self.mysqlSlowQueriesStore = None def launch(self): logger = self.logger logger.debug('getMySQLStatus: start') if not self.is_in_group('mysql'): self.set_not_eligible('Please add the mysql group to enable this collector.') return if self.MySQLdb is None: # Try import MySQLdb, if installed on the system try: import MySQLdb self.MySQLdb = MySQLdb except ImportError as exp1: try: mydir = os.path.dirname(__file__) sys.path.insert(0, mydir) import pymysql as MySQLdb self.MySQLdb = MySQLdb sys.path = sys.path[1:] except ImportError as exp2: sys.path = sys.path[1:] self.set_error('Unable to import MySQLdb (%s) or embedded pymsql (%s)' % (exp1, exp2)) return False host = self.get_parameter('server') user = self.get_parameter('user') password = self.get_parameter('password') port = self.get_parameter('port') mysql_socket = self.get_parameter('socket') # You can connect with socket or TCP if not mysql_socket: try: db = self.MySQLdb.connect(host=host, user=user, passwd=password, port=port) except self.MySQLdb.OperationalError as exp: # ooooups self.set_error('MySQL connection error (server): %s' % exp) return False elif hasattr(socket, 'AF_UNIX'): try: db = self.MySQLdb.connect(host='localhost', user=user, passwd=password, port=port, unix_socket=mysql_socket) except self.MySQLdb.OperationalError as exp: self.set_error('MySQL connection error (socket): %s' % exp) return False else: self.set_error('MySQL is set to connect with unix socket but it is not available for windows.') return False logger.debug('getMySQLStatus: connected') # Get MySQL version if self.mysqlVersion is None: logger.debug('getMySQLStatus: mysqlVersion unset storing for first time') try: cursor = db.cursor() cursor.execute('SELECT VERSION()') result = cursor.fetchone() except self.MySQLdb.OperationalError as message: logger.error('getMySQLStatus: MySQL query error when getting version: %s', message) version = result[0].split('-') # Might include a description e.g. 4.1.26-log. 
See http://dev.mysql.com/doc/refman/4.1/en/information-functions.html#function_version version = version[0].split('.') self.mysqlVersion = [] for string in version: number = re.match('([0-9]+)', string) number = number.group(0) self.mysqlVersion.append(number) logger.debug('getMySQLStatus: getting Connections') # Connections try: cursor = db.cursor() cursor.execute('SHOW STATUS LIKE "Connections"') result = cursor.fetchone() except self.MySQLdb.OperationalError as message: logger.error('getMySQLStatus: MySQL query error when getting Connections = %s', message) if self.mysqlConnectionsStore is None: logger.debug('getMySQLStatus: mysqlConnectionsStore unset storing for first time') self.mysqlConnectionsStore = result[1] connections = 0 else: logger.debug('getMySQLStatus: mysqlConnectionsStore set so calculating') logger.debug('getMySQLStatus: self.mysqlConnectionsStore = %s', self.mysqlConnectionsStore) logger.debug('getMySQLStatus: result = %s', result[1]) connections = float(float(result[1]) - float(self.mysqlConnectionsStore)) / 60 self.mysqlConnectionsStore = result[1] logger.debug('getMySQLStatus: connections = %s', connections) logger.debug('getMySQLStatus: getting Connections - done') logger.debug('getMySQLStatus: getting Created_tmp_disk_tables') # Created_tmp_disk_tables # Determine query depending on version. For 5.02 and above we need the GLOBAL keyword if int(self.mysqlVersion[0]) >= 5 and int(self.mysqlVersion[2]) >= 2: query = 'SHOW GLOBAL STATUS LIKE "Created_tmp_disk_tables"' else: query = 'SHOW STATUS LIKE "Created_tmp_disk_tables"' try: cursor = db.cursor() cursor.execute(query) result = cursor.fetchone() except self.MySQLdb.OperationalError as message: logger.error('getMySQLStatus: MySQL query error when getting Created_tmp_disk_tables = %s', message) createdTmpDiskTables = float(result[1]) logger.debug('getMySQLStatus: createdTmpDiskTables = %s', createdTmpDiskTables) logger.debug('getMySQLStatus: getting Created_tmp_disk_tables - done') logger.debug('getMySQLStatus: getting Max_used_connections') # Max_used_connections try: cursor = db.cursor() cursor.execute('SHOW STATUS LIKE "Max_used_connections"') result = cursor.fetchone() except self.MySQLdb.OperationalError as message: logger.error('getMySQLStatus: MySQL query error when getting Max_used_connections = %s', message) maxUsedConnections = int(result[1]) logger.debug('getMySQLStatus: maxUsedConnections = %s', createdTmpDiskTables) logger.debug('getMySQLStatus: getting Max_used_connections - done') logger.debug('getMySQLStatus: getting Open_files') # Open_files try: cursor = db.cursor() cursor.execute('SHOW STATUS LIKE "Open_files"') result = cursor.fetchone() except self.MySQLdb.OperationalError as message: logger.error('getMySQLStatus: MySQL query error when getting Open_files = %s', message) openFiles = int(result[1]) logger.debug('getMySQLStatus: openFiles = %s', openFiles) logger.debug('getMySQLStatus: getting Open_files - done') # Slow_queries logger.debug('getMySQLStatus: getting Slow_queries') # Determine query depending on version. 
For 5.02 and above we need the GLOBAL keyword (case 31015) if int(self.mysqlVersion[0]) >= 5 and int(self.mysqlVersion[2]) >= 2: query = 'SHOW GLOBAL STATUS LIKE "Slow_queries"' else: query = 'SHOW STATUS LIKE "Slow_queries"' try: cursor = db.cursor() cursor.execute(query) result = cursor.fetchone() except self.MySQLdb.OperationalError as message: logger.error('getMySQLStatus: MySQL query error when getting Slow_queries = %s', message) if self.mysqlSlowQueriesStore is None: logger.debug('getMySQLStatus: mysqlSlowQueriesStore unset so storing for first time') self.mysqlSlowQueriesStore = result[1] slowQueries = 0 else: logger.debug('getMySQLStatus: mysqlSlowQueriesStore set so calculating') logger.debug('getMySQLStatus: self.mysqlSlowQueriesStore = %s', self.mysqlSlowQueriesStore) logger.debug('getMySQLStatus: result = %s', result[1]) slowQueries = float(float(result[1]) - float(self.mysqlSlowQueriesStore)) / 60 self.mysqlSlowQueriesStore = result[1] logger.debug('getMySQLStatus: slowQueries = %s', slowQueries) logger.debug('getMySQLStatus: getting Slow_queries - done') logger.debug('getMySQLStatus: getting Table_locks_waited') # Table_locks_waited try: cursor = db.cursor() cursor.execute('SHOW STATUS LIKE "Table_locks_waited"') result = cursor.fetchone() except self.MySQLdb.OperationalError as message: logger.error('getMySQLStatus: MySQL query error when getting Table_locks_waited = %s', message) tableLocksWaited = float(result[1]) logger.debug('getMySQLStatus: tableLocksWaited = %s', tableLocksWaited) logger.debug('getMySQLStatus: getting Table_locks_waited - done') logger.debug('getMySQLStatus: getting Threads_connected') # Threads_connected try: cursor = db.cursor() cursor.execute('SHOW STATUS LIKE "Threads_connected"') result = cursor.fetchone() except self.MySQLdb.OperationalError as message: logger.error('getMySQLStatus: MySQL query error when getting Threads_connected = %s', message) threadsConnected = int(result[1]) logger.debug('getMySQLStatus: threadsConnected = %s', threadsConnected) logger.debug('getMySQLStatus: getting Threads_connected - done') logger.debug('getMySQLStatus: getting Seconds_Behind_Master') secondsBehindMaster = 0 if self.get_parameter('replication_enabled'): # Seconds_Behind_Master try: cursor = db.cursor(self.MySQLdb.cursors.DictCursor) cursor.execute('SHOW SLAVE STATUS') result = cursor.fetchone() except self.MySQLdb.OperationalError as message: self.set_error('getMySQLStatus: MySQL query error when getting SHOW SLAVE STATUS = %s' % message) result = None if result is not None: try: secondsBehindMaster = result['Seconds_Behind_Master'] logger.debug('getMySQLStatus: secondsBehindMaster = %s' % secondsBehindMaster) except IndexError as exp: secondsBehindMaster = None logger.debug('getMySQLStatus: secondsBehindMaster empty. %s' % exp) else: secondsBehindMaster = None logger.debug('getMySQLStatus: secondsBehindMaster empty. Result = None.') logger.debug('getMySQLStatus: getting Seconds_Behind_Master - done') return {'connections' : connections, 'created_tmp_disk_tables': createdTmpDiskTables, 'max_used_connections' : maxUsedConnections, 'open_files': openFiles, 'slow_queries': slowQueries, 'table_locks_waited' : tableLocksWaited, 'threads_connected': threadsConnected, 'seconds_behind_master': secondsBehindMaster}
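
# A standalone sketch of the counter-to-rate conversion used above for
# Connections and Slow_queries: keep the previous raw counter and, on each
# pass, return (new - old) / 60. The 60-second divisor mirrors the collector's
# assumption that it runs roughly once per minute; the class name and demo
# values are hypothetical.

class CounterRate(object):
    def __init__(self, period=60.0):
        self.period = period
        self.last_value = None

    def update(self, raw_value):
        raw_value = float(raw_value)
        if self.last_value is None:
            # First sample: store it and report 0, like the collector does
            self.last_value = raw_value
            return 0.0
        rate = (raw_value - self.last_value) / self.period
        self.last_value = raw_value
        return rate


if __name__ == '__main__':
    connections = CounterRate()
    print(connections.update('1200'))  # 0.0 (first sample, nothing to compare to)
    print(connections.update('1500'))  # 5.0 connections/s averaged over the last minute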
class Apache(Collector): parameters = { 'hostname': StringParameter(default='localhost'), 'user': StringParameter(default=''), 'password': StringParameter(default=''), } def __init__(self): super(Apache, self).__init__() self.apacheTotalAccesses = None def launch(self): if not self.is_in_group('apache'): self.set_not_eligible( 'Please add the apache group to enable this collector.') return logger = self.logger logger.debug('getApacheStatus: start') ''' passwordMgr = urllib2.HTTPPasswordMgrWithDefaultRealm() passwordMgr.add_password(None, self.config['apacheStatusUrl'], self.config['apacheStatusUser'], self.config['apacheStatusPass']) handler = urllib2.HTTPBasicAuthHandler(passwordMgr) # create "opener" (OpenerDirector instance) opener = urllib2.build_opener(handler) # use the opener to fetch a URL opener.open(self.config['apacheStatusUrl']) # Install the opener. # Now all calls to urllib2.urlopen use our opener. urllib2.install_opener(opener) ''' try: uri = 'http://%s/server-status/?auto' % self.get_parameter( 'hostname') user = self.get_parameter('user') password = self.get_parameter('password') response = httper.get(uri, timeout=3, user=user, password=password) except get_http_exceptions() as exp: stack = traceback.format_exc() self.log = stack self.set_error('Unable to get Apache status - Exception = %s' % exp) return False logger.debug('getApacheStatus: urlopen success, start parsing') # Split out each line lines = response.split('\n') # Loop over each line and get the values apacheStatus = {} logger.debug('getApacheStatus: parsing, loop') # Loop through and extract the numerical values for line in lines: values = line.split(': ') try: apacheStatus[str(values[0])] = values[1] except IndexError: break logger.debug('getApacheStatus: parsed') res = {} try: if apacheStatus['Total Accesses'] != False: logger.debug('getApacheStatus: processing total accesses') totalAccesses = float(apacheStatus['Total Accesses']) if self.apacheTotalAccesses is None or self.apacheTotalAccesses <= 0 or totalAccesses <= 0: res['req/s'] = 0.0 self.apacheTotalAccesses = totalAccesses logger.debug( 'getApacheStatus: no cached total accesses (or totalAccesses == 0), so storing for first time / resetting stored value' ) else: logger.debug( 'getApacheStatus: cached data exists, so calculating per sec metrics' ) res['req/s'] = (totalAccesses - self.apacheTotalAccesses) / 60 self.apacheTotalAccesses = totalAccesses else: self.set_error( 'getApacheStatus: Total Accesses not present in mod_status output. Is ExtendedStatus enabled?' ) except (IndexError, KeyError): self.set_error( 'getApacheStatus: IndexError - Total Accesses not present in mod_status output. Is ExtendedStatus enabled?' ) try: if apacheStatus['BusyWorkers'] != False and apacheStatus[ 'IdleWorkers'] != False: res['busy_workers'] = int(apacheStatus['BusyWorkers']) res['idle_workers'] = int(apacheStatus['IdleWorkers']) else: self.set_error( 'getApacheStatus: BusyWorkers/IdleWorkers not present in mod_status output. Is the URL correct (must have ?auto at the end)?' ) except (IndexError, KeyError): self.set_error( 'getApacheStatus: IndexError - BusyWorkers/IdleWorkers not present in mod_status output. Is the URL correct (must have ?auto at the end)?' ) return res
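
# A minimal sketch of the mod_status parsing above: the '?auto' output is a
# series of 'Key: value' lines turned into a dict, from which the per-second
# request rate is derived with the same counter-delta trick as the other
# collectors. The sample text and previous counter value are illustrative,
# not captured output.

SAMPLE_MOD_STATUS = """Total Accesses: 3600
Total kBytes: 1024
BusyWorkers: 3
IdleWorkers: 7
Scoreboard: __W_______"""


def parse_mod_status(text):
    status = {}
    for line in text.split('\n'):
        parts = line.split(': ', 1)
        if len(parts) == 2:
            status[parts[0]] = parts[1]
    return status


def apache_metrics(status, previous_total_accesses=None, period=60.0):
    res = {'busy_workers': int(status['BusyWorkers']),
           'idle_workers': int(status['IdleWorkers'])}
    total = float(status['Total Accesses'])
    if previous_total_accesses is None or previous_total_accesses <= 0 or total <= 0:
        res['req/s'] = 0.0
    else:
        res['req/s'] = (total - previous_total_accesses) / period
    return res, total


if __name__ == '__main__':
    status = parse_mod_status(SAMPLE_MOD_STATUS)
    metrics, last_total = apache_metrics(status, previous_total_accesses=3000)
    print(metrics)  # {'busy_workers': 3, 'idle_workers': 7, 'req/s': 10.0}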