def commit_logs(self):
    """
    Periodically called (commit_period), this method prepares a bunch of
    queued logs (commit_volume) to insert them in the DB
    """
    if not self.logs_cache:
        return

    if self.is_connected != CONNECTED:
        if not self.open():
            logger.warning("[mongo-logs] log committing failed")
            logger.warning("[mongo-logs] %d lines to insert in database", len(self.logs_cache))
            return

    logger.debug("[mongo-logs] committing ...")
    logger.debug("[mongo-logs] %d lines to insert in database (max insertion is %d lines)",
                 len(self.logs_cache), self.commit_volume)

    # Flush all the stored log lines
    logs_to_commit = 0
    now = time.time()
    some_logs = []
    while True:
        try:
            # result = self.db[self.logs_collection].insert_one(self.logs_cache.popleft())
            some_logs.append(self.logs_cache.popleft())
            logs_to_commit = logs_to_commit + 1
            if logs_to_commit >= self.commit_volume:
                break
        except IndexError:
            logger.debug("[mongo-logs] prepared all available logs for commit")
            break
        except Exception, exp:
            logger.error("[mongo-logs] exception: %s", str(exp))
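# A minimal sketch (an assumption, not the module's actual code) of how the
# batch prepared above could then be flushed in one bulk insert instead of one
# insert_one() per line. It assumes pymongo >= 3.0 (insert_many) and that
# self.logs_cache is a deque, as the popleft() calls above suggest.
def _flush_prepared_logs(self, some_logs, now):
    if not some_logs:
        return
    try:
        result = self.db[self.logs_collection].insert_many(some_logs)
        logger.debug("[mongo-logs] inserted %d log lines in %.2f seconds",
                     len(result.inserted_ids), time.time() - now)
    except Exception as exp:
        # Put the lines back at the front of the cache so the next
        # commit period retries them in the original order
        self.logs_cache.extendleft(reversed(some_logs))
        logger.error("[mongo-logs] commit failed: %s", str(exp))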
def load_statsd(self): try: self.statsd_addr = (socket.gethostbyname(self.statsd_host), self.statsd_port) self.statsd_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) except (socket.error, socket.gaierror), exp: logger.error('Cannot create statsd socket: %s' % exp) return
def open(self):
    """
    Connect to the Mongo DB with the configured URI.

    Execute a command to check if connected to a master, to force an immediate
    connection to the DB, because we need to know if the DB server is available.

    Update log rotation time to force a log rotation.
    """
    self.con = MongoClient(self.uri, connect=False)
    logger.info("[mongo-logs] trying to connect MongoDB: %s", self.uri)
    try:
        result = self.con.admin.command("ismaster")
        logger.info("[mongo-logs] connected to MongoDB, admin: %s", result)
        logger.debug("[mongo-logs] server information: %s", self.con.server_info())

        self.db = getattr(self.con, self.database)
        logger.info("[mongo-logs] connected to the database: %s (%s)", self.database, self.db)

        self.is_connected = CONNECTED
        self.next_logs_rotation = time.time()
        logger.info('[mongo-logs] database connection established')
    except ConnectionFailure as e:
        logger.error("[mongo-logs] Server is not available: %s", str(e))
        return False
    except Exception as e:
        logger.error("[mongo-logs] Could not open the database: %s", str(e))
        raise MongoLogsError

    return True
def publish_archive(archive): # Now really publish it proxy = CONFIG['shinken.io']['proxy'] api_key = CONFIG['shinken.io']['api_key'] # Ok we will push the file with a 10s timeout c = pycurl.Curl() c.setopt(c.POST, 1) c.setopt(c.CONNECTTIMEOUT, 10) c.setopt(c.TIMEOUT, 10) if proxy: c.setopt(c.PROXY, proxy) c.setopt(c.URL, "http://shinken.io/push") c.setopt(c.HTTPPOST, [("api_key", api_key), ("data", (c.FORM_FILE, str(archive), c.FORM_CONTENTTYPE, "application/x-gzip")) ]) response = StringIO() c.setopt(pycurl.WRITEFUNCTION, response.write) c.setopt(c.VERBOSE, 1) c.perform() r = c.getinfo(pycurl.HTTP_CODE) c.close() if r != 200: logger.error("There was a critical error : %s" % response.getvalue()) sys.exit(2) else: ret = json.loads(response.getvalue().replace('\\/', '/')) status = ret.get('status') text = ret.get('text') if status == 200: logger.log(text) else: logger.error(text)
def manage_log_brok(self, b):
    if self.read_only:
        return

    data = b.data
    line = data['log']
    if re.match("^\[[0-9]*\] [A-Z][a-z]*.:", line):
        # Matches logs which do NOT have to be stored
        # print "Unexpected in manage_log_brok", line
        return

    try:
        logline = Logline(line=line)
        values = logline.as_tuple()
        if logline.logclass != LOGCLASS_INVALID:
            insert_log = True
            current_state = int(values[12])
            if self.do_not_log_successive_ok and current_state == 0:
                dbresult = self.execute("SELECT state FROM logs WHERE host_name='%s' AND service_description='%s' AND class=%d ORDER BY time DESC LIMIT 1"
                                        % (values[6], values[11], int(values[2])))
                if len(dbresult) > 0 and dbresult[0][0] == 0:
                    insert_log = False
            if insert_log:
                self.execute('INSERT INTO LOGS VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)', values)
    except LiveStatusLogStoreError, exp:
        logger.error("[Logstore SQLite] An error occurred: %s", str(exp.args[0]))
        logger.error("[Logstore SQLite] DATABASE ERROR!!!!!!!!!!!!!!!!!")
def _set_ui_user_preference(self, user, key, value): if not self.db: logger.error("Problem during init phase") return None self.db.execute("INSERT OR REPLACE INTO ui_preferences (user, key, value) VALUES (?,?,?)", (user, key, value)) self.db.commit()
def init(self): logger.info("Try to open SQLite database at %s" % (self.uri)) try: self.db = sqlite3.connect(self.uri, check_same_thread=False) except Exception, e: logger.error("Error %s:" % e) raise
def hook_tick(self, brok): """Each second the broker calls the hook_tick function Every tick try to flush the buffer """ if self.use_pickle: if self.ticks >= self.tick_limit: # If the number of ticks where data was not # sent successfully to Graphite reaches the bufferlimit. # Reset the buffer and reset the ticks self.buffer = [] self.ticks = 0 return self.ticks += 1 # Format the data payload = cPickle.dumps(self.buffer) header = struct.pack("!L", len(payload)) packet = header + payload try: self.con.sendall(packet) except IOError, err: logger.error( "[Graphite broker] Failed sending to the Graphite Carbon instance network socket! IOError:%s" % str(err) ) return # Flush the buffer after a successful send to Graphite self.buffer = []
def manage_log_brok(self, b):
    data = b.data
    line = data['log']
    if re.match("^\[[0-9]*\] [A-Z][a-z]*.:", line):
        # Matches logs which do NOT have to be stored
        # print "Unexpected in manage_log_brok", line
        return

    logline = Logline(line=line)
    values = logline.as_dict()
    if logline.logclass != LOGCLASS_INVALID:
        try:
            self.db[self.collection].insert(values)
            self.is_connected = CONNECTED
            # If we have a backlog from an outage, we flush these lines
            # First we make a copy, so we can delete elements from
            # the original self.backlog
            backloglines = [bl for bl in self.backlog]
            for backlogline in backloglines:
                try:
                    self.db[self.collection].insert(backlogline)
                    self.backlog.remove(backlogline)
                except AutoReconnect, exp:
                    self.is_connected = SWITCHING
        except Exception, exp:
            logger.error("[LogStoreMongoDB] Got an exception inserting the backlog: %s" % str(exp))
def hook_save_retention(self, daemon):
    """
    Main function that is called in the retention creation pass
    """
    logger.debug("[MemcacheRetention] asking me to update the retention objects")

    all_data = daemon.get_retention_data()

    hosts = all_data['hosts']
    services = all_data['services']

    # Now the flat file method
    for h_name in hosts:
        try:
            h = hosts[h_name]
            key = self.normalize_key("HOST-%s" % h_name)
            val = cPickle.dumps(h)
            self.mc.set(key, val)
        except:
            logger.error("[MemcacheRetention] error while saving host %s" % h_name)

    for (h_name, s_desc) in services:
        try:
            key = self.normalize_key("SERVICE-%s,%s" % (h_name, s_desc))
            s = services[(h_name, s_desc)]
            val = cPickle.dumps(s)
            self.mc.set(key, val)
        except:
            logger.error("[MemcacheRetention] error while saving service %s,%s" % (h_name, s_desc))

    self.mc.disconnect_all()
    logger.info("Retention information updated in Memcache")
def inventor(look_at):
    # Now really publish it
    inventory = CONFIG['paths']['inventory']
    logger.debug("dumping inventory %s" % inventory)
    # Get all sub-directories
    for d in os.listdir(inventory):
        if os.path.exists(os.path.join(inventory, d, 'package.json')):
            if not look_at or d in look_at:
                print d
            # If asked, dump the content.json content
            if look_at or d in look_at:
                content_p = os.path.join(inventory, d, 'content.json')
                if not os.path.exists(content_p):
                    logger.error('Missing %s file' % content_p)
                    continue
                try:
                    j = json.loads(open(content_p, 'r').read())
                except Exception, exp:
                    logger.error('Bad %s file "%s"' % (content_p, exp))
                    continue
                for entry in j:
                    s = ''
                    if entry['type'] == '5':
                        # tar directory entry
                        s += '(d)'
                    else:
                        s += '(f)'
                    s += entry['name']
                    print s
def publish_archive(archive):
    # Now really publish it
    proxy = CONFIG['shinken.io']['proxy']
    api_key = CONFIG['shinken.io']['api_key']

    # Ok we will push the file with a 30s connection timeout and a 5 minutes overall timeout
    c = pycurl.Curl()
    c.setopt(c.POST, 1)
    c.setopt(c.CONNECTTIMEOUT, 30)
    c.setopt(c.TIMEOUT, 300)
    if proxy:
        c.setopt(c.PROXY, proxy)
    c.setopt(c.URL, "http://shinken.io/push")
    c.setopt(c.HTTPPOST, [("api_key", api_key),
                          ("data", (c.FORM_FILE, str(archive),
                                    c.FORM_CONTENTTYPE, "application/x-gzip"))
                          ])
    response = StringIO()
    c.setopt(pycurl.WRITEFUNCTION, response.write)
    c.setopt(c.VERBOSE, 1)
    try:
        c.perform()
    except pycurl.error, exp:
        logger.error("There was a critical error: %s" % exp)
        return
def grab_package(pname): cprint('Grabbing : ' , end='') cprint('%s' % pname, 'green') # Now really publish it proxy = CONFIG['shinken.io']['proxy'] api_key = CONFIG['shinken.io']['api_key'] # Ok we will push the file with a 5m timeout c = pycurl.Curl() c.setopt(c.POST, 0) c.setopt(c.CONNECTTIMEOUT, 30) c.setopt(c.TIMEOUT, 300) if proxy: c.setopt(c.PROXY, proxy) c.setopt(c.URL, str('shinken.io/grab/%s' % pname)) response = StringIO() c.setopt(pycurl.WRITEFUNCTION, response.write) #c.setopt(c.VERBOSE, 1) try: c.perform() except pycurl.error, exp: logger.error("There was a critical error : %s" % exp) return ''
def hook_tick(self, brok):
    """Each second the broker calls the hook_tick function.
       Every tick try to flush the buffer.
    """
    if self.use_pickle:
        if self.ticks >= self.tick_limit:
            # If the number of ticks where data was not sent successfully
            # to Graphite reaches the buffer limit, reset the buffer and the ticks
            logger.error("[Graphite broker] Buffering time exceeded. Freeing buffer")
            self.buffer = []
            self.ticks = 0
            return

        # Format the data
        payload = cPickle.dumps(self.buffer)
        header = struct.pack("!L", len(payload))
        packet = header + payload

        try:
            self.send_packet(packet)
            # Flush the buffer after a successful send to Graphite
            self.buffer = []
            self.ticks = 0
        except IOError, err:
            self.ticks += 1
            logger.error("[Graphite broker] Sending data failed. Buffering state: %s / %s"
                         % (self.ticks, self.tick_limit))
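# For reference, a self-contained sketch of the Carbon pickle protocol used
# above: the payload is a pickled list of (metric_path, (timestamp, value))
# tuples, prefixed with a 4-byte big-endian length header. The host/port
# defaults and the function name are assumptions for illustration only.
import socket
import struct
import time
try:
    import cPickle as pickle  # Python 2, as in the broker module above
except ImportError:
    import pickle

def send_to_carbon_pickle(metrics, host='localhost', port=2004):
    """Send [(path, (timestamp, value)), ...] to Carbon's pickle receiver."""
    payload = pickle.dumps(metrics, protocol=2)
    header = struct.pack("!L", len(payload))   # 4-byte big-endian length
    sock = socket.create_connection((host, port), timeout=5)
    try:
        sock.sendall(header + payload)
    finally:
        sock.close()

# Example: one metric point, timestamped now
# send_to_carbon_pickle([("shinken.host1.load", (int(time.time()), 0.42))])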
def get_services_by_explosion(self, servicegroups):
    # First we tag the sg so it will not be exploded again
    # if one of its sons already called it
    self.already_explode = True

    # Now the recursive part.
    # rec_tag is set to False for every SG we explode,
    # so if it is True here, there must be a loop in the SG calls... not GOOD!
    if self.rec_tag:
        logger.error("[servicegroup::%s] got a loop in servicegroup definition", self.get_name())
        if self.has('members'):
            return self.members
        else:
            return ''
    # Ok, not a loop, we tag it and continue
    self.rec_tag = True

    sg_mbrs = self.get_servicegroup_members()
    for sg_mbr in sg_mbrs:
        sg = servicegroups.find_by_name(sg_mbr.strip())
        if sg is not None:
            value = sg.get_services_by_explosion(servicegroups)
            if value is not None:
                self.add_string_member(value)

    if self.has('members'):
        return self.members
    else:
        return ''
def add(self, elt):
    cls_type = elt.__class__.my_type
    if cls_type == 'brok':
        # For broks, we TAG the brok with our instance_id
        elt.instance_id = 0
        self.broks_internal_raised.append(elt)
        return
    elif cls_type == 'externalcommand':
        logger.debug("Enqueuing an external command '%s'" % str(ExternalCommand.__dict__))
        self.external_commands.append(elt)
    # Maybe we got a Message from the modules: it's a way to ask for something,
    # like the full data of a scheduler for example.
    elif cls_type == 'message':
        # We got a message, great!
        logger.debug(str(elt.__dict__))
        if elt.get_type() == 'NeedData':
            data = elt.get_data()
            # Full instance id means: I got no data for this scheduler
            # so give me all dumbass!
            if 'full_instance_id' in data:
                c_id = data['full_instance_id']
                source = elt.source
                logger.info('The module %s is asking me to get all initial data from the scheduler %d'
                            % (source, c_id))
                # So we just reset the connection and the running_id, it will get all the new things
                try:
                    self.schedulers[c_id]['con'] = None
                    self.schedulers[c_id]['running_id'] = 0
                except KeyError:  # maybe this instance was not known, forget it
                    logger.warning("the module %s asked me a full_instance_id for an unknown ID (%d)!"
                                   % (source, c_id))
        # Maybe a module tells me that it's dead, I must log its last words...
        if elt.get_type() == 'ICrash':
            data = elt.get_data()
            logger.error('the module %s just crashed! Please look at the traceback:' % data['name'])
            logger.error(data['trace'])
def open(self): try: if self.replica_set: self.conn = pymongo.ReplicaSetConnection(self.mongodb_uri, replicaSet=self.replica_set, fsync=self.mongodb_fsync) else: # Old versions of pymongo do not known about fsync if ReplicaSetConnection: self.conn = pymongo.Connection(self.mongodb_uri, fsync=self.mongodb_fsync) else: self.conn = pymongo.Connection(self.mongodb_uri) self.db = self.conn[self.database] self.db[self.collection].ensure_index([('host_name', pymongo.ASCENDING), ('time', pymongo.ASCENDING), ('lineno', pymongo.ASCENDING)], name='logs_idx') self.db[self.collection].ensure_index([('time', pymongo.ASCENDING), ('lineno', pymongo.ASCENDING)], name='time_1_lineno_1') if self.replica_set: pass # This might be a future option prefer_secondary #self.db.read_preference = ReadPreference.SECONDARY self.is_connected = CONNECTED self.next_log_db_rotate = time.time() except AutoReconnect as err: # now what, ha? logger.error("[LogStoreMongoDB] LiveStatusLogStoreMongoDB.AutoReconnect %s" % err) # The mongodb is hopefully available until this module is restarted raise LiveStatusLogStoreError(err) except Exception as err: # If there is a replica_set, but the host is a simple standalone one # we get a "No suitable hosts found" here. # But other reasons are possible too. logger.error("[LogStoreMongoDB] Could not open the database: %s" % err) raise LiveStatusLogStoreError(err)
def search(look_at): # Now really publish it proxy = CONFIG['shinken.io']['proxy'] api_key = CONFIG['shinken.io']['api_key'] # Ok we will push the file with a 10s timeout c = pycurl.Curl() c.setopt(c.POST, 0) c.setopt(c.CONNECTTIMEOUT, 10) c.setopt(c.TIMEOUT, 10) if proxy: c.setopt(c.PROXY, proxy) args = {'keywords':','.join(look_at)} c.setopt(c.URL, str('shinken.io/searchcli?'+urllib.urlencode(args))) response = StringIO() c.setopt(pycurl.WRITEFUNCTION, response.write) #c.setopt(c.VERBOSE, 1) c.perform() r = c.getinfo(pycurl.HTTP_CODE) c.close() if r != 200: logger.error("There was a critical error : %s" % response.getvalue()) sys.exit(2) else: ret = json.loads(response.getvalue().replace('\\/', '/')) status = ret.get('status') result = ret.get('result') if status != 200: logger.log(result) return [] return result
def hook_late_configuration(self, arb): """ Read config and fill database """ mac_resol = MacroResolver() mac_resol.init(arb.conf) for serv in arb.conf.services: if serv.check_command.command.module_type == 'snmp_booster': try: # Serialize service dict_serv = dict_serialize(serv, mac_resol, self.datasource) except Exception as exp: logger.error("[SnmpBooster] [code 0907] [%s,%s] " "%s" % (serv.host.get_name(), serv.get_name(), str(exp))) continue # We want to make a diff between arbiter insert and poller insert. Some backend may need it. try: self.db_client.update_service_init(dict_serv['host'], dict_serv['service'], dict_serv) except Exception as exp: logger.error("[SnmpBooster] [code 0909] [%s,%s] " "%s" % (dict_serv['host'], dict_serv['service'], str(exp))) continue logger.info("[SnmpBooster] [code 0908] Done parsing") # Disconnect from database self.db_client.disconnect()
def linkify_sd_by_s(self, hosts, services): for sd in self: try: s_name = sd.dependent_service_description hst_name = sd.dependent_host_name # The new member list, in id s = services.find_srv_by_name_and_hostname(hst_name, s_name) if s is None: self.configuration_errors.append("Service %s not found for host %s" % (s_name, hst_name)) sd.dependent_service_description = s s_name = sd.service_description hst_name = sd.host_name # The new member list, in id s = services.find_srv_by_name_and_hostname(hst_name, s_name) if s is None: self.configuration_errors.append("Service %s not found for host %s" % (s_name, hst_name)) sd.service_description = s except AttributeError, exp: logger.error("[servicedependency] fail to linkify by service %s: %s", sd, exp)
def get_ui_availability(self, elt, range_start=None, range_end=None):
    import pymongo
    if not self.db:
        logger.error("[mongo-logs] Problem during init phase, no database connection")
        return None

    logger.debug("[mongo-logs] get_ui_availability, name: %s", elt)

    query = [{"hostname": elt.host_name}]
    if elt.__class__.my_type == 'service':
        query.append({"service": elt.service_description})
    if range_start:
        query.append({'day_ts': {'$gte': range_start}})
    if range_end:
        query.append({'day_ts': {'$lte': range_end}})
    query = {'$and': query}
    logger.debug("[mongo-logs] Fetching records from database with query: '%s'", query)

    records = []
    try:
        for log in self.db[self.hav_collection].find(query).sort([
                ("day", pymongo.DESCENDING),
                ("hostname", pymongo.ASCENDING),
                ("service", pymongo.ASCENDING)]):
            if '_id' in log:
                del log['_id']
            records.append(log)
        logger.debug("[mongo-logs] %d records fetched from database.", len(records))
    except Exception, exp:
        logger.error("[mongo-logs] Exception when querying database: %s", str(exp))

    return records
def create_pack(self, buf, name): if not json: logger.warning("[Pack] cannot load the pack file '%s': missing json lib", name) return # Ok, go compile the code try: d = json.loads(buf) if not 'name' in d: logger.error("[Pack] no name in the pack '%s'", name) return p = Pack({}) p.pack_name = d['name'] p.description = d.get('description', '') p.macros = d.get('macros', {}) p.templates = d.get('templates', [p.pack_name]) p.path = d.get('path', 'various/') p.doc_link = d.get('doc_link', '') p.services = d.get('services', {}) p.commands = d.get('commands', []) if not p.path.endswith('/'): p.path += '/' # Ok, add it self[p.id] = p except ValueError, exp: logger.error("[Pack] error in loading pack file '%s': '%s'", name, exp)
def is_correct(self):
    state = True
    cls = self.__class__

    # Raise all previously seen errors, like unknown commands or timeperiods
    if self.configuration_errors != []:
        state = False
        for err in self.configuration_errors:
            logger.error("[item::%s] %s", self.get_name(), err)

    for prop, entry in cls.properties.items():
        if prop not in cls._special_properties:
            if not hasattr(self, prop) and entry.required:
                logger.warning("[checkmodulation::%s] %s property not set", self.get_name(), prop)
                state = False  # Bad boy...

    # Ok now we manage special cases...
    # Service part
    if not hasattr(self, 'check_command'):
        logger.warning("[checkmodulation::%s] do not have any check_command defined", self.get_name())
        state = False
    else:
        if self.check_command is None:
            logger.warning("[checkmodulation::%s] a check_command is missing", self.get_name())
            state = False
        if not self.check_command.is_valid():
            logger.warning("[checkmodulation::%s] a check_command is invalid", self.get_name())
            state = False

    # Ok just put None as check_period, which means 24x7
    if not hasattr(self, 'check_period'):
        self.check_period = None

    return state
def is_correct(self): b = self.day in Daterange.weekdays if not b: logger.error("Error: %s is not a valid day", self.day) # Check also if Daterange is correct. b &= Daterange.is_correct(self) return b
def get_instance(plugin): name = plugin.get_name() logger.info("Get a Syslog broker for plugin %s" % (name)) # syslog.syslog priority defaults to (LOG_INFO | LOG_USER) facility = syslog.LOG_USER priority = syslog.LOG_INFO # Get configuration values, if any if hasattr(plugin, 'facility'): facility = plugin.facility if hasattr(plugin, 'priority'): priority = plugin.priority # Ensure config values have a string type compatible with # SysLogHandler.encodePriority if type(facility) in types.StringTypes: facility = types.StringType(facility) if type(priority) in types.StringTypes: priority = types.StringType(priority) # Convert facility / priority (integers or strings) to aggregated # priority value sh = SysLogHandler() try: priority = sh.encodePriority(facility, priority) except TypeError, e: logger.error("[%s] Couldn't get syslog priority, " "reverting to defaults" % (name))
def check_alive_instances(self):
    # Only for external modules
    for inst in self.instances:
        if not inst in self.to_restart:
            if inst.is_external and not inst.process.is_alive():
                logger.error("The external module %s went down unexpectedly!" % inst.get_name())
                logger.info("Setting the module %s to restart" % inst.get_name())
                # We clean its queues, they are no longer useful
                inst.clear_queues(self.manager)
                self.to_restart.append(inst)
                # Ok, no need to look at queue size now
                continue

            # Now look at the max queue size. If it is above the limit, the module
            # probably has a huge problem and should bail out. It's not a perfect
            # solution, more a watchdog.
            # If max_queue_size is 0, don't check this
            if self.max_queue_size == 0:
                continue

            # Ok, go launch the dog!
            queue_size = 0
            try:
                queue_size = inst.to_q.qsize()
            except Exception, exp:
                pass
            if queue_size > self.max_queue_size:
                logger.error("The external module %s got a too high brok queue size (%s > %s)!"
                             % (inst.get_name(), queue_size, self.max_queue_size))
                logger.info("Setting the module %s to restart" % inst.get_name())
                # We clean its queues, they are no longer useful
                inst.clear_queues(self.manager)
                self.to_restart.append(inst)
def launch_query(self): """ Prepare the request object's filter stacks """ # The Response object needs to access the Query self.response.load(self) # A minimal integrity check if not self.table: return [] try: # Remember the number of stats filters. We need these numbers as columns later. # But we need to ask now, because get_live_data() will empty the stack if self.table == 'log': result = self.get_live_data_log() else: # If the pnpgraph_present column is involved, then check # with each request if the pnp perfdata path exists if 'pnpgraph_present' in self.columns + self.filtercolumns + self.prefiltercolumns and self.pnp_path and os.access(self.pnp_path, os.R_OK): self.pnp_path_readable = True else: self.pnp_path_readable = False # Apply the filters on the broker's host/service/etc elements result = self.get_live_data() except Exception, e: import traceback logger.error("[Livestatus Wait Query] Error: %s" % e) traceback.print_exc(32) result = []
def get_hosts_by_explosion(self, hostgroups):
    # First we tag the hg so it will not be exploded again
    # if one of its sons already called it
    self.already_explode = True

    # Now the recursive part.
    # rec_tag is set to False for every HG we explode,
    # so if it is True here, there must be a loop in the HG calls... not GOOD!
    if self.rec_tag:
        logger.error("[hostgroup::%s] got a loop in hostgroup definition", self.get_name())
        return self.get_hosts()
    # Ok, not a loop, we tag it and continue
    self.rec_tag = True

    hg_mbrs = self.get_hostgroup_members()
    for hg_mbr in hg_mbrs:
        hg = hostgroups.find_by_name(hg_mbr.strip())
        if hg is not None:
            value = hg.get_hosts_by_explosion(hostgroups)
            if value is not None:
                self.add_string_member(value)

    return self.get_hosts()
def grab_package(pname): print "Trying to grab package", pname # Now really publish it proxy = CONFIG['shinken.io']['proxy'] api_key = CONFIG['shinken.io']['api_key'] # Ok we will push the file with a 10s timeout c = pycurl.Curl() c.setopt(c.POST, 0) c.setopt(c.CONNECTTIMEOUT, 10) c.setopt(c.TIMEOUT, 10) if proxy: c.setopt(c.PROXY, proxy) c.setopt(c.URL, str('shinken.io/grab/%s' % pname)) response = StringIO() c.setopt(pycurl.WRITEFUNCTION, response.write) #c.setopt(c.VERBOSE, 1) c.perform() r = c.getinfo(pycurl.HTTP_CODE) c.close() if r != 200: logger.error("There was a critical error : %s" % response.getvalue()) sys.exit(2) else: ret = response.getvalue() print "GOT A RETURN OF", len(ret) return ret
def get_new_broks(self, type='scheduler'):
    # Get the good links tab for looping..
    links = self.get_links_from_type(type)
    if links is None:
        logger.debug('Type unknown for connection! %s', type)
        return

    # We check for new broks in each scheduler and put
    # the results in our queues
    for sched_id in links:
        try:
            con = links[sched_id]['con']
            if con is not None:  # None = not initialized
                t0 = time.time()
                # Before asking for a call that can be long, do a simple ping to be sure it is alive
                con.get('ping')
                tmp_broks = con.get('get_broks', {'bname': self.name}, wait='long')
                try:
                    _t = base64.b64decode(tmp_broks)
                    _t = zlib.decompress(_t)
                    tmp_broks = cPickle.loads(_t)
                except (TypeError, zlib.error, cPickle.PickleError), exp:
                    logger.error('Cannot load broks data from %s : %s', links[sched_id]['name'], exp)
                    links[sched_id]['con'] = None
                    continue
                logger.debug("%s broks got in %s", len(tmp_broks), time.time() - t0)
                for b in tmp_broks.values():
                    b.instance_id = links[sched_id]['instance_id']
                # Ok, we can add these broks to our queues
                self.add_broks_to_queue(tmp_broks.values())
            else:  # no con? make the connection
                self.pynag_con_init(sched_id, type=type)
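# A minimal sketch of the matching encode side, inferred only from the decode
# chain above (cPickle.loads(zlib.decompress(base64.b64decode(...)))). The
# function names and the scheduler-side hook are assumptions, not Shinken's
# actual API.
import base64
import zlib
import cPickle

def pack_broks(broks):
    """Serialize a dict of broks the way get_new_broks() expects to unpack it."""
    raw = cPickle.dumps(broks, protocol=2)
    return base64.b64encode(zlib.compress(raw))

def unpack_broks(blob):
    """Reverse of pack_broks(), mirroring the decode chain in get_new_broks()."""
    return cPickle.loads(zlib.decompress(base64.b64decode(blob)))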
def init(self): logger.info("[Graphite broker] I init the %s server connection to %s:%d" % (self.get_name(), str(self.host), self.port)) try: self.con = socket() self.con.connect((self.host, self.port)) except IOError, err: logger.error("[Graphite broker] Graphite Carbon instance network socket! IOError:%s" % str(err)) raise
def linkify_sd_by_tp(self, timeperiods): for sd in self: try: tp_name = sd.dependency_period tp = timeperiods.find_by_name(tp_name) sd.dependency_period = tp except AttributeError, exp: logger.error("[servicedependency] fail to linkify by timeperiods: %s" % exp)
def postmessage(self, message, retry=False): # process enqueud events if possible self.pop_events() if message["source_type"] == "component": key = "%s.%s.%s.%s.%s" % ( message["connector"], message["connector_name"], message["event_type"], message["source_type"], message["component"]) else: key = "%s.%s.%s.%s.%s[%s]" % ( message["connector"], message["connector_name"], message["event_type"], message["source_type"], message["component"], message["resource"]) # connection management if not self.connected(): logger.error("[Canopsis] Create connection") self.create_connection() self.connect() # publish message if self.connected(): logger.debug("[Canopsis] using routing key %s" % key) logger.debug("[Canopsis] sending %s" % str(message)) try: self.producer.revive(self.channel) self.producer.publish(body=message, compression=None, routing_key=key, exchange=self.exchange_name) return True except: logger.error( "[Canopsis] Not connected, going to queue messages until connection back" ) self.queue.append({"key": key, "message": message}) func = sys._getframe(1).f_code.co_name error = str(sys.exc_info()[0]) logger.error("[Canopsis] Unexpected error: %s in %s" % (error, func)) # logger.error(str(traceback.format_exc())) return False else: errmsg = "[Canopsis] Not connected, going to queue messages until connection back (%s items in queue | max %s)" % ( str(len(self.queue)), str(self.maxqueuelength)) logger.error(errmsg) #enqueue_cano_event(key,message) if len(self.queue) < int(self.maxqueuelength): self.queue.append({"key": key, "message": message}) logger.debug("[Canopsis] Queue length: %d" % len(self.queue)) return True else: logger.error( "[Canopsis] Maximum retention for event queue %s reached" % str(self.maxqueuelength)) return False
def manage_brok(self, brok): if brok.type in [ 'service_check_result', 'host_check_result', 'update_service_status', 'update_host_status' ]: if self.debug: logger.info('[alerta]: %s' % brok.data) data = brok.data if brok.type in ['service_check_result', 'update_service_status']: check_type = 'Service Check' else: check_type = 'Host Check' state = data.get('state', None) if state == 'CRITICAL': severity = 'critical' elif state == 'DOWN': severity = 'major' elif state in ['UP', 'OK']: severity = 'ok' elif state == 'PENDING': severity = 'indeterminate' else: severity = 'warning' payload = { 'resource': data['host_name'], 'event': data.get('service_description', check_type), 'environment': self.environment, 'severity': severity, 'service': ['Platform'], 'group': 'Shinken', 'value': '%s (%s)' % (data['state'], data['state_type']), 'text': data['long_output'] or data['output'], 'tags': [], 'attributes': {}, 'origin': 'shinken/%s' % platform.uname()[1], 'type': brok.type, 'rawData': data, 'customer': self.customer } if data['problem_has_been_acknowledged']: payload['status'] = 'ack' try: url = self.endpoint + '/alert' response = self.session.post(url, json=payload, headers=self.headers) if self.debug: logger.info('[alerta]: %s' % response.text) except exceptions.RequestException as e: logger.error(str(e))
def manage_service_check_result_brok(self, b): data = b.data perf_data = data['perf_data'] couples = self.get_metric_and_value(perf_data) # If no values, we can exit now if len(couples) == 0: return hname = self.illegal_char.sub('_', data['host_name']) if data['host_name'] in self.host_dict: customs_datas = self.host_dict[data['host_name']] if '_GRAPHITE_PRE' in customs_datas: hname = ".".join((customs_datas['_GRAPHITE_PRE'], hname)) desc = self.illegal_char.sub('_', data['service_description']) if (data['host_name'], data['service_description']) in self.svc_dict: customs_datas = self.svc_dict[(data['host_name'], data['service_description'])] if '_GRAPHITE_POST' in customs_datas: desc = ".".join((desc, customs_datas['_GRAPHITE_POST'])) check_time = int(data['last_chk']) try: logger.debug("[Graphite broker] Hostname: %s, Desc: %s, check time: %d, perfdata: %s" % (hname, desc, check_time, str(perf_data))) except UnicodeEncodeError: pass if self.graphite_data_source: path = '.'.join((hname, self.graphite_data_source, desc)) else: path = '.'.join((hname, desc)) if self.use_pickle: # Buffer the performance data lines for (metric, value) in couples: if value: self.buffer.append(("%s.%s" % (path, metric), ("%d" % check_time, "%s" % str(value)))) else: lines = [] # Send a bulk of all metrics at once for (metric, value) in couples: if value: lines.append("%s.%s %s %d" % (path, metric, str(value), check_time)) packet = '\n'.join(lines) + '\n' # Be sure we put \n every where try: logger.debug("[Graphite broker] Launching: %s" % packet) except UnicodeEncodeError: pass try: self.send_packet(packet) except IOError, err: logger.error("[Graphite broker] Failed sending to the Graphite Carbon. Data are lost")
def get_channel(self): try: self.channel = self.connection.channel() except: func = sys._getframe(1).f_code.co_name error = str(sys.exc_info()[0]) logger.error("[Canopsis] Unexpected error: %s in %s" % (error, func)) return False
def manage_host_check_result_brok(self, b): data = b.data perf_data = data['perf_data'] couples = self.get_metric_and_value(perf_data) # If no values, we can exit now if len(couples) == 0: return hname = self.illegal_char.sub('_', data['host_name']) if data['host_name'] in self.host_dict: customs_datas = self.host_dict[data['host_name']] if '_GRAPHITE_PRE' in customs_datas: hname = ".".join((customs_datas['_GRAPHITE_PRE'], hname)) if self.ignore_latency_limit >= data['latency'] > 0: check_time = int(data['last_chk']) - int(data['latency']) logger.info( "[Graphite broker] Ignoring latency for host %s. Latency : %s", data['host_name'], data['latency']) else: check_time = int(data['last_chk']) #try: # logger.debug("[Graphite broker] Hostname %s, check time: %d, perfdata: %s" # % (hname, check_time, str(perf_data))) #except UnicodeEncodeError: # pass if self.graphite_data_source: path = '.'.join((hname, self.graphite_data_source)) else: path = hname if self.use_pickle: # Buffer the performance data lines for (metric, value) in couples: self.buffer.append(("%s.__HOST__.%s" % (path, metric), ("%d" % check_time, "%s" % value))) else: lines = [] # Send a bulk of all metrics at once for (metric, value) in couples: lines.append("%s.__HOST__.%s %s %d" % (path, metric, value, check_time)) packet = '\n'.join(lines) + '\n' # Be sure we put \n every where #try: # logger.debug("[Graphite broker] Launching: %s" % packet) #except UnicodeEncodeError: # pass try: self.send_packet(packet) except IOError: logger.error( "[Graphite broker] Failed sending to the Graphite Carbon." " Data are lost")
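# A minimal, self-contained sketch of the Carbon plaintext protocol used by the
# non-pickle branch above: one "metric.path value timestamp\n" line per data
# point. The host/port defaults, the function name and the example metric path
# are assumptions for illustration only.
import socket
import time

def send_to_carbon_plaintext(path, value, timestamp=None, host='localhost', port=2003):
    """Send one 'path value timestamp' line to Carbon's plaintext receiver."""
    if timestamp is None:
        timestamp = int(time.time())
    line = "%s %s %d\n" % (path, value, timestamp)
    sock = socket.create_connection((host, port), timeout=5)
    try:
        sock.sendall(line)
    finally:
        sock.close()

# Example, mirroring the host metric path built above:
# send_to_carbon_plaintext("host1.__HOST__.load", 0.42)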
def connect(self): """ This function inits the connection to the database """ try: self.db_conn = StrictRedis(host=self.db_host, port=self.db_port) except Exception as exp: logger.error("[SnmpBooster] [code 1302] Redis Connection error:" " %s" % str(exp)) return False return True
def check_exclude_rec(self): if self.rec_tag: logger.error("[timeentry::%s] is in a loop in exclude parameter" % self.get_name()) return False self.rec_tag = True for tp in self.exclude: tp.check_exclude_rec() return True
def check_auth(self, user, password): # If we do not have an ldap uri, no auth :) if not self.ldap_uri: return False logger.debug( "[Active Directory UI] Trying to auth by ldap with user %s" % user) c = self.app.datamgr.get_contact(user) if not c: logger.warning( "[Active Directory UI] AD/Ldap: invalid user %s (not found)" % user) return False # I don't know why, but ldap automagically auth void password. That's just stupid I think # so we don't allow them. if not password: logger.warning( "[Active Directory UI] AD/Ldap: void password are not allowed (user:%s)" % user) return False # first we need to find the principalname of this entry # because it can be a user name like j.gabes, but we should auth by ldap # with [email protected] for example elts = self.find_contact_entry(c) try: # On AD take the uid / principalename if self.mode == 'ad': # Maybe the entry is void.... if self.auth_key in elts: account_name = elts[self.auth_key][0] else: # For openldap, use the full DN account_name = elts[self.auth_key] except KeyError: logger.warning( "[Active Directory UI] Cannot find the %s entry, so use the user entry" % self.auth_key) account_name = user local_con = ldap.initialize(self.ldap_uri) local_con.set_option(ldap.OPT_REFERRALS, 0) # Any errors will throw an ldap.LDAPError exception # or related exception so you can ignore the result try: local_con.simple_bind_s(account_name, password) logger.info( "[Active Directory UI] AD/Ldap Connection done with user %s and password %s" % (user, password)) return True except ldap.LDAPError, exp: logger.error("[Active Directory UI] Ldap auth error: %s" % str(exp))
def read_package_json(fd): buf = fd.read() fd.close() buf = buf.decode('utf8', 'ignore') package_json = json.loads(buf) if not package_json: logger.error("Bad package.json file") sys.exit(2) return package_json
def read_package_json(fd):
    buf = fd.read()
    fd.close()

    buf = buf.decode('utf8', 'ignore')
    try:
        package_json = json.loads(buf)
    except ValueError, exp:
        logger.error("Bad package.json file: %s", exp)
        sys.exit(2)

    return package_json
def log_db_do_archive(self): """ In order to limit the datafile's sizes we flush logs dating from before today/00:00 to their own datafiles. """ if self.read_only: return try: os.stat(self.archive_path) except: os.mkdir(self.archive_path) for day in self.log_db_historic_contents(): dayobj, handle, archive, starttime, stoptime = day if handle == "main": # Skip archiving of today's contents continue if not os.path.exists(archive): # Create an empty datafile with the logs table #tmpconn = LiveStatusDb(archive, None, 0) #tmpconn.prepare_log_db_table() #tmpconn.close() dbmodconf = Module({ 'module_name': 'LogStore', 'module_type': 'logstore_sqlite', 'use_aggressive_sql': '0', 'database_file': archive, 'max_logs_age': '0', }) tmpconn = LiveStatusLogStoreSqlite(dbmodconf) tmpconn.open() tmpconn.close() self.commit() logger.info( "[Logstore SQLite] move logs from %s - %s to database %s" % (time.asctime(time.localtime(starttime)), time.asctime(time.localtime(stoptime)), archive)) cmd = "ATTACH DATABASE '%s' AS %s" % (archive, handle) self.execute_attach(cmd) cmd = "INSERT INTO %s.logs SELECT * FROM logs WHERE time >= %d AND time < %d" % ( handle, starttime, stoptime) self.execute(cmd) cmd = "DELETE FROM logs WHERE time >= %d AND time < %d" % ( starttime, stoptime) self.execute(cmd) self.commit() cmd = "DETACH DATABASE %s" % handle self.execute(cmd) # This is necessary to shrink the database file try: self.execute('VACUUM') except sqlite3.DatabaseError, exp: logger.error( "[Logstore SQLite] WARNING: it seems your database is corrupted. Please recreate it" ) self.commit()
def from_contact(cls, contact): user = contact try: user.__class__ = User except Exception as exp: logger.error("[WebUI - ui_user] get from contact: %s", str(exp)) raise Exception(user) return user
def connect(self): """ This function inits the connection to the database """ try: self.db_conn = MongoClient(self.db_host, self.db_port) except Exception as exp: logger.error("[SnmpBooster] [code 1202] Mongodb Connection error:" " %s" % str(exp)) return False return True
def init_http(self): logger.info("[WS_Arbiter] Starting WS arbiter http socket") try: self.srv = run(host=self.host, port=self.port, server='wsgirefselect') except Exception, e: logger.error("[WS_Arbiter] Exception : %s" % str(e)) raise
def linkify_hd_by_tp(self, timeperiods): for hd in self: try: tp_name = hd.dependency_period tp = timeperiods.find_by_name(tp_name) hd.dependency_period = tp except AttributeError, exp: logger.error( "[hostdependency] fail to linkify by timeperiod: %s", exp)
def get_instance(plugin): logger.debug("Get a TSCA arbiter module for plugin %s" % plugin.get_name()) try: from tsca import TSCA_arbiter except ImportError, exp: logger.error("Warning: the plugin type %s is unavailable: %s" % ('TSCA', exp)) return None
def open(self): """Open a connection to the mongodb server and check the connection by updating a documetn in a collection""" try: from pymongo import MongoClient except ImportError: logger.error( "[WebUI-MongoDBPreferences] Can not import pymongo.MongoClient" ) raise try: if self.replica_set: self.con = MongoClient(self.uri, replicaSet=self.replica_set, fsync=self.mongodb_fsync) else: self.con = MongoClient(self.uri, fsync=self.mongodb_fsync) logger.info("[WebUI-MongoDBPreferences] connected to mongodb: %s", self.uri) self.db = getattr(self.con, self.database) logger.info( "[WebUI-MongoDBPreferences] connected to the database: %s", self.database) if self.username and self.password: self.db.authenticate(self.username, self.password) logger.info( "[WebUI-MongoDBPreferences] user authenticated: %s", self.username) # Update a document test item in the collection to confirm correct connection logger.info( "[WebUI-MongoDBPreferences] updating connection test item in the collection ..." ) self.db.ui_user_preferences.update_one( {"_id": "test-ui_prefs"}, {"$set": { "last_test": time.time() }}, upsert=True) logger.info( "[WebUI-MongoDBPreferences] updated connection test item") self.is_connected = True logger.info( "[WebUI-MongoDBPreferences] database connection established") except Exception as exp: logger.error("[WebUI-MongoDBPreferences] Exception: %s", str(exp)) logger.debug( "[WebUI-MongoDBPreferences] Back trace of this kill: %s", traceback.format_exc()) # Depending on exception type, should raise ... self.is_connected = False raise return self.is_connected
def get_ui_user_preference(self, user, key): if not self.db: logger.error("Problem during init phase") return None if not user: logger.error("error get_ui_user_preference::no user") return None return self._get_ui_user_preference(user.get_name(), key)
def __init__(self, modconf): BaseModule.__init__(self, modconf) try: self.username = getattr(modconf, 'username', 'anonymous') self.password = getattr(modconf, 'password', '') self.port = int(getattr(modconf, 'port', '7760')) self.host = getattr(modconf, 'host', '0.0.0.0') except AttributeError: logger.error("[Ws_arbiter] The module is missing a property, check module declaration in shinken-specific.cfg") raise
def __init__(self, modconf): BaseModule.__init__(self, modconf) logger.debug('[hokuto-log-cacher] Initializing') self.regen = Regenerator() # TODO: Keep this ? seems useless self.db_path = getattr(modconf, 'db_path', None) if self.db_path is None: logger.error( '[hokuto-log-cacher] No database path configured. Please specify one with db_path in the module configuration file.' ) raise
def manage_finished_checks(self): to_del = [] for action in self.checks: to_del.append(action) try: # Under android we got a queue here self.returns_queue.put(action) except IOError, exp: logger.error("[Android SMS] %d exiting: %s" % (self.id, str(exp))) sys.exit(2)
def send_packet(self, p): try: self.con.sendall(p) except IOError, err: logger.error("[Graphite broker] Failed sending data to the Graphite Carbon instance ! Trying to reconnect ... ") try: self.init() self.con.sendall(p) except IOError: raise
def init(self): logger.info( "[Mongodb Module]: Try to open a Mongodb connection to %s:%s" % (self.uri, self.database)) try: self.con = Connection(self.uri) self.db = getattr(self.con, self.database) except Exception, e: logger.error("Mongodb Module: Error %s:" % e) raise
def create_producer(self): try: self.producer = Producer(channel=self.channel, exchange=self.exchange, routing_key=self.virtual_host) except: func = sys._getframe(1).f_code.co_name error = str(sys.exc_info()[0]) logger.error("[Canopsis] Unexpected error: %s in %s" % (error, func)) return False
def disconnect(self): try: if self.connected(): self.connection.release() return True except: func = sys._getframe(1).f_code.co_name error = str(sys.exc_info()[0]) logger.error("[Canopsis] Unexpected error: %s in %s" % (error, func)) return False
def is_correct(self):
    b = True
    for dr in self.dateranges:
        b &= dr.is_correct()

    # Even one invalid entry makes the whole timeperiod incorrect
    for e in self.invalid_entries:
        b = False
        logger.error("[timeperiod::%s] invalid entry '%s'" % (self.get_name(), e))
    return b