def setUp(self):
    """Spin up a two-node mongod replica set for the MongoDb check tests.

    Starts two mongod processes (PORT1/PORT2) in fresh temporary data
    directories, waits for each to come online, then initiates replication
    from the first node. Failures are logged (not raised) so tearDown can
    still clean up whatever did start.
    """
    self.c = MongoDb(logging.getLogger(__file__))
    # Start 2 instances of Mongo in a replica set
    dir1 = mkdtemp()
    dir2 = mkdtemp()
    try:
        self.p1 = subprocess.Popen(
            ["mongod", "--dbpath", dir1, "--port", str(PORT1),
             "--replSet", "testset/%s:%d" % (socket.gethostname(), PORT2),
             "--rest"],
            executable="mongod",
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        # Sleep until mongo comes online
        self.wait4mongo(self.p1, PORT1)
        if self.p1:
            # Set up replication
            c1 = pymongo.Connection('localhost:%s' % PORT1, slave_okay=True)
            self.p2 = subprocess.Popen(
                ["mongod", "--dbpath", dir2, "--port", str(PORT2),
                 "--replSet", "testset/%s:%d" % (socket.gethostname(), PORT1),
                 "--rest"],
                executable="mongod",
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE)
            self.wait4mongo(self.p2, PORT2)
            # Waiting before all members are online
            time.sleep(15)
            c1.admin.command("replSetInitiate")
            # Sleep until replication is stable
            time.sleep(30)
            # Sanity probes: status command must succeed and the secondary
            # must accept connections (result values are not used).
            c1.admin.command("replSetGetStatus")
            assert pymongo.Connection('localhost:%s' % PORT2)
    except Exception:
        # Was a bare `except:`, which also swallowed SystemExit/KeyboardInterrupt.
        logging.getLogger().exception("Cannot instantiate mongod properly")
def setUp(self):
    """Boot a two-member mongod replica set used by the check tests.

    Both daemons run out of throwaway temp directories; the first node
    issues replSetInitiate once the second is reachable. Any startup
    failure is logged rather than raised so tearDown still runs.
    """
    self.c = MongoDb(logging.getLogger(__file__))
    # Start 2 instances of Mongo in a replica set
    dir1 = mkdtemp()
    dir2 = mkdtemp()
    try:
        self.p1 = subprocess.Popen([
            "mongod", "--dbpath", dir1, "--port", str(PORT1),
            "--replSet", "testset/%s:%d" % (socket.gethostname(), PORT2),
            "--rest"
        ], executable="mongod", stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # Sleep until mongo comes online
        self.wait4mongo(self.p1, PORT1)
        if self.p1:
            # Set up replication
            c1 = pymongo.Connection('localhost:%s' % PORT1, slave_okay=True)
            self.p2 = subprocess.Popen([
                "mongod", "--dbpath", dir2, "--port", str(PORT2),
                "--replSet", "testset/%s:%d" % (socket.gethostname(), PORT1),
                "--rest"
            ], executable="mongod", stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            self.wait4mongo(self.p2, PORT2)
            # Waiting before all members are online
            time.sleep(15)
            c1.admin.command("replSetInitiate")
            # Sleep until replication is stable
            time.sleep(30)
            # Probe replica-set status and secondary connectivity; the
            # returned values are intentionally unused.
            c1.admin.command("replSetGetStatus")
            assert pymongo.Connection('localhost:%s' % PORT2)
    except Exception:
        # Narrowed from a bare `except:` so Ctrl-C/SystemExit still propagate.
        logging.getLogger().exception("Cannot instantiate mongod properly")
def __init__(self, agentConfig, emitters):
    """Wire up every check instance the agent will run.

    agentConfig -- parsed agent configuration dict, consulted by each check
    emitters    -- callables that ship the assembled payload
    """
    self.agentConfig = agentConfig
    self.plugins = None
    self.emitters = emitters
    self.os = None
    self.checksLogger = logging.getLogger('checks')

    # Process-wide socket timeout so one unresponsive service cannot
    # stall an entire collection cycle.
    socket.setdefaulttimeout(15)

    # Legacy single-instance checks, one attribute each.
    self._apache = Apache(self.checksLogger)
    self._nginx = Nginx(self.checksLogger)
    self._disk = Disk(self.checksLogger)
    self._io = IO()
    self._load = Load(self.checksLogger)
    self._memory = Memory(self.checksLogger)
    self._network = Network(self.checksLogger)
    self._processes = Processes()
    self._cpu = Cpu()
    self._couchdb = CouchDb(self.checksLogger)
    self._mongodb = MongoDb(self.checksLogger)
    self._mysql = MySql(self.checksLogger)
    self._pgsql = PostgreSql(self.checksLogger)
    self._rabbitmq = RabbitMq()
    self._ganglia = Ganglia(self.checksLogger)
    self._cassandra = Cassandra()
    self._redis = Redis(self.checksLogger)
    self._jvm = Jvm(self.checksLogger)
    self._tomcat = Tomcat(self.checksLogger)
    self._activemq = ActiveMQ(self.checksLogger)
    self._solr = Solr(self.checksLogger)
    self._memcache = Memcache(self.checksLogger)
    self._dogstream = Dogstreams.init(self.checksLogger, self.agentConfig)
    self._ddforwarder = DdForwarder(self.checksLogger, self.agentConfig)

    # All new checks should be metrics checks:
    self._metrics_checks = [
        Cacti(self.checksLogger),
        Redis(self.checksLogger),
        Varnish(self.checksLogger),
        ElasticSearch(self.checksLogger),
    ]

    # Event, resource, and cloud-metadata checks.
    self._event_checks = [Hudson(), Nagios(socket.gethostname())]
    self._resources_checks = [ResProcesses(self.checksLogger, self.agentConfig)]
    self._ec2 = EC2(self.checksLogger)
class TestMongo(unittest.TestCase):
    """Integration tests for the MongoDb check against a live replica set."""

    def wait4mongo(self, process, port):
        """Block until mongod reports it is listening, or MAX_WAIT polls elapse.

        Reads the daemon's stdout line by line looking for the
        "waiting for connections" banner.
        """
        # Somehow process.communicate() hangs
        out = process.stdout
        loop = 0
        while True:
            l = out.readline()
            if l.find("[initandlisten] waiting for connections on port") > -1:
                break
            else:
                time.sleep(0.1)
                loop += 1
                if loop >= MAX_WAIT:
                    break

    def setUp(self):
        """Start a two-node replica set in temp directories; log any failure."""
        self.c = MongoDb(logging.getLogger(__file__))
        # Start 2 instances of Mongo in a replica set
        dir1 = mkdtemp()
        dir2 = mkdtemp()
        try:
            self.p1 = subprocess.Popen(
                ["mongod", "--dbpath", dir1, "--port", str(PORT1),
                 "--replSet", "testset/%s:%d" % (socket.gethostname(), PORT2),
                 "--rest"],
                executable="mongod",
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE)
            # Sleep until mongo comes online
            self.wait4mongo(self.p1, PORT1)
            if self.p1:
                # Set up replication
                c1 = pymongo.Connection('localhost:%s' % PORT1, slave_okay=True)
                self.p2 = subprocess.Popen(
                    ["mongod", "--dbpath", dir2, "--port", str(PORT2),
                     "--replSet", "testset/%s:%d" % (socket.gethostname(), PORT1),
                     "--rest"],
                    executable="mongod",
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE)
                self.wait4mongo(self.p2, PORT2)
                # Waiting before all members are online
                time.sleep(15)
                c1.admin.command("replSetInitiate")
                # Sleep until replication is stable
                time.sleep(30)
                # Sanity probes only; results are unused (was `x = ...`).
                c1.admin.command("replSetGetStatus")
                assert pymongo.Connection('localhost:%s' % PORT2)
        except Exception:
            # Narrowed from bare `except:`.
            logging.getLogger().exception("Cannot instantiate mongod properly")

    def tearDown(self):
        """Terminate whichever mongod processes actually started."""
        try:
            # hasattr is clearer and cheaper than `"p1" in dir(self)`.
            if hasattr(self, "p1"):
                self.p1.terminate()
            if hasattr(self, "p2"):
                self.p2.terminate()
        except Exception:
            logging.getLogger().exception("Cannot terminate mongod instances")

    def testCheck(self):
        """Run the check against both members and validate core server stats."""
        r = self.c.check({"MongoDBServer": "mongodb://localhost:%s/test" % PORT1,
                          "apiKey": "abc123"})
        self.assertTrue(r and r["connections"]["current"] >= 1)
        assert r["connections"]["available"] >= 1
        assert r["uptime"] >= 0, r
        assert r["mem"]["resident"] > 0
        assert r["mem"]["virtual"] > 0
        assert "replSet" in r

        # Same assertions against the secondary.
        r = self.c.check({"MongoDBServer": "mongodb://localhost:%s/test" % PORT2,
                          "apiKey": "abc123"})
        self.assertTrue(r and r["connections"]["current"] >= 1)
        assert r["connections"]["available"] >= 1
        assert r["uptime"] >= 0, r
        assert r["mem"]["resident"] > 0
        assert r["mem"]["virtual"] > 0
        assert "replSet" in r
class TestMongo(unittest.TestCase):
    """Integration tests for the MongoDb check (new-style config keys)."""

    def wait4mongo(self, process, port):
        """Poll mongod's stdout until the listening banner appears or MAX_WAIT polls pass."""
        # Somehow process.communicate() hangs
        out = process.stdout
        loop = 0
        while True:
            l = out.readline()
            if l.find("[initandlisten] waiting for connections on port") > -1:
                break
            else:
                time.sleep(0.1)
                loop += 1
                if loop >= MAX_WAIT:
                    break

    def setUp(self):
        """Start a two-node replica set in temp directories; log any failure."""
        self.c = MongoDb(logging.getLogger(__file__))
        # Start 2 instances of Mongo in a replica set
        dir1 = mkdtemp()
        dir2 = mkdtemp()
        try:
            self.p1 = subprocess.Popen([
                "mongod", "--dbpath", dir1, "--port", str(PORT1),
                "--replSet", "testset/%s:%d" % (socket.gethostname(), PORT2),
                "--rest"
            ], executable="mongod", stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            # Sleep until mongo comes online
            self.wait4mongo(self.p1, PORT1)
            if self.p1:
                # Set up replication
                c1 = pymongo.Connection('localhost:%s' % PORT1, slave_okay=True)
                self.p2 = subprocess.Popen([
                    "mongod", "--dbpath", dir2, "--port", str(PORT2),
                    "--replSet", "testset/%s:%d" % (socket.gethostname(), PORT1),
                    "--rest"
                ], executable="mongod", stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                self.wait4mongo(self.p2, PORT2)
                # Waiting before all members are online
                time.sleep(15)
                c1.admin.command("replSetInitiate")
                # Sleep until replication is stable
                time.sleep(30)
                # Sanity probes only; results are unused (was `x = ...`).
                c1.admin.command("replSetGetStatus")
                assert pymongo.Connection('localhost:%s' % PORT2)
        except Exception:
            # Narrowed from bare `except:`.
            logging.getLogger().exception("Cannot instantiate mongod properly")

    def tearDown(self):
        """Terminate whichever mongod processes actually started."""
        try:
            if hasattr(self, "p1"):
                self.p1.terminate()
            if hasattr(self, "p2"):
                self.p2.terminate()
        except Exception:
            logging.getLogger().exception("Cannot terminate mongod instances")

    def testCheck(self):
        """Run the check against both members and validate core server stats."""
        r = self.c.check({
            "mongodb_server": "mongodb://localhost:%s/test" % PORT1,
            "api_key": "abc123"
        })
        self.assertTrue(r and r["connections"]["current"] >= 1)
        assert r["connections"]["available"] >= 1
        assert r["uptime"] >= 0, r
        assert r["mem"]["resident"] > 0
        assert r["mem"]["virtual"] > 0
        assert "replSet" in r

        # Same assertions against the secondary.
        r = self.c.check({
            "mongodb_server": "mongodb://localhost:%s/test" % PORT2,
            "api_key": "abc123"
        })
        self.assertTrue(r and r["connections"]["current"] >= 1)
        assert r["connections"]["available"] >= 1
        assert r["uptime"] >= 0, r
        assert r["mem"]["resident"] > 0
        assert r["mem"]["virtual"] > 0
        assert "replSet" in r
def __init__(self, agentConfig, emitters, systemStats):
    """Set up all system, metric, and custom checks for the collector.

    agentConfig -- agent configuration dict (mutated: system_stats is added)
    emitters    -- callables that ship the assembled payload
    systemStats -- output of config.get_system_stats, stored in the config
    """
    self.agentConfig = agentConfig
    # system stats is generated by config.get_system_stats
    self.agentConfig['system_stats'] = systemStats
    # agent config is used during checks, system_stats can be accessed through the config
    self.os = getOS()
    self.plugins = None
    self.emitters = emitters
    self.metadata_interval = int(agentConfig.get('metadata_interval', 10 * 60))
    self.metadata_start = time.time()
    # Process-wide timeout so one hung service cannot stall the whole run.
    socket.setdefaulttimeout(15)
    self.run_count = 0
    self.continue_running = True

    # Unix System Checks
    self._unix_system_checks = {
        'disk': u.Disk(checks_logger),
        'io': u.IO(),
        'load': u.Load(checks_logger),
        'memory': u.Memory(checks_logger),
        'network': u.Network(checks_logger),
        'processes': u.Processes(),
        'cpu': u.Cpu(checks_logger)
    }

    # Win32 System Checks
    self._win32_system_checks = {
        'disk': w32.Disk(checks_logger),
        'io': w32.IO(checks_logger),
        'proc': w32.Processes(checks_logger),
        'memory': w32.Memory(checks_logger),
        'network': w32.Network(checks_logger),
        'cpu': w32.Cpu(checks_logger)
    }

    # Old-style metric checks
    self._couchdb = CouchDb(checks_logger)
    self._mongodb = MongoDb(checks_logger)
    self._mysql = MySql(checks_logger)
    self._rabbitmq = RabbitMq()
    self._ganglia = Ganglia(checks_logger)
    self._cassandra = Cassandra()
    self._dogstream = Dogstreams.init(checks_logger, self.agentConfig)
    self._ddforwarder = DdForwarder(checks_logger, self.agentConfig)
    self._ec2 = EC2(checks_logger)

    # Metric Checks
    self._metrics_checks = [
        ElasticSearch(checks_logger),
        Jvm(checks_logger),
        Tomcat(checks_logger),
        ActiveMQ(checks_logger),
        Solr(checks_logger),
        WMICheck(checks_logger),
        Memcache(checks_logger),
    ]

    # Custom metric checks: 'custom_checks' is a comma-separated list of
    # module specs; a broken module is logged and skipped, never fatal.
    for module_spec in [s.strip() for s in self.agentConfig.get('custom_checks', '').split(',')]:
        if not module_spec:
            continue
        try:
            self._metrics_checks.append(modules.load(module_spec, 'Check')(checks_logger))
            logger.info("Registered custom check %s" % module_spec)
        except Exception:
            # `except Exception, e` (Py2-only syntax, unused binding) replaced
            # with the 2.6+/3.x-compatible form.
            logger.exception('Unable to load custom check module %s' % module_spec)
class checks(object):
    """Runs every agent check, assembles the payload, and hands it to the emitters."""

    def __init__(self, agentConfig, emitters):
        self.agentConfig = agentConfig
        self.plugins = None
        self.emitters = emitters
        self.os = None
        self.checksLogger = logging.getLogger('checks')
        # Process-wide socket timeout so a hung service cannot stall a run.
        socket.setdefaulttimeout(15)

        self._apache = Apache(self.checksLogger)
        self._nginx = Nginx(self.checksLogger)
        self._disk = Disk(self.checksLogger)
        self._io = IO()
        self._load = Load(self.checksLogger)
        self._memory = Memory(self.checksLogger)
        self._network = Network(self.checksLogger)
        self._processes = Processes()
        self._cpu = Cpu()
        self._couchdb = CouchDb(self.checksLogger)
        self._mongodb = MongoDb(self.checksLogger)
        self._mysql = MySql(self.checksLogger)
        self._pgsql = PostgreSql(self.checksLogger)
        self._rabbitmq = RabbitMq()
        self._ganglia = Ganglia(self.checksLogger)
        self._cassandra = Cassandra()
        self._redis = Redis(self.checksLogger)
        self._jvm = Jvm(self.checksLogger)
        self._tomcat = Tomcat(self.checksLogger)
        self._activemq = ActiveMQ(self.checksLogger)
        self._solr = Solr(self.checksLogger)
        self._memcache = Memcache(self.checksLogger)
        self._dogstream = Dogstreams.init(self.checksLogger, self.agentConfig)
        self._ddforwarder = DdForwarder(self.checksLogger, self.agentConfig)

        # All new checks should be metrics checks:
        self._metrics_checks = [
            Cacti(self.checksLogger),
            Redis(self.checksLogger),
            Varnish(self.checksLogger),
            ElasticSearch(self.checksLogger),
        ]
        self._event_checks = [Hudson(), Nagios(socket.gethostname())]
        self._resources_checks = [ResProcesses(self.checksLogger, self.agentConfig)]
        self._ec2 = EC2(self.checksLogger)

    #
    # Checks - FIXME migrating to the new Check interface is a WIP
    #
    @recordsize
    def getApacheStatus(self):
        return self._apache.check(self.agentConfig)

    @recordsize
    def getCouchDBStatus(self):
        return self._couchdb.check(self.agentConfig)

    @recordsize
    def getDiskUsage(self):
        return self._disk.check(self.agentConfig)

    @recordsize
    def getIOStats(self):
        return self._io.check(self.checksLogger, self.agentConfig)

    @recordsize
    def getLoadAvrgs(self):
        return self._load.check(self.agentConfig)

    @recordsize
    def getMemoryUsage(self):
        return self._memory.check(self.agentConfig)

    @recordsize
    def getMongoDBStatus(self):
        return self._mongodb.check(self.agentConfig)

    @recordsize
    def getMySQLStatus(self):
        return self._mysql.check(self.agentConfig)

    @recordsize
    def getPgSQLStatus(self):
        return self._pgsql.check(self.agentConfig)

    @recordsize
    def getNetworkTraffic(self):
        return self._network.check(self.agentConfig)

    @recordsize
    def getNginxStatus(self):
        return self._nginx.check(self.agentConfig)

    @recordsize
    def getProcesses(self):
        return self._processes.check(self.checksLogger, self.agentConfig)

    @recordsize
    def getRabbitMQStatus(self):
        return self._rabbitmq.check(self.checksLogger, self.agentConfig)

    @recordsize
    def getGangliaData(self):
        return self._ganglia.check(self.agentConfig)

    @recordsize
    def getCassandraData(self):
        return self._cassandra.check(self.checksLogger, self.agentConfig)

    @recordsize
    def getJvmData(self):
        return self._jvm.check(self.agentConfig)

    @recordsize
    def getTomcatData(self):
        return self._tomcat.check(self.agentConfig)

    @recordsize
    def getActiveMQData(self):
        return self._activemq.check(self.agentConfig)

    @recordsize
    def getSolrData(self):
        return self._solr.check(self.agentConfig)

    @recordsize
    def getMemcacheData(self):
        return self._memcache.check(self.agentConfig)

    @recordsize
    def getDogstreamData(self):
        return self._dogstream.check(self.agentConfig)

    @recordsize
    def getDdforwarderData(self):
        return self._ddforwarder.check(self.agentConfig)

    @recordsize
    def getCPUStats(self):
        return self._cpu.check(self.checksLogger, self.agentConfig)

    @recordsize
    def get_metadata(self):
        """Collect host identity metadata (EC2, agent config, local hostname/fqdn)."""
        metadata = self._ec2.get_metadata()
        if metadata.get('hostname'):
            metadata['ec2-hostname'] = metadata.get('hostname')
        if self.agentConfig.get('hostname'):
            metadata['agent-hostname'] = self.agentConfig.get('hostname')
        # Best-effort lookups: metadata is optional, so failures are ignored,
        # but narrowed from bare `except:` to `except Exception`.
        try:
            metadata["hostname"] = socket.gethostname()
        except Exception:
            pass
        try:
            metadata["fqdn"] = socket.getfqdn()
        except Exception:
            pass
        return metadata

    def doChecks(self, firstRun=False, systemStats=False):
        """Actual work: run every check, assemble checksData, and emit it.

        firstRun    -- include systemStats in the payload on the first postback
        systemStats -- system facts gathered at agent start
        """
        self.checksLogger.info("Starting checks")

        apacheStatus = self.getApacheStatus()
        diskUsage = self.getDiskUsage()
        loadAvrgs = self.getLoadAvrgs()
        memory = self.getMemoryUsage()
        mysqlStatus = self.getMySQLStatus()
        pgsqlStatus = self.getPgSQLStatus()
        networkTraffic = self.getNetworkTraffic()
        nginxStatus = self.getNginxStatus()
        processes = self.getProcesses()
        rabbitmq = self.getRabbitMQStatus()
        mongodb = self.getMongoDBStatus()
        couchdb = self.getCouchDBStatus()
        ioStats = self.getIOStats()
        cpuStats = self.getCPUStats()
        gangliaData = self.getGangliaData()
        cassandraData = self.getCassandraData()
        jvmData = self.getJvmData()
        tomcatData = self.getTomcatData()
        activeMQData = self.getActiveMQData()
        solrData = self.getSolrData()
        memcacheData = self.getMemcacheData()
        dogstreamData = self.getDogstreamData()
        ddforwarderData = self.getDdforwarderData()

        checksData = {
            'collection_timestamp': time.time(),
            'os': self.os,
            'python': sys.version,
            'agentVersion': self.agentConfig['version'],
            'loadAvrg1': loadAvrgs['1'],
            'loadAvrg5': loadAvrgs['5'],
            'loadAvrg15': loadAvrgs['15'],
            'memPhysUsed': memory.get('physUsed'),
            'memPhysFree': memory.get('physFree'),
            'memPhysTotal': memory.get('physTotal'),
            'memPhysUsable': memory.get('physUsable'),
            'memSwapUsed': memory.get('swapUsed'),
            'memSwapFree': memory.get('swapFree'),
            'memSwapTotal': memory.get('swapTotal'),
            'memCached': memory.get('physCached'),
            'memBuffers': memory.get('physBuffers'),
            'memShared': memory.get('physShared'),
            'networkTraffic': networkTraffic,
            'processes': processes,
            'apiKey': self.agentConfig['apiKey'],
            'events': {},
            'resources': {},
        }

        # Checks that may return False/None are merged in only when present.
        if diskUsage is not False and len(diskUsage) == 2:
            checksData["diskUsage"] = diskUsage[0]
            checksData["inodes"] = diskUsage[1]

        if cpuStats is not False and cpuStats is not None:
            checksData.update(cpuStats)

        if gangliaData is not False and gangliaData is not None:
            checksData['ganglia'] = gangliaData

        if cassandraData is not False and cassandraData is not None:
            checksData['cassandra'] = cassandraData

        # Apache Status
        if apacheStatus:
            checksData.update(apacheStatus)

        # MySQL Status
        if mysqlStatus:
            checksData.update(mysqlStatus)

        # PostgreSQL status
        if pgsqlStatus:
            checksData['postgresql'] = pgsqlStatus

        # Nginx Status
        if nginxStatus:
            checksData.update(nginxStatus)

        # RabbitMQ
        if rabbitmq:
            checksData['rabbitMQ'] = rabbitmq

        # MongoDB
        if mongodb:
            # `has_key` is deprecated (removed in Python 3); use `in`.
            if 'events' in mongodb:
                checksData['events']['Mongo'] = mongodb['events']['Mongo']
                del mongodb['events']
            checksData['mongoDB'] = mongodb

        # CouchDB
        if couchdb:
            checksData['couchDB'] = couchdb

        if ioStats:
            checksData['ioStats'] = ioStats

        if jvmData:
            checksData['jvm'] = jvmData

        if tomcatData:
            checksData['tomcat'] = tomcatData

        if activeMQData:
            checksData['activemq'] = activeMQData

        if solrData:
            checksData['solr'] = solrData

        if memcacheData:
            checksData['memcache'] = memcacheData

        if dogstreamData:
            dogstreamEvents = dogstreamData.get('dogstreamEvents', None)
            if dogstreamEvents:
                if 'dogstream' in checksData['events']:
                    checksData['events']['dogstream'].extend(dogstreamEvents)
                else:
                    checksData['events']['dogstream'] = dogstreamEvents
                del dogstreamData['dogstreamEvents']
            checksData.update(dogstreamData)

        if ddforwarderData:
            checksData['datadog'] = ddforwarderData

        # Include server identifiers
        checksData['internalHostname'] = gethostname(self.agentConfig)
        checksData['uuid'] = getUuid()
        self.checksLogger.debug('doChecks: added uuid %s' % checksData['uuid'])

        # Process the event checks.
        for event_check in self._event_checks:
            event_data = event_check.check(self.checksLogger, self.agentConfig)
            if event_data:
                checksData['events'][event_check.key] = event_data

        # Include system stats on first postback
        if firstRun:
            checksData['systemStats'] = systemStats

        # Add static tags from the configuration file
        if self.agentConfig['tags'] is not None:
            checksData['tags'] = self.agentConfig['tags']

        # Also post an event in the newsfeed
        checksData['events']['System'] = [{
            'api_key': self.agentConfig['apiKey'],
            'host': checksData['internalHostname'],
            'timestamp': int(time.mktime(datetime.datetime.now().timetuple())),
            'event_type': 'Agent Startup',
            'msg_text': 'Version %s' % get_version()
        }]

        # Collect metadata
        checksData['meta'] = self.get_metadata()

        # Resources checks
        has_resource = False
        for resources_check in self._resources_checks:
            resources_check.check()
            snaps = resources_check.pop_snapshots()
            if snaps:
                has_resource = True
                res_value = {
                    'snaps': snaps,
                    'format_version': resources_check.get_format_version()
                }
                res_format = resources_check.describe_format_if_needed()
                if res_format is not None:
                    res_value['format_description'] = res_format
                checksData['resources'][resources_check.RESOURCE_KEY] = res_value

        if has_resource:
            checksData['resources']['meta'] = {
                'api_key': self.agentConfig['apiKey'],
                'host': checksData['internalHostname'],
            }

        metrics = []
        for metrics_check in self._metrics_checks:
            res = metrics_check.check(self.agentConfig)
            if res:
                metrics.extend(res)
        checksData['metrics'] = metrics

        # Send back data
        self.checksLogger.debug("checksData: %s" % checksData)
        for emitter in self.emitters:
            emitter(checksData, self.checksLogger, self.agentConfig)
        self.checksLogger.info("Checks done")