Example #1
    def __init__(self, agentConfig, emitters, systemStats):
        self.emit_duration = None
        self.agentConfig = agentConfig
        # system stats is generated by config.get_system_stats
        self.agentConfig["system_stats"] = systemStats
        # agent config is used during checks, system_stats can be accessed through the config
        self.os = get_os()
        self.plugins = None
        self.emitters = emitters
        self.metadata_interval = int(agentConfig.get("metadata_interval", 10 * 60))
        self.metadata_start = time.time()
        socket.setdefaulttimeout(15)
        self.run_count = 0
        self.continue_running = True
        self.metadata_cache = None
        self.initialized_checks_d = []
        self.init_failed_checks_d = []

        # Unix System Checks
        self._unix_system_checks = {
            "disk": u.Disk(log),
            "io": u.IO(log),
            "load": u.Load(log),
            "memory": u.Memory(log),
            "processes": u.Processes(log),
            "cpu": u.Cpu(log),
        }

        # Win32 System Checks
        self._win32_system_checks = {
            "disk": w32.Disk(log),
            "io": w32.IO(log),
            "proc": w32.Processes(log),
            "memory": w32.Memory(log),
            "network": w32.Network(log),
            "cpu": w32.Cpu(log),
        }

        # Old-style metric checks
        self._ganglia = Ganglia(log)
        self._dogstream = Dogstreams.init(log, self.agentConfig)
        self._ddforwarder = DdForwarder(log, self.agentConfig)

        # Agent Metrics
        self._agent_metrics = CollectorMetrics(log)

        self._metrics_checks = []

        # Custom metric checks
        for module_spec in [s.strip() for s in self.agentConfig.get("custom_checks", "").split(",")]:
            if len(module_spec) == 0:
                continue
            try:
                self._metrics_checks.append(modules.load(module_spec, "Check")(log))
                log.info("Registered custom check %s" % module_spec)
                log.warning(
                    "Old format custom checks are deprecated. They should be moved to the checks.d interface, as old custom checks will be removed in a future version"
                )
            except Exception:
                log.exception("Unable to load custom check module %s" % module_spec)
Example #2
    def test_dogstream_events(self):
        log_data = [
            '2012-05-14 12:46:01 [ERROR] - host0 is down (broke its collarbone)',
            '2012-05-14 12:48:07 [ERROR] - host1 is down (got a bloody nose)',
            '2012-05-14 12:52:03 [RECOVERY] - host0 is up (collarbone healed)',
            '2012-05-14 12:59:09 [RECOVERY] - host1 is up (nose stopped bleeding)',
        ]
        expected_output = {
            "dogstreamEvents": [
                {
                    "timestamp": 1336999561,
                    "alert_type": "error",
                    "host": "host0",
                    "msg_title": "host0 is down (broke its collarbone)",
                    "msg_text": "2012-05-14 12:46:01 [ERROR] - host0 is down (broke its collarbone)",
                    "event_type": EventDefaults.EVENT_TYPE,
                    "aggregation_key": EventDefaults.EVENT_OBJECT,
                    "event_object": EventDefaults.EVENT_OBJECT,
                },

                {
                    "timestamp": 1336999687,
                    "alert_type": "error",
                    "host": "host1",
                    "msg_title": "host1 is down (got a bloody nose)",
                    "msg_text": "2012-05-14 12:48:07 [ERROR] - host1 is down (got a bloody nose)",
                    "event_type": EventDefaults.EVENT_TYPE,
                    "aggregation_key": EventDefaults.EVENT_OBJECT,
                    "event_object": EventDefaults.EVENT_OBJECT,
                },

                {
                    "timestamp": 1336999923,
                    "alert_type": "success",
                    "host": "host0",
                    "msg_title": "host0 is up (collarbone healed)",
                    "msg_text": "2012-05-14 12:52:03 [RECOVERY] - host0 is up (collarbone healed)",
                    "event_type": EventDefaults.EVENT_TYPE,
                    "aggregation_key": EventDefaults.EVENT_OBJECT,
                    "event_object": EventDefaults.EVENT_OBJECT,
                },

                {
                    "timestamp": 1337000349,
                    "alert_type": "success",
                    "host": "host1",
                    "msg_title": "host1 is up (nose stopped bleeding)",
                    "msg_text": "2012-05-14 12:59:09 [RECOVERY] - host1 is up (nose stopped bleeding)",
                    "event_type": EventDefaults.EVENT_TYPE,
                    "aggregation_key": EventDefaults.EVENT_OBJECT,
                    "event_object": EventDefaults.EVENT_OBJECT,
                },

            ]
        }
        self._write_log(log_data)

        dogstream = Dogstreams.init(self.logger, {'dogstreams': '{0}:{1}:parse_events'.format(self.log_file.name, __name__)})
        actual_output = dogstream.check(self.config, move_end=False)
        self.assertEquals(expected_output, actual_output)
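The parser named in the dogstreams spec, parse_events, is defined in the test module itself and not shown here. Below is a sketch of a line parser that would produce the events above, assuming the (logger, line) signature used throughout these tests; the regex is inferred from the sample log lines, and the event_type/aggregation_key/event_object defaults are presumably filled in downstream from EventDefaults:

    import re
    import time
    from datetime import datetime

    def parse_events(logger, line):
        # e.g. "2012-05-14 12:46:01 [ERROR] - host0 is down (...)"
        match = re.match(r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) \[(\w+)\] - (.*)', line)
        if match is None:
            return None
        date_str, level, msg = match.groups()
        ts = int(time.mktime(datetime.strptime(date_str, '%Y-%m-%d %H:%M:%S').timetuple()))
        return {
            'timestamp': ts,
            'alert_type': 'error' if level == 'ERROR' else 'success',
            'host': msg.split(' ', 1)[0],
            'msg_title': msg,
            'msg_text': line.strip(),
        }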
Example #3
    def test_dogstream_events_validation(self):
        log_data = [
            {"msg_title": "title", "timestamp": 1336999561},
            {"msg_text": "body", "timestamp": 1336999561},
            {"none of the above": "should get filtered out", "timestamp": 1336999561},
        ]

        expected_output = {
            "dogstreamEvents": [
                {
                    "timestamp": 1336999561,
                    "msg_title": "title",
                    "event_type": EventDefaults.EVENT_TYPE,
                    "aggregation_key": EventDefaults.EVENT_OBJECT,
                    "event_object": EventDefaults.EVENT_OBJECT,
                },

                {
                    "timestamp": 1336999561,
                    "msg_text": "body",
                    "event_type": EventDefaults.EVENT_TYPE,
                    "aggregation_key": EventDefaults.EVENT_OBJECT,
                    "event_object": EventDefaults.EVENT_OBJECT,
                },

            ]
        }

        self._write_log([repr(d) for d in log_data])

        dogstream = Dogstreams.init(self.logger, {'dogstreams': '{0}:{1}:repr_event_parser'.format(self.log_file.name, __name__)})
        actual_output = dogstream.check(self.config, move_end=False)
        self.assertEquals(expected_output, actual_output)
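repr_event_parser is also defined in the test module. Since the log is written with repr(d) for each dict, the parser presumably just evaluates each line back into a dict; a sketch using ast.literal_eval rather than eval. The filtering the test name refers to (dropping the entry with neither msg_title nor msg_text) would then happen in the dogstream validation layer, not in the parser itself:

    import ast

    def repr_event_parser(logger, line):
        # Each log line is the repr() of an event dict; recover it.
        return ast.literal_eval(line)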
Example #4
    def __init__(self, agentConfig, emitters, systemStats):
        self.emit_duration = None
        self.agentConfig = agentConfig
        # system stats is generated by config.get_system_stats
        self.agentConfig['system_stats'] = systemStats
        # agent config is used during checks, system_stats can be accessed through the config
        self.os = get_os()
        self.plugins = None
        self.emitters = emitters            
        self.metadata_interval = int(agentConfig.get('metadata_interval', 10 * 60))
        self.metadata_start = time.time()
        socket.setdefaulttimeout(15)
        self.run_count = 0
        self.continue_running = True
        self.metadata_cache = None
        self.checks_d = []
        
        # Unix System Checks
        self._unix_system_checks = {
            'disk': u.Disk(log),
            'io': u.IO(log),
            'load': u.Load(log),
            'memory': u.Memory(log),
            'processes': u.Processes(log),
            'cpu': u.Cpu(log)
        }

        # Win32 System Checks
        self._win32_system_checks = {
            'disk': w32.Disk(log),
            'io': w32.IO(log),
            'proc': w32.Processes(log),
            'memory': w32.Memory(log),
            'network': w32.Network(log),
            'cpu': w32.Cpu(log)
        }

        # Old-style metric checks
        self._ganglia = Ganglia(log)
        self._cassandra = Cassandra()
        self._dogstream = Dogstreams.init(log, self.agentConfig)
        self._ddforwarder = DdForwarder(log, self.agentConfig)

        # Agent Metrics
        self._agent_metrics = CollectorMetrics(log)

        # Metric Checks
        self._metrics_checks = [
            Memcache(log),
        ]

        # Custom metric checks
        for module_spec in [s.strip() for s in self.agentConfig.get('custom_checks', '').split(',')]:
            if len(module_spec) == 0:
                continue
            try:
                self._metrics_checks.append(modules.load(module_spec, 'Check')(log))
                log.info("Registered custom check %s" % module_spec)
            except Exception:
                log.exception('Unable to load custom check module %s' % module_spec)
Example #5
    def setUp(self):
        TailTestCase.setUp(self)

        self.config = {
            'dogstreams': self.log_file.name,
            'check_freq': 5,
        }
        log.info("Test config: %s" % self.config)
        self.dogstream = Dogstreams.init(self.logger, self.config)
        self.maxDiff = None
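As the surrounding tests show, the dogstreams value is either a bare log path (as here, parsed with the built-in "metric timestamp value attr=val" line format) or a path:module:callable spec naming a custom parser, optionally with extra arguments, with multiple streams separated by commas. A hypothetical pair of configs illustrating both forms, with made-up paths:

    # Built-in line format; path only.
    config_plain = {'dogstreams': '/var/log/app.log'}

    # Custom parser: path:module:callable[:init_args...]
    config_custom = {'dogstreams': '/var/log/app.log:my_module:parse_events'}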
Example #6
    def test_host_perfdata(self):
        from checks.datadog import NagiosHostPerfData

        self._write_nagios_config([
            "host_perfdata_file=%s" % self.log_file.name,
            "host_perfdata_file_template=DATATYPE::HOSTPERFDATA\tTIMET::$TIMET$\tHOSTNAME::$HOSTNAME$\tHOSTPERFDATA::$HOSTPERFDATA$\tHOSTCHECKCOMMAND::$HOSTCHECKCOMMAND$\tHOSTSTATE::$HOSTSTATE$\tHOSTSTATETYPE::$HOSTSTATETYPE$",
        ])

        dogstream = Dogstreams.init(self.logger, self.agent_config)
        self.assertEquals([NagiosHostPerfData], [d.__class__ for d in dogstream.dogstreams])

        log_data = [
            ("DATATYPE::HOSTPERFDATA", 
             "TIMET::1000000010", 
             "HOSTNAME::myhost1", 
             "HOSTPERFDATA::" + " ".join([
                "rta=0.978000ms;5000.000000;5000.000000;0.000000", 
                "pl=0%;100;100;0", 
             ]),
             "HOSTCHECKCOMMAND::check-host-alive",
             "HOSTSTATE::UP",   
             "HOSTSTATETYPE::HARD",
            ),
        ]
        
        expected_output = [
            ('nagios.host.rta', 1000000010, 0.978, {
                'metric_type': 'gauge',
                'host_name': 'myhost1',
                'unit': 'ms',
                'warn': '5000.000000',
                'crit': '5000.000000',
                'min': '0.000000'
            }),
            ('nagios.host.pl',  1000000010, 0., {
                'metric_type': 'gauge',
                'host_name': 'myhost1',
                'unit': '%',
                'warn': '100',
                'crit': '100',
                'min': '0'
            }),
        ]
        expected_output.sort(key=point_sorter)

        self._write_log(('\t'.join(data) for data in log_data))        

        actual_output = dogstream.check(self.agent_config, move_end=False)['dogstream']
        actual_output.sort(key=point_sorter)

        self.assertEquals(expected_output, actual_output)
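Each HOSTPERFDATA token follows the usual Nagios perfdata grammar, label=value[unit];warn;crit;min[;max], which is how "rta=0.978000ms;5000.000000;5000.000000;0.000000" becomes the nagios.host.rta point above with its unit, warn, crit and min attributes. A rough sketch of that decomposition; the real NagiosHostPerfData parser in checks.datadog handles more edge cases:

    import re

    def split_perfdata(token):
        # "rta=0.978000ms;5000.000000;5000.000000;0.000000"
        label, rest = token.split('=', 1)
        fields = rest.split(';')
        num = re.match(r'([-0-9.]+)(.*)', fields[0])
        value, unit = float(num.group(1)), num.group(2)
        thresholds = dict(zip(['warn', 'crit', 'min', 'max'], fields[1:]))
        return label, value, unit, thresholds

    # split_perfdata("pl=0%;100;100;0")
    # -> ('pl', 0.0, '%', {'warn': '100', 'crit': '100', 'min': '0'})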
Example #7
    def test_dogstream_ancient_function_plugin(self):
        """Ensure that pre-stateful plugins still work"""
        log_data = [
            'test.metric.simple 1000000000 1 metric_type=gauge',
            'test.metric.simple 1100000000 1 metric_type=gauge'
        ]
        expected_output = {
            "dogstream": [
                ('test.metric.simple', 1000000000, 1, self.gauge),
                ('test.metric.simple', 1100000000, 1, self.gauge)]
        }
        self._write_log(log_data)
        plugdog = Dogstreams.init(self.logger, {'dogstreams': '{0}:{1}:parse_ancient_function_plugin'.format(self.log_file.name, __name__)})
        actual_output = plugdog.check(self.config, move_end=False)
        self.assertEquals(expected_output, actual_output)
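An "ancient" function plugin is the pre-stateful parser shape: a bare (logger, line) function returning one metric tuple. A sketch matching the "test.metric.simple 1000000000 1 metric_type=gauge" lines above, on the assumption that self.gauge in the expected output is the {'metric_type': 'gauge'} attribute dict:

    def parse_ancient_function_plugin(logger, line):
        # "test.metric.simple 1000000000 1 metric_type=gauge"
        metric, timestamp, value, attrs = line.split(' ')
        key, val = attrs.split('=')
        return metric, int(timestamp), int(value), {key: val}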
Example #8
    def __init__(self, agentConfig, emitters):
        self.agentConfig = agentConfig
        self.plugins = None
        self.emitters = emitters
        self.os = None

        self.checksLogger = logging.getLogger("checks")
        socket.setdefaulttimeout(15)

        self._apache = Apache(self.checksLogger)
        self._nginx = Nginx(self.checksLogger)
        self._disk = Disk(self.checksLogger)
        self._io = IO()
        self._load = Load(self.checksLogger)
        self._memory = Memory(self.checksLogger)
        self._network = Network(self.checksLogger)
        self._processes = Processes()
        self._cpu = Cpu()
        self._couchdb = CouchDb(self.checksLogger)
        self._mongodb = MongoDb(self.checksLogger)
        self._mysql = MySql(self.checksLogger)
        self._pgsql = PostgreSql(self.checksLogger)
        self._rabbitmq = RabbitMq()
        self._ganglia = Ganglia(self.checksLogger)
        self._cassandra = Cassandra()
        self._redis = Redis(self.checksLogger)
        self._jvm = Jvm(self.checksLogger)
        self._tomcat = Tomcat(self.checksLogger)
        self._activemq = ActiveMQ(self.checksLogger)
        self._solr = Solr(self.checksLogger)
        self._memcache = Memcache(self.checksLogger)
        self._dogstream = Dogstreams.init(self.checksLogger, self.agentConfig)
        self._ddforwarder = DdForwarder(self.checksLogger, self.agentConfig)

        # All new checks should be metrics checks:
        self._metrics_checks = [
            Cacti(self.checksLogger),
            Redis(self.checksLogger),
            Varnish(self.checksLogger),
            ElasticSearch(self.checksLogger),
        ]

        for module_spec in [s.strip() for s in self.agentConfig.get("custom_checks", "").split(",")]:
            if len(module_spec) == 0:
                continue
            try:
                self._metrics_checks.append(modules.load(module_spec, "Check")(self.checksLogger))
                self.checksLogger.info("Registered custom check %s" % module_spec)
            except Exception:
                self.checksLogger.exception("Unable to load custom check module %s" % module_spec)
Example #9
    def test_alt_host_perfdata(self):
        from checks.datadog import NagiosHostPerfData

        self._write_nagios_config([
            "host_perfdata_file=%s" % NAGIOS_TEST_HOST,
            "host_perfdata_file_template=%s" % NAGIOS_TEST_HOST_TEMPLATE,
        ])

        dogstream = Dogstreams.init(self.logger, self.agent_config)
        self.assertEquals([NagiosHostPerfData], [d.__class__ for d in dogstream.dogstreams])
        actual_output = dogstream.check(self.agent_config, move_end=False)

        expected_output = {'dogstream': [('nagios.host.pl', 1339511440, 0.0, {'warn': '80', 'metric_type': 'gauge', 'host_name': 'localhost', 'min': '0', 'crit': '100', 'unit': '%'}), ('nagios.host.rta', 1339511440, 0.048, {'warn': '3000.000000', 'metric_type': 'gauge', 'host_name': 'localhost', 'min': '0.000000', 'crit': '5000.000000', 'unit': 'ms'})]}
        self.assertEquals(expected_output, actual_output)
Example #10
    def test_alt_service_perfdata(self):
        from checks.datadog import NagiosServicePerfData

        self._write_nagios_config([
            "service_perfdata_file=%s" % NAGIOS_TEST_SVC,
            "service_perfdata_file_template=%s" % NAGIOS_TEST_SVC_TEMPLATE,
        ])

        dogstream = Dogstreams.init(self.logger, self.agent_config)
        self.assertEquals([NagiosServicePerfData], [d.__class__ for d in dogstream.dogstreams])
        actual_output = dogstream.check(self.agent_config, move_end=False)

        expected_output = {'dogstream': [('nagios.current_users.users', 1339511440, 1.0, {'metric_type': 'gauge', 'warn': '20', 'host_name': 'localhost', 'crit': '50', 'min': '0'}), ('nagios.ping.pl', 1339511500, 0.0, {'warn': '20', 'metric_type': 'gauge', 'host_name': 'localhost', 'min': '0', 'crit': '60', 'unit': '%'}), ('nagios.ping.rta', 1339511500, 0.065, {'warn': '100.000000', 'metric_type': 'gauge', 'host_name': 'localhost', 'min': '0.000000', 'crit': '500.000000', 'unit': 'ms'}), ('nagios.root_partition', 1339511560, 2470.0, {'min': '0', 'max': '7315', 'device_name': '/', 'warn': '5852', 'metric_type': 'gauge', 'host_name': 'localhost', 'crit': '6583', 'unit': 'MB'})]}
        self.assertEquals(expected_output, actual_output)
Example #11
    def test_dogstream_stateful(self):
        log_data = [
            'test.metric.accumulator 1000000000 1 metric_type=counter',
            'test.metric.accumulator 1100000000 1 metric_type=counter'
        ]
        expected_output = {
            "dogstream": [
                ('test.metric.accumulator', 1000000000, 1, self.counter),
                ('test.metric.accumulator', 1100000000, 2, self.counter)]
        }
        self._write_log(log_data)

        statedog = Dogstreams.init(self.logger, {'dogstreams': '%s:tests.test_datadog:parse_stateful' % self.log_file.name})
        actual_output = statedog.check(self.config, move_end=False)
        self.assertEquals(expected_output, actual_output)
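The stateful variant accumulates across lines: two counter increments of 1 yield points of 1 and then 2. The state-passing mechanism isn't visible in these examples; one plausible shape, assuming the dogstream machinery simply keeps invoking the same callable, is a parser that keeps its running total on the function object itself:

    def parse_stateful(logger, line):
        # Hypothetical stateful parser: accumulate counter values
        # across calls by stashing the total on the function object.
        metric, timestamp, value, attrs = line.split(' ')
        total = getattr(parse_stateful, 'total', 0) + int(value)
        parse_stateful.total = total
        key, val = attrs.split('=')
        return metric, int(timestamp), total, {key: val}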
Example #12
    def test_dogstream_new_plugin(self):
        """Ensure that class-based stateful plugins work"""
        log_data = [
            'test.metric.accumulator 1000000000 1 metric_type=counter',
            'test.metric.accumulator 1100000000 1 metric_type=counter'
        ]
        expected_output = {
            "dogstream": [
                ('foo.bar:test.metric.accumulator', 1000000000, 1, self.counter),
                ('foo.bar:test.metric.accumulator', 1100000000, 2, self.counter)]
        }
        self._write_log(log_data)

        statedog = Dogstreams.init(self.logger, {'dogstreams': '{0}:{1}:ParseClassPlugin:foo:bar'.format(self.log_file.name, __name__)})
        actual_output = statedog.check(self.config, move_end=False)
        self.assertEquals(expected_output, actual_output)
Example #13
    def test_dogstream_function_plugin(self):
        """Ensure that non-class-based stateful plugins work"""
        log_data = [
            'test.metric.accumulator 1000000000 1 metric_type=counter',
            'test.metric.accumulator 1100000000 1 metric_type=counter'
        ]
        expected_output = {
            "dogstream": [
                ('test.metric.accumulator', 1000000000, 1, self.counter),
                ('test.metric.accumulator', 1100000000, 2, self.counter)]
        }
        self._write_log(log_data)

        statedog = Dogstreams.init(self.logger, {'dogstreams': '%s:tests.test_datadog:parse_function_plugin' % self.log_file.name})
        actual_output = statedog.check(self.config, move_end=False)
        self.assertEquals(expected_output, actual_output)
Example #14
    def __init__(self, agentConfig, emitters):
        self.agentConfig = agentConfig
        self.plugins = None
        self.emitters = emitters
        self.os = None
        
        self.checksLogger = logging.getLogger('checks')
        socket.setdefaulttimeout(15)
        
        self._apache = Apache(self.checksLogger)
        self._nginx = Nginx(self.checksLogger)
        self._disk = Disk(self.checksLogger)
        self._io = IO()
        self._load = Load(self.checksLogger)
        self._memory = Memory(self.checksLogger)
        self._network = Network(self.checksLogger)
        self._processes = Processes()
        self._cpu = Cpu()
        self._couchdb = CouchDb(self.checksLogger)
        self._mongodb = MongoDb(self.checksLogger)
        self._mysql = MySql(self.checksLogger)
        self._pgsql = PostgreSql(self.checksLogger)
        self._rabbitmq = RabbitMq()
        self._ganglia = Ganglia(self.checksLogger)
        self._cassandra = Cassandra()
        self._redis = Redis(self.checksLogger)
        self._jvm = Jvm(self.checksLogger)
        self._tomcat = Tomcat(self.checksLogger)
        self._activemq = ActiveMQ(self.checksLogger)
        self._solr = Solr(self.checksLogger)
        self._memcache = Memcache(self.checksLogger)
        self._dogstream = Dogstreams.init(self.checksLogger, self.agentConfig)
        self._ddforwarder = DdForwarder(self.checksLogger, self.agentConfig)

        # All new checks should be metrics checks:
        self._metrics_checks = [
            Cacti(self.checksLogger),
            Redis(self.checksLogger),
            Varnish(self.checksLogger),
            ElasticSearch(self.checksLogger),
            ]
        self._event_checks = [Hudson(), Nagios(socket.gethostname())]
        self._resources_checks = [ResProcesses(self.checksLogger,self.agentConfig)]

        self._ec2 = EC2(self.checksLogger)
Example #15
    def test_dogstream_log_path_globbing(self):
        """Make sure that globbed dogstream logfile matching works."""
        # Create a tmpfile to serve as a prefix for the other temporary
        # files we'll be globbing.
        first_tmpfile = NamedTemporaryFile()
        tmp_fprefix = os.path.basename(first_tmpfile.name)
        all_tmp_filenames = set([first_tmpfile.name])
        # We stick the file objects in here to avoid garbage collection (and
        # tmpfile deletion). Not sure why this was happening, but it's working
        # with this hack in.
        avoid_gc = []
        for i in range(3):
            new_tmpfile = NamedTemporaryFile(prefix=tmp_fprefix)
            all_tmp_filenames.add(new_tmpfile.name)
            avoid_gc.append(new_tmpfile)
        dogstream_glob = os.path.join(gettempdir(), tmp_fprefix + '*')
        paths = Dogstreams._get_dogstream_log_paths(dogstream_glob)
        self.assertEqual(set(paths), all_tmp_filenames)
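Dogstreams._get_dogstream_log_paths is expected to expand a wildcarded dogstreams path into the set of matching files. Under the obvious assumption that it is glob-based, a minimal sketch of the behavior this test pins down:

    import glob

    def _get_dogstream_log_paths(path_spec):
        # Expand shell-style wildcards in the configured dogstream path.
        return glob.glob(path_spec)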
Example #16
    def test_supervisord_parser(self):
        from dogstream import supervisord_log
        log_data = """2012-07-16 22:30:48,335 INFO spawned: 'monitor' with pid 20216
2012-07-14 03:02:47,325 INFO success: foo_bar entered RUNNING state, process has stayed up for > than 2 seconds (startsecs)
2012-07-17 02:53:04,600 CRIT Server 'inet_http_server' running without any HTTP authentication checking
2012-07-14 04:54:34,193 WARN received SIGTERM indicating exit request
"""
        event_type = supervisord_log.EVENT_TYPE

        expected_output = {
            "dogstreamEvents": [
                {
                    "alert_type": "info", "event_type": event_type,
                    "aggregation_key": "monitor",
                    "event_object": "monitor",
                    "msg_title": "spawned: 'monitor' with pid 20216",
                    "timestamp": int(time.mktime(datetime(2012, 7, 16, 22, 30, 48).timetuple())),
                }, {
                    "alert_type": "success", "event_type": event_type,
                    "aggregation_key": "foo_bar",
                    "event_object": "foo_bar",
                    "msg_title": "success: foo_bar entered RUNNING state, "
                    "process has stayed up for > than 2 seconds (startsecs)",
                    "timestamp": int(time.mktime(datetime(2012, 7, 14, 3, 2, 47).timetuple())),
                }, {
                    "alert_type": "error", "event_type": event_type,
                    "aggregation_key": "inet_http_server",
                    "event_object": "inet_http_server",
                    "msg_title": "Server 'inet_http_server' running without any HTTP authentication checking",
                    "timestamp": int(time.mktime(datetime(2012, 7, 17, 2, 53, 4).timetuple())),
                }, {
                    "alert_type": "warning", "event_type": event_type,
                    "aggregation_key": "SIGTERM",
                    "event_object": "SIGTERM",
                    "msg_title": "received SIGTERM indicating exit request",
                    "timestamp": int(time.mktime(datetime(2012, 7, 14, 4, 54, 34).timetuple())),
                },
            ]
        }
        self._write_log(log_data.split("\n"))

        dogstream = Dogstreams.init(self.logger, {'dogstreams': '%s:dogstream.supervisord_log:parse_supervisord' % self.log_file.name})
        actual_output = dogstream.check(self.config, move_end=False)
        self.assertEquals(expected_output, actual_output)
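The expected events spell out the level mapping the supervisord parser applies: CRIT becomes 'error', WARN becomes 'warning', and INFO becomes 'info' unless the message starts with 'success:'. A sketch of just that step, inferred from the expected output above (the full parser is dogstream.supervisord_log.parse_supervisord):

    def alert_type_for(level, message):
        # Mapping inferred from the expected events above.
        if level == 'CRIT':
            return 'error'
        if level == 'WARN':
            return 'warning'
        if level == 'INFO' and message.startswith('success:'):
            return 'success'
        return 'info'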
Example #17
    def test_supervisord_parser(self):
        from dogstream import supervisord_log
        log_data = """2012-07-16 22:30:48,335 INFO spawned: 'monitor' with pid 20216
2012-07-14 03:02:47,325 INFO success: foo_bar entered RUNNING state, process has stayed up for > than 2 seconds (startsecs)
2012-07-17 02:53:04,600 CRIT Server 'inet_http_server' running without any HTTP authentication checking
2012-07-14 04:54:34,193 WARN received SIGTERM indicating exit request
"""
        event_type = supervisord_log.EVENT_TYPE

        expected_output = {
            "dogstreamEvents":[
                {
                    "alert_type": "info", "event_type": event_type,
                    "aggregation_key": "monitor",
                    "event_object": "monitor",
                    "msg_title": "spawned: 'monitor' with pid 20216",
                    "timestamp": int(time.mktime(datetime(2012, 7, 16, 22, 30, 48).timetuple())),
                }, {
                    "alert_type": "success", "event_type": event_type,
                    "aggregation_key": "foo_bar",
                    "event_object": "foo_bar",
                    "msg_title": "success: foo_bar entered RUNNING state, "
                    "process has stayed up for > than 2 seconds (startsecs)",
                    "timestamp": int(time.mktime(datetime(2012, 7, 14, 3, 2, 47).timetuple())),
                }, {
                    "alert_type": "error", "event_type": event_type,
                    "aggregation_key": "inet_http_server",
                    "event_object": "inet_http_server",
                    "msg_title": "Server 'inet_http_server' running without any HTTP authentication checking",
                    "timestamp": int(time.mktime(datetime(2012, 7, 17, 2, 53, 4).timetuple())),
                }, {
                    "alert_type": "warning", "event_type": event_type,
                    "aggregation_key": "SIGTERM",
                    "event_object": "SIGTERM",
                    "msg_title": "received SIGTERM indicating exit request",
                    "timestamp": int(time.mktime(datetime(2012, 7, 14, 4, 54, 34).timetuple())),
                },
            ]}
        self._write_log(log_data.split("\n"))

        dogstream = Dogstreams.init(self.logger, {'dogstreams': '%s:dogstream.supervisord_log:parse_supervisord' % self.log_file.name})
        actual_output = dogstream.check(self.config, move_end=False)
        self.assertEquals(expected_output, actual_output)
Example #18
    def __init__(self, agentConfig, emitters, systemStats):
        self.agentConfig = agentConfig
        # system stats is generated by config.get_system_stats
        self.agentConfig['system_stats'] = systemStats
        # agent config is used during checks, system_stats can be accessed through the config
        self.os = getOS()
        self.plugins = None
        self.emitters = emitters            
        self.metadata_interval = int(agentConfig.get('metadata_interval', 10 * 60))
        self.metadata_start = time.time()
        socket.setdefaulttimeout(15)
        self.run_count = 0
        self.continue_running = True
        
        # Unix System Checks
        self._unix_system_checks = {
            'disk': u.Disk(checks_logger),
            'io': u.IO(),
            'load': u.Load(checks_logger),
            'memory': u.Memory(checks_logger),
            'network': u.Network(checks_logger),
            'processes': u.Processes(),
            'cpu': u.Cpu(checks_logger)
        }

        # Win32 System Checks
        self._win32_system_checks = {
            'disk': w32.Disk(checks_logger),
            'io': w32.IO(checks_logger),
            'proc': w32.Processes(checks_logger),
            'memory': w32.Memory(checks_logger),
            'network': w32.Network(checks_logger),
            'cpu': w32.Cpu(checks_logger)
        }

        # Old-style metric checks
        self._couchdb = CouchDb(checks_logger)
        self._mongodb = MongoDb(checks_logger)
        self._mysql = MySql(checks_logger)
        self._rabbitmq = RabbitMq()
        self._ganglia = Ganglia(checks_logger)
        self._cassandra = Cassandra()
        self._dogstream = Dogstreams.init(checks_logger, self.agentConfig)
        self._ddforwarder = DdForwarder(checks_logger, self.agentConfig)
        self._ec2 = EC2(checks_logger)

        # Metric Checks
        self._metrics_checks = [
            ElasticSearch(checks_logger),
            WMICheck(checks_logger),
            Memcache(checks_logger),
        ]

        # Custom metric checks
        for module_spec in [s.strip() for s in self.agentConfig.get('custom_checks', '').split(',')]:
            if len(module_spec) == 0:
                continue
            try:
                self._metrics_checks.append(modules.load(module_spec, 'Check')(checks_logger))
                checks_logger.info("Registered custom check %s" % module_spec)
            except Exception:
                checks_logger.exception('Unable to load custom check module %s' % module_spec)
Example #19
    def test_service_perfdata(self):
        from checks.datadog import NagiosServicePerfData

        self._write_nagios_config([
            "service_perfdata_file=%s" % self.log_file.name,
            "service_perfdata_file_template=DATATYPE::SERVICEPERFDATA\tTIMET::$TIMET$\tHOSTNAME::$HOSTNAME$\tSERVICEDESC::$SERVICEDESC$\tSERVICEPERFDATA::$SERVICEPERFDATA$\tSERVICECHECKCOMMAND::$SERVICECHECKCOMMAND$\tHOSTSTATE::$HOSTSTATE$\tHOSTSTATETYPE::$HOSTSTATETYPE$\tSERVICESTATE::$SERVICESTATE$\tSERVICESTATETYPE::$SERVICESTATETYPE$",
        ])

        dogstream = Dogstreams.init(self.logger, self.agent_config)
        self.assertEquals([NagiosServicePerfData],
                          [d.__class__ for d in dogstream.dogstreams])

        log_data = [
            (
                "DATATYPE::SERVICEPERFDATA",
                "TIMET::1000000000",
                "HOSTNAME::myhost0",
                "SERVICEDESC::Pgsql Backends",
                "SERVICEPERFDATA::" + " ".join([
                    "time=0.06", "db0=33;180;190;0;200", "db1=1;150;190;0;200",
                    "db2=0;120;290;1;200", "db3=0;110;195;5;100"
                ]),
                "SERVICECHECKCOMMAND::check_nrpe_1arg!check_postgres_backends",
                "HOSTSTATE::UP",
                "HOSTSTATETYPE::HARD",
                "SERVICESTATE::OK",
                "SERVICESTATETYPE::HARD",
            ),
        ]

        expected_output = [
            ('nagios.pgsql_backends.time', 1000000000, 0.06, {
                'metric_type': 'gauge',
                'host_name': 'myhost0',
            }),
            ('nagios.pgsql_backends.db0', 1000000000, 33., {
                'metric_type': 'gauge',
                'host_name': 'myhost0',
                'warn': '180',
                'crit': '190',
                'min': '0',
                'max': '200',
            }),
            ('nagios.pgsql_backends.db1', 1000000000, 1., {
                'metric_type': 'gauge',
                'host_name': 'myhost0',
                'warn': '150',
                'crit': '190',
                'min': '0',
                'max': '200',
            }),
            ('nagios.pgsql_backends.db2', 1000000000, 0., {
                'metric_type': 'gauge',
                'host_name': 'myhost0',
                'warn': '120',
                'crit': '290',
                'min': '1',
                'max': '200',
            }),
            ('nagios.pgsql_backends.db3', 1000000000, 0., {
                'metric_type': 'gauge',
                'host_name': 'myhost0',
                'warn': '110',
                'crit': '195',
                'min': '5',
                'max': '100',
            }),
        ]
        expected_output.sort(key=point_sorter)

        self._write_log(('\t'.join(data) for data in log_data))

        actual_output = dogstream.check(self.agent_config,
                                        move_end=False)['dogstream']
        actual_output.sort(key=point_sorter)

        self.assertEquals(expected_output, actual_output)
Example #20
    def __init__(self, agentConfig, emitters, systemStats, hostname):
        self.emit_duration = None
        self.agentConfig = agentConfig
        self.hostname = hostname
        # system stats is generated by config.get_system_stats
        self.agentConfig["system_stats"] = systemStats
        # agent config is used during checks, system_stats can be accessed through the config
        self.os = get_os()
        self.plugins = None
        self.emitters = emitters
        self.check_timings = agentConfig.get("check_timings")
        self.push_times = {
            "host_metadata": {"start": time.time(), "interval": int(agentConfig.get("metadata_interval", 4 * 60 * 60))},
            "external_host_tags": {
                "start": time.time() - 3 * 60,  # Wait for the checks to init
                "interval": int(agentConfig.get("external_host_tags", 5 * 60)),
            },
            "agent_checks": {"start": time.time(), "interval": int(agentConfig.get("agent_checks_interval", 10 * 60))},
            "processes": {"start": time.time(), "interval": int(agentConfig.get("processes_interval", 60))},
        }
        socket.setdefaulttimeout(15)
        self.run_count = 0
        self.continue_running = True
        self.hostname_metadata_cache = None
        self.initialized_checks_d = []
        self.init_failed_checks_d = {}

        # Unix System Checks
        self._unix_system_checks = {
            "io": u.IO(log),
            "load": u.Load(log),
            "memory": u.Memory(log),
            "processes": u.Processes(log),
            "cpu": u.Cpu(log),
            "system": u.System(log),
        }

        # Win32 System Checks
        self._win32_system_checks = {
            "io": w32.IO(log),
            "proc": w32.Processes(log),
            "memory": w32.Memory(log),
            "network": w32.Network(log),
            "cpu": w32.Cpu(log),
            "system": w32.System(log),
        }

        # Old-style metric checks
        self._ganglia = Ganglia(log) if self.agentConfig.get("ganglia_host", "") != "" else None
        self._dogstream = None if self.agentConfig.get("dogstreams") is None else Dogstreams.init(log, self.agentConfig)

        # Agent performance metrics check
        self._agent_metrics = None

        self._metrics_checks = []

        # Custom metric checks
        for module_spec in [s.strip() for s in self.agentConfig.get("custom_checks", "").split(",")]:
            if len(module_spec) == 0:
                continue
            try:
                self._metrics_checks.append(modules.load(module_spec, "Check")(log))
                log.info("Registered custom check %s" % module_spec)
                log.warning(
                    "Old format custom checks are deprecated. They should be moved to the checks.d interface, as old custom checks will be removed in a future version"
                )
            except Exception:
                log.exception("Unable to load custom check module %s" % module_spec)
Example #21
    def test_service_perfdata_special_cases(self):
        from checks.datadog import NagiosServicePerfData

        self._write_nagios_config([
            "service_perfdata_file=%s" % self.log_file.name,
            "service_perfdata_file_template=DATATYPE::SERVICEPERFDATA\tTIMET::$TIMET$\tHOSTNAME::$HOSTNAME$\tSERVICEDESC::$SERVICEDESC$\tSERVICEPERFDATA::$SERVICEPERFDATA$\tSERVICECHECKCOMMAND::$SERVICECHECKCOMMAND$\tHOSTSTATE::$HOSTSTATE$\tHOSTSTATETYPE::$HOSTSTATETYPE$\tSERVICESTATE::$SERVICESTATE$\tSERVICESTATETYPE::$SERVICESTATETYPE$",
        ])

        dogstream = Dogstreams.init(self.logger, self.agent_config)
        self.assertEquals([NagiosServicePerfData],
                          [d.__class__ for d in dogstream.dogstreams])

        log_data = [(
            "DATATYPE::SERVICEPERFDATA",
            "TIMET::1000000000",
            "HOSTNAME::myhost2",
            "SERVICEDESC::Disk Space",
            "SERVICEPERFDATA::" + " ".join([
                "/=5477MB;6450;7256;0;8063",
                "/dev=0MB;2970;3341;0;3713",
                "/dev/shm=0MB;3080;3465;0;3851",
                "/var/run=0MB;3080;3465;0;3851",
                "/var/lock=0MB;3080;3465;0;3851",
                "/lib/init/rw=0MB;3080;3465;0;3851",
                "/mnt=290MB;338636;380966;0;423296",
                "/data=39812MB;40940;46057;0;51175",
            ]),
            "SERVICECHECKCOMMAND::check_all_disks!20%!10%",
            "HOSTSTATE::UP",
            "HOSTSTATETYPE::HARD",
            "SERVICESTATE::OK",
            "SERVICESTATETYPE::HARD",
        )]

        expected_output = [
            ('nagios.disk_space', 1000000000, 5477., {
                'metric_type': 'gauge',
                'host_name': 'myhost2',
                'device_name': '/',
                'unit': 'MB',
                'warn': '6450',
                'crit': '7256',
                'min': '0',
                'max': '8063',
            }),
            ('nagios.disk_space', 1000000000, 0., {
                'metric_type': 'gauge',
                'host_name': 'myhost2',
                'device_name': '/dev',
                'unit': 'MB',
                'warn': '2970',
                'crit': '3341',
                'min': '0',
                'max': '3713',
            }),
            ('nagios.disk_space', 1000000000, 0., {
                'metric_type': 'gauge',
                'host_name': 'myhost2',
                'device_name': '/dev/shm',
                'unit': 'MB',
                'warn': '3080',
                'crit': '3465',
                'min': '0',
                'max': '3851',
            }),
            ('nagios.disk_space', 1000000000, 0., {
                'metric_type': 'gauge',
                'host_name': 'myhost2',
                'device_name': '/var/run',
                'unit': 'MB',
                'warn': '3080',
                'crit': '3465',
                'min': '0',
                'max': '3851',
            }),
            ('nagios.disk_space', 1000000000, 0., {
                'metric_type': 'gauge',
                'host_name': 'myhost2',
                'device_name': '/var/lock',
                'unit': 'MB',
                'warn': '3080',
                'crit': '3465',
                'min': '0',
                'max': '3851',
            }),
            ('nagios.disk_space', 1000000000, 0., {
                'metric_type': 'gauge',
                'host_name': 'myhost2',
                'device_name': '/lib/init/rw',
                'unit': 'MB',
                'warn': '3080',
                'crit': '3465',
                'min': '0',
                'max': '3851',
            }),
            ('nagios.disk_space', 1000000000, 290., {
                'metric_type': 'gauge',
                'host_name': 'myhost2',
                'device_name': '/mnt',
                'unit': 'MB',
                'warn': '338636',
                'crit': '380966',
                'min': '0',
                'max': '423296',
            }),
            ('nagios.disk_space', 1000000000, 39812., {
                'metric_type': 'gauge',
                'host_name': 'myhost2',
                'device_name': '/data',
                'unit': 'MB',
                'warn': '40940',
                'crit': '46057',
                'min': '0',
                'max': '51175',
            }),
        ]
        expected_output.sort(key=point_sorter)

        self._write_log(('\t'.join(data) for data in log_data))

        actual_output = dogstream.check(self.agent_config,
                                        move_end=False)['dogstream']
        actual_output.sort(key=point_sorter)

        self.assertEquals(expected_output, actual_output)
Example #22
    def test_cassandra_parser(self):
        from dogstream import cassandra, common

        log_data = """ INFO [CompactionExecutor:1594] 2012-05-12 21:05:12,924 Saved test_data-Encodings-KeyCache (86400 items) in 85 ms
 INFO [CompactionExecutor:1595] 2012-05-12 21:05:15,144 Saved test_data-Metrics-KeyCache (86400 items) in 96 ms
 INFO [CompactionExecutor:1596] 2012-05-12 21:10:48,058 Compacting [SSTableReader(path='/var/cassandra/data/test_data/series-hc-6528-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6531-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6529-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6530-Data.db')]
 INFO [CompactionExecutor:1596] 2012-05-12 21:10:54,851 Compacted to [/var/cassandra/a-hc-65-Data.db,].  102,079,134 to 101,546,397
 INFO [CompactionExecutor:1598] 2012-05-12 22:05:04,313 Saved test_data-ResourcesMetadata-KeyCache (1 items) in 10 ms
 INFO [CompactionExecutor:1599] 2012-05-12 22:05:14,813 Saved test_data-Encodings-KeyCache (86400 items) in 83 ms
 INFO [CompactionExecutor:1630] 2012-05-13 13:05:44,963 Saved test_data-Metrics-KeyCache (86400 items) in 77 ms
 INFO [CompactionExecutor:1631] 2012-05-13 13:15:01,923 Nothing to compact in data_log.  Use forceUserDefinedCompaction if you wish to force compaction of single sstables (e.g. for tombstone collection)
 INFO [CompactionExecutor:1632] 2012-05-13 13:15:01,927 Compacting [SSTableReader(path='/var/cassandra/data/test_data/series-hc-6527-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6522-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6532-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6517-Data.db')]
 INFO [CompactionExecutor:1632] 2012-05-13 13:27:17,685 Compacting large row test_data/series:6c6f677c32 (782001077 bytes) incrementally
 INFO [CompactionExecutor:34] 2012-05-14 18:00:41,281 Saved test_data-Encodings-KeyCache (86400 items) in 78 ms
 INFO 13:27:17,685 Compacting large row test_data/series:6c6f677c32 (782001077 bytes) incrementally
"""
        alert_type = cassandra.ALERT_TYPES["INFO"]
        event_type = cassandra.EVENT_TYPE
        event_object = EventDefaults.EVENT_OBJECT

        expected_output = {
            "dogstreamEvents": [
                {
                    "timestamp": cassandra.parse_date("2012-05-12 21:10:48,058"),
                    "msg_title": "Compacting [SSTableReader(path='/var/cassandra/data/test_data/series-hc-6528-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6531-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6529-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6530-Data.db')]"[0:common.MAX_TITLE_LEN],
                    "msg_text": "Compacting [SSTableReader(path='/var/cassandra/data/test_data/series-hc-6528-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6531-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6529-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6530-Data.db')]",
                    "alert_type": alert_type,
                    "auto_priority": 0,
                    "event_type": event_type,
                    "aggregation_key": event_object,
                    "event_object": event_object,
                },  {
                    "timestamp": cassandra.parse_date("2012-05-12 21:10:54,851"),
                    "msg_title": "Compacted to [/var/cassandra/a-hc-65-Data.db,].  102,079,134 to 101,546,397",
                    "alert_type": alert_type,
                    "auto_priority": 0,
                    "event_type": event_type,
                    "aggregation_key": event_object,
                    "event_object": event_object,
                },  {
                    "timestamp": cassandra.parse_date("2012-05-13 13:15:01,927"),
                    "msg_title": "Compacting [SSTableReader(path='/var/cassandra/data/test_data/series-hc-6527-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6522-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6532-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6517-Data.db')]"[0:common.MAX_TITLE_LEN],
                    "msg_text": "Compacting [SSTableReader(path='/var/cassandra/data/test_data/series-hc-6527-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6522-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6532-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6517-Data.db')]",
                    "alert_type": alert_type,
                    "event_type": event_type,
                    "auto_priority": 0,
                    "aggregation_key": event_object,
                    "event_object": event_object,
                },  {
                    "timestamp": cassandra.parse_date("2012-05-13 13:27:17,685"),
                    "msg_title": "Compacting large row test_data/series:6c6f677c32 (782001077 bytes) incrementally",
                    "alert_type": alert_type,
                    "event_type": event_type,
                    "auto_priority": 0,
                    "aggregation_key": event_object,
                    "event_object": event_object,
                },  {
                    "timestamp": cassandra.parse_date(datetime.utcnow().strftime("%Y-%m-%d") + " 13:27:17,685"),
                    "msg_title": "Compacting large row test_data/series:6c6f677c32 (782001077 bytes) incrementally",
                    "alert_type": alert_type,
                    "event_type": event_type,
                    "auto_priority": 0,
                    "aggregation_key": event_object,
                    "event_object": event_object,
                },
            ]
        }

        self._write_log(log_data.split("\n"))

        dogstream = Dogstreams.init(self.logger, {'dogstreams': '%s:dogstream.cassandra:parse_cassandra' % self.log_file.name})
        actual_output = dogstream.check(self.config, move_end=False)
        self.assertEquals(expected_output, actual_output)
Example #23
    def test_service_perfdata(self):
        from checks.datadog import NagiosServicePerfData

        self._write_nagios_config([
            "service_perfdata_file=%s" % self.log_file.name,
            "service_perfdata_file_template=DATATYPE::SERVICEPERFDATA\tTIMET::$TIMET$\tHOSTNAME::$HOSTNAME$\tSERVICEDESC::$SERVICEDESC$\tSERVICEPERFDATA::$SERVICEPERFDATA$\tSERVICECHECKCOMMAND::$SERVICECHECKCOMMAND$\tHOSTSTATE::$HOSTSTATE$\tHOSTSTATETYPE::$HOSTSTATETYPE$\tSERVICESTATE::$SERVICESTATE$\tSERVICESTATETYPE::$SERVICESTATETYPE$",
        ])

        dogstream = Dogstreams.init(self.logger, self.agent_config)
        self.assertEquals([NagiosServicePerfData], [d.__class__ for d in dogstream.dogstreams])

        log_data = [
            ("DATATYPE::SERVICEPERFDATA", 
             "TIMET::1000000000", 
             "HOSTNAME::myhost0", 
             "SERVICEDESC::Pgsql Backends", 
             "SERVICEPERFDATA::" + " ".join([
                "time=0.06", 
                "db0=33;180;190;0;200", 
                "db1=1;150;190;0;200",
                "db2=0;120;290;1;200", 
                "db3=0;110;195;5;100"
             ]),
             "SERVICECHECKCOMMAND::check_nrpe_1arg!check_postgres_backends",
             "HOSTSTATE::UP",   
             "HOSTSTATETYPE::HARD",
             "SERVICESTATE::OK",
             "SERVICESTATETYPE::HARD",
            ),
        ]
        
        expected_output = [
            ('nagios.pgsql_backends.time', 1000000000, 0.06, {
                'metric_type': 'gauge',
                'host_name': 'myhost0',
            }),
            ('nagios.pgsql_backends.db0',  1000000000,   33., {
                'metric_type': 'gauge',
                'host_name': 'myhost0',
                'warn': '180',
                'crit': '190',
                'min':    '0',
                'max':  '200',
            }),
            ('nagios.pgsql_backends.db1',  1000000000,    1., {
                'metric_type': 'gauge',
                'host_name': 'myhost0',
                'warn': '150',
                'crit': '190',
                'min':    '0',
                'max':  '200',
            }),
            ('nagios.pgsql_backends.db2',  1000000000,    0., {
                'metric_type': 'gauge',
                'host_name': 'myhost0',
                'warn': '120',
                'crit': '290',
                'min':    '1',
                'max':  '200',
            }),
            ('nagios.pgsql_backends.db3',  1000000000,    0., {
                'metric_type': 'gauge',
                'host_name': 'myhost0',
                'warn': '110',
                'crit': '195',
                'min':    '5',
                'max':  '100',
            }),
        ]
        expected_output.sort(key=point_sorter)

        self._write_log(('\t'.join(data) for data in log_data))        

        actual_output = dogstream.check(self.agent_config, move_end=False)['dogstream']
        actual_output.sort(key=point_sorter)

        self.assertEquals(expected_output, actual_output)
Example #24
    def __init__(self, agentConfig, emitters, systemStats):
        self.emit_duration = None
        self.agentConfig = agentConfig
        # system stats is generated by config.get_system_stats
        self.agentConfig["system_stats"] = systemStats
        # agent config is used during checks, system_stats can be accessed through the config
        self.os = get_os()
        self.plugins = None
        self.emitters = emitters
        self.metadata_interval = int(agentConfig.get("metadata_interval", 10 * 60))
        self.metadata_start = time.time()
        socket.setdefaulttimeout(15)
        self.run_count = 0
        self.continue_running = True
        self.metadata_cache = None
        self.checks_d = []

        # Unix System Checks
        self._unix_system_checks = {
            "disk": u.Disk(log),
            "io": u.IO(log),
            "load": u.Load(log),
            "memory": u.Memory(log),
            "network": u.Network(log),
            "processes": u.Processes(log),
            "cpu": u.Cpu(log),
        }

        # Win32 System Checks
        self._win32_system_checks = {
            "disk": w32.Disk(log),
            "io": w32.IO(log),
            "proc": w32.Processes(log),
            "memory": w32.Memory(log),
            "network": w32.Network(log),
            "cpu": w32.Cpu(log),
        }

        # Old-style metric checks
        self._mongodb = MongoDb(log)
        self._mysql = MySql(log)
        self._rabbitmq = RabbitMq()
        self._ganglia = Ganglia(log)
        self._cassandra = Cassandra()
        self._dogstream = Dogstreams.init(log, self.agentConfig)
        self._ddforwarder = DdForwarder(log, self.agentConfig)
        self._ec2 = EC2(log)

        # Agent Metrics
        self._agent_metrics = CollectorMetrics(log)

        # Metric Checks
        self._metrics_checks = [Memcache(log)]

        # Custom metric checks
        for module_spec in [s.strip() for s in self.agentConfig.get("custom_checks", "").split(",")]:
            if len(module_spec) == 0:
                continue
            try:
                self._metrics_checks.append(modules.load(module_spec, "Check")(log))
                log.info("Registered custom check %s" % module_spec)
            except Exception:
                log.exception("Unable to load custom check module %s" % module_spec)
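
The custom_checks loop above loads a comma-separated list of module specs, each expected to expose a class named Check that is instantiated with the logger. A hypothetical module satisfying that contract (module name and metric are made up for illustration; the real old-style Check base class lives in the agent's checks package):

    # my_checks/queue_depth.py -- hypothetical custom check module,
    # configured as: custom_checks: my_checks.queue_depth
    class Check(object):
        def __init__(self, logger):
            self.logger = logger

        def check(self, agentConfig):
            # One plausible old-style shape: return {metric_name: value}.
            self.logger.info("running queue depth check")
            return {"my_app.queue_depth": 42}
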
Example #30
0
    def test_service_perfdata_special_cases(self):
        from checks.datadog import NagiosServicePerfData

        self._write_nagios_config([
            "service_perfdata_file=%s" % self.log_file.name,
            "service_perfdata_file_template=DATATYPE::SERVICEPERFDATA\tTIMET::$TIMET$\tHOSTNAME::$HOSTNAME$\tSERVICEDESC::$SERVICEDESC$\tSERVICEPERFDATA::$SERVICEPERFDATA$\tSERVICECHECKCOMMAND::$SERVICECHECKCOMMAND$\tHOSTSTATE::$HOSTSTATE$\tHOSTSTATETYPE::$HOSTSTATETYPE$\tSERVICESTATE::$SERVICESTATE$\tSERVICESTATETYPE::$SERVICESTATETYPE$",
        ])

        dogstream = Dogstreams.init(self.logger, self.agent_config)
        self.assertEquals([NagiosServicePerfData], [d.__class__ for d in dogstream.dogstreams])

        log_data = [
            (   "DATATYPE::SERVICEPERFDATA",
                "TIMET::1000000000",
                "HOSTNAME::myhost2",
                "SERVICEDESC::Disk Space",
                "SERVICEPERFDATA::" + " ".join([
                    "/=5477MB;6450;7256;0;8063",
                    "/dev=0MB;2970;3341;0;3713",
                    "/dev/shm=0MB;3080;3465;0;3851",
                    "/var/run=0MB;3080;3465;0;3851",
                    "/var/lock=0MB;3080;3465;0;3851",
                    "/lib/init/rw=0MB;3080;3465;0;3851",
                    "/mnt=290MB;338636;380966;0;423296",
                    "/data=39812MB;40940;46057;0;51175",
                ]),
                "SERVICECHECKCOMMAND::check_all_disks!20%!10%",
                "HOSTSTATE::UP",
                "HOSTSTATETYPE::HARD",
                "SERVICESTATE::OK",
                "SERVICESTATETYPE::HARD",
            )
        ]
        
        expected_output = [
            ('nagios.disk_space', 1000000000, 5477., {
                'metric_type': 'gauge',
                'host_name': 'myhost2',
                'device_name': '/',
                'unit': 'MB',
                'warn': '6450',
                'crit': '7256',
                'min': '0',
                'max': '8063',
            }),
            ('nagios.disk_space', 1000000000, 0., {
                'metric_type': 'gauge',
                'host_name': 'myhost2',
                'device_name': '/dev',
                'unit': 'MB',
                'warn': '2970',
                'crit': '3341',
                'min': '0',
                'max': '3713',
            }),
            ('nagios.disk_space', 1000000000, 0., {
                'metric_type': 'gauge',
                'host_name': 'myhost2',
                'device_name': '/dev/shm',
                'unit': 'MB',
                'warn': '3080',
                'crit': '3465',
                'min': '0',
                'max': '3851',
            }),
            ('nagios.disk_space', 1000000000, 0., {
                'metric_type': 'gauge',
                'host_name': 'myhost2',
                'device_name': '/var/run',
                'unit': 'MB',
                'warn': '3080',
                'crit': '3465',
                'min': '0',
                'max': '3851',
            }),
            ('nagios.disk_space', 1000000000, 0., {
                'metric_type': 'gauge',
                'host_name': 'myhost2',
                'device_name': '/var/lock',
                'unit': 'MB',
                'warn': '3080',
                'crit': '3465',
                'min': '0',
                'max': '3851',
            }),
            ('nagios.disk_space', 1000000000, 0., {
                'metric_type': 'gauge',
                'host_name': 'myhost2',
                'device_name': '/lib/init/rw',
                'unit': 'MB',
                'warn': '3080',
                'crit': '3465',
                'min': '0',
                'max': '3851',
            }),
            ('nagios.disk_space', 1000000000, 290., {
                'metric_type': 'gauge',
                'host_name': 'myhost2',
                'device_name': '/mnt',
                'unit': 'MB',
                'warn': '338636',
                'crit': '380966',
                'min': '0',
                'max': '423296',
            }),
            ('nagios.disk_space', 1000000000, 39812., {
                'metric_type': 'gauge',
                'host_name': 'myhost2',
                'device_name': '/data',
                'unit': 'MB',
                'warn': '40940',
                'crit': '46057',
                'min': '0',
                'max': '51175',
            }),
        ]
        expected_output.sort(key=point_sorter)

        self._write_log(('\t'.join(data) for data in log_data))

        actual_output = dogstream.check(self.agent_config, move_end=False)['dogstream']
        actual_output.sort(key=point_sorter)

        self.assertEquals(expected_output, actual_output)
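
Two conventions are visible in the expected output: the service description "Disk Space" is normalized into the metric name nagios.disk_space, and each perfdata label (here a mount point) is carried as the device_name attribute rather than folded into the metric. A sketch of the name normalization as inferred from the assertions (not the parser's actual code):

    import re

    # Inferred normalization: lowercase the SERVICEDESC, collapse runs of
    # non-alphanumerics to underscores, then prefix with "nagios.".
    def nagios_metric_name(service_desc):
        slug = re.sub(r"[^a-z0-9]+", "_", service_desc.lower()).strip("_")
        return "nagios." + slug

    print(nagios_metric_name("Disk Space"))  # -> nagios.disk_space
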
Example #31
    def __init__(self, agentConfig, emitters, systemStats, hostname):
        self.emit_duration = None
        self.agentConfig = agentConfig
        self.hostname = hostname
        # system stats is generated by config.get_system_stats
        self.agentConfig['system_stats'] = systemStats
        # agent config is used during checks, system_stats can be accessed through the config
        self.os = get_os()
        self.plugins = None
        self.emitters = emitters
        self.check_timings = agentConfig.get('check_timings')
        self.push_times = {
            'host_metadata': {
                'start': time.time(),
                'interval': int(agentConfig.get('metadata_interval', 4 * 60 * 60))
            },
            'external_host_tags': {
                'start': time.time() - 3 * 60,  # Wait for the checks to init
                'interval': int(agentConfig.get('external_host_tags', 5 * 60))
            },
            'agent_checks': {
                'start': time.time(),
                'interval': int(agentConfig.get('agent_checks_interval', 10 * 60))
            },
            'processes': {
                'start': time.time(),
                'interval': int(agentConfig.get('processes_interval', 60))
            }
        }
        socket.setdefaulttimeout(15)
        self.run_count = 0
        self.continue_running = True
        self.hostname_metadata_cache = None
        self.initialized_checks_d = []
        self.init_failed_checks_d = {}

        # Unix System Checks
        self._unix_system_checks = {
            'io': u.IO(log),
            'load': u.Load(log),
            'memory': u.Memory(log),
            'processes': u.Processes(log),
            'cpu': u.Cpu(log),
            'system': u.System(log)
        }

        # Win32 System Checks
        self._win32_system_checks = {
            'io': w32.IO(log),
            'proc': w32.Processes(log),
            'memory': w32.Memory(log),
            'network': w32.Network(log),
            'cpu': w32.Cpu(log),
            'system': w32.System(log)
        }

        # Old-style metric checks
        self._ganglia = Ganglia(log)
        self._dogstream = Dogstreams.init(log, self.agentConfig)
        self._ddforwarder = DdForwarder(log, self.agentConfig)

        # Agent performance metrics check
        self._agent_metrics = None

        self._metrics_checks = []

        # Custom metric checks
        for module_spec in [s.strip() for s in self.agentConfig.get('custom_checks', '').split(',')]:
            if len(module_spec) == 0:
                continue
            try:
                self._metrics_checks.append(modules.load(module_spec, 'Check')(log))
                log.info("Registered custom check %s" % module_spec)
                log.warning("Old format custom checks are deprecated. They should be moved to the checks.d interface as old custom checks will be removed in a next version")
            except Exception:
                log.exception('Unable to load custom check module %s' % module_spec)
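
The push_times table in this version replaces the single metadata_interval of the earlier constructors: each periodic task records when it last started and how often it should run, and the external_host_tags entry backdates its start so the first push waits for checks to initialize. A minimal sketch of the scheduling decision such a table supports (the helper name is illustrative):

    import time

    # Illustrative helper mirroring the start/interval shape of push_times:
    # a task is due once its interval has elapsed, and its clock then resets.
    def should_push(push_times, key, now=None):
        now = time.time() if now is None else now
        entry = push_times[key]
        if now - entry["start"] >= entry["interval"]:
            entry["start"] = now
            return True
        return False

    push_times = {"processes": {"start": time.time() - 61, "interval": 60}}
    print(should_push(push_times, "processes"))  # -> True (61s elapsed >= 60s)
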
Example #32
    def __init__(self, agentConfig, emitters):
        self.agentConfig = agentConfig
        self.os = getOS()
        self.plugins = None
        self.emitters = emitters
        self.checksLogger = logging.getLogger('checks')
        self.metadata_interval = int(agentConfig.get('metadata_interval', 10 * 60))
        self.metadata_start = time.time()
        socket.setdefaulttimeout(15)
        
        # Unix System Checks
        self._unix_system_checks = {
            'disk': u.Disk(self.checksLogger),
            'io': u.IO(),
            'load': u.Load(self.checksLogger),
            'memory': u.Memory(self.checksLogger),
            'network': u.Network(self.checksLogger),
            'processes': u.Processes(),
            'cpu': u.Cpu()
        }

        # Win32 System Checks
        self._win32_system_checks = {
            'disk': w32.Disk(self.checksLogger),
            'io': w32.IO(self.checksLogger),
            'proc': w32.Processes(self.checksLogger),
            'memory': w32.Memory(self.checksLogger),
            'network': w32.Network(self.checksLogger),
            'cpu': w32.Cpu(self.checksLogger)
        }

        # Old-style metric checks
        self._apache = Apache(self.checksLogger)
        self._couchdb = CouchDb(self.checksLogger)
        self._mongodb = MongoDb(self.checksLogger)
        self._mysql = MySql(self.checksLogger)
        self._rabbitmq = RabbitMq()
        self._ganglia = Ganglia(self.checksLogger)
        self._cassandra = Cassandra()
        self._dogstream = Dogstreams.init(self.checksLogger, self.agentConfig)
        self._ddforwarder = DdForwarder(self.checksLogger, self.agentConfig)
        self._ec2 = EC2(self.checksLogger)

        # Metric Checks
        self._metrics_checks = [
            Varnish(self.checksLogger),
            ElasticSearch(self.checksLogger),
            Jvm(self.checksLogger),
            Tomcat(self.checksLogger),
            ActiveMQ(self.checksLogger),
            Solr(self.checksLogger),
            WMICheck(self.checksLogger),
            Nginx(self.checksLogger),
            Memcache(self.checksLogger),
        ]

        # Custom metric checks
        for module_spec in [s.strip() for s in self.agentConfig.get('custom_checks', '').split(',')]:
            if len(module_spec) == 0:
                continue
            try:
                self._metrics_checks.append(modules.load(module_spec, 'Check')(self.checksLogger))
                self.checksLogger.info("Registered custom check %s" % module_spec)
            except Exception:
                self.checksLogger.exception('Unable to load custom check module %s' % module_spec)
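
All four constructors rely on modules.load(module_spec, "Check") to turn a dotted spec from the config into a class. A standard-library stand-in for that pattern (the agent's own modules helper adds more spec formats and error handling than this sketch):

    import importlib

    # Minimal dynamic loading: import the dotted module path, then fetch
    # the named attribute. Stand-in only, not the agent's modules.load.
    def load(module_spec, default_name):
        module = importlib.import_module(module_spec)
        return getattr(module, default_name)

    # check_class = load("my_checks.queue_depth", "Check")  # hypothetical module
    # check = check_class(log)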