Example #1
    def test_dogstream_events_validation(self):
        log_data = [
            {"msg_title": "title", "timestamp": 1336999561},
            {"msg_text": "body", "timestamp": 1336999561},
            {"none of the above": "should get filtered out", "timestamp": 1336999561},
        ]

        expected_output = {
            "dogstreamEvents": [
                {
                    "timestamp": 1336999561,
                    "msg_title": "title",
                    "event_type": EventDefaults.EVENT_TYPE,
                    "aggregation_key": EventDefaults.EVENT_OBJECT,
                    "event_object": EventDefaults.EVENT_OBJECT,
                },

                {
                    "timestamp": 1336999561,
                    "msg_text": "body",
                    "event_type": EventDefaults.EVENT_TYPE,
                    "aggregation_key": EventDefaults.EVENT_OBJECT,
                    "event_object": EventDefaults.EVENT_OBJECT,
                },

            ]
        }

        self._write_log([repr(d) for d in log_data])

        dogstream = Dogstreams.init(self.logger, {'dogstreams': '{0}:{1}:repr_event_parser'.format(self.log_file.name, __name__)})
        actual_output = dogstream.check(self.config, move_end=False)
        self.assertEquals(expected_output, actual_output)
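Note: the repr_event_parser referenced in this test lives in the test module itself and is not shown on this page. A minimal sketch of what it could look like, assuming the usual dogstream parser contract of a (logger, line) callable that returns an event dict (or None to skip the line); validation of the returned fields presumably happens downstream in the dogstream machinery, which is what filters out the third entry:

import ast

def repr_event_parser(logger, line):
    # Hypothetical parser: each log line is the repr() of a dict, so
    # literal_eval turns it straight back into an event dictionary.
    try:
        return ast.literal_eval(line)
    except (ValueError, SyntaxError):
        logger.error("Unparseable line: %r" % line)
        return None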
Example #2
    def test_alt_host_perfdata(self):
        from checks.datadog import NagiosHostPerfData

        self._write_nagios_config([
            "host_perfdata_file=%s" % NAGIOS_TEST_HOST,
            "host_perfdata_file_template=%s" % NAGIOS_TEST_HOST_TEMPLATE,
        ])

        dogstream = Dogstreams.init(self.logger, self.agent_config)
        self.assertEquals([NagiosHostPerfData],
                          [d.__class__ for d in dogstream.dogstreams])
        actual_output = dogstream.check(self.agent_config, move_end=False)

        expected_output = {
            'dogstream': [('nagios.host.pl', 1339511440, 0.0, {
                'warn': '80',
                'metric_type': 'gauge',
                'host_name': 'localhost',
                'min': '0',
                'crit': '100',
                'unit': '%'
            }),
                          ('nagios.host.rta', 1339511440, 0.048, {
                              'warn': '3000.000000',
                              'metric_type': 'gauge',
                              'host_name': 'localhost',
                              'min': '0.000000',
                              'crit': '5000.000000',
                              'unit': 'ms'
                          })]
        }
        self.assertEquals(expected_output, actual_output)
Example #3
    def test_dogstream_events(self):
        log_data = [
            '2012-05-14 12:46:01 [ERROR] - host0 is down (broke its collarbone)',
            '2012-05-14 12:48:07 [ERROR] - host1 is down (got a bloody nose)',
            '2012-05-14 12:52:03 [RECOVERY] - host0 is up (collarbone healed)',
            '2012-05-14 12:59:09 [RECOVERY] - host1 is up (nose stopped bleeding)',
        ]
        expected_output = {
            "dogstreamEvents": [
                {
                    "timestamp": 1336999561,
                    "alert_type": "error",
                    "host": "host0",
                    "msg_title": "host0 is down (broke its collarbone)",
                    "msg_text": "2012-05-14 12:46:01 [ERROR] - host0 is down (broke its collarbone)",
                    "event_type": EventDefaults.EVENT_TYPE,
                    "aggregation_key": EventDefaults.EVENT_OBJECT,
                    "event_object": EventDefaults.EVENT_OBJECT,
                },

                {
                    "timestamp": 1336999687,
                    "alert_type": "error",
                    "host": "host1",
                    "msg_title": "host1 is down (got a bloody nose)",
                    "msg_text": "2012-05-14 12:48:07 [ERROR] - host1 is down (got a bloody nose)",
                    "event_type": EventDefaults.EVENT_TYPE,
                    "aggregation_key": EventDefaults.EVENT_OBJECT,
                    "event_object": EventDefaults.EVENT_OBJECT,
                },

                {
                    "timestamp": 1336999923,
                    "alert_type": "success",
                    "host": "host0",
                    "msg_title": "host0 is up (collarbone healed)",
                    "msg_text": "2012-05-14 12:52:03 [RECOVERY] - host0 is up (collarbone healed)",
                    "event_type": EventDefaults.EVENT_TYPE,
                    "aggregation_key": EventDefaults.EVENT_OBJECT,
                    "event_object": EventDefaults.EVENT_OBJECT,
                },

                {
                    "timestamp": 1337000349,
                    "alert_type": "success",
                    "host": "host1",
                    "msg_title": "host1 is up (nose stopped bleeding)",
                    "msg_text": "2012-05-14 12:59:09 [RECOVERY] - host1 is up (nose stopped bleeding)",
                    "event_type": EventDefaults.EVENT_TYPE,
                    "aggregation_key": EventDefaults.EVENT_OBJECT,
                    "event_object": EventDefaults.EVENT_OBJECT,
                },

            ]
        }
        self._write_log(log_data)

        dogstream = Dogstreams.init(self.logger, {'dogstreams': '{0}:{1}:parse_events'.format(self.log_file.name, __name__)})
        actual_output = dogstream.check(self.config, move_end=False)
        self.assertEquals(expected_output, actual_output)
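Note: parse_events is likewise defined in the test module. A plausible sketch under the same (logger, line) contract; the regex and field names below are inferred from the sample lines and the expected output, and the default fields (event_type, aggregation_key, event_object) are presumably filled in downstream from EventDefaults:

import re
import time
from datetime import datetime

EVENT_RE = re.compile(
    r'(?P<date>\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}) '
    r'\[(?P<level>ERROR|RECOVERY)\] - (?P<title>(?P<host>\S+) is .*)')

def parse_events(logger, line):
    # Hypothetical parser for the "[ERROR] - host0 is down ..." format above.
    match = EVENT_RE.match(line)
    if match is None:
        return None
    fields = match.groupdict()
    ts = time.mktime(datetime.strptime(fields['date'], '%Y-%m-%d %H:%M:%S').timetuple())
    return {
        'timestamp': int(ts),
        'alert_type': 'error' if fields['level'] == 'ERROR' else 'success',
        'host': fields['host'],
        'msg_title': fields['title'],
        'msg_text': line,
    }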
Example #4
    def __init__(self, agentConfig, emitters, systemStats):
        self.emit_duration = None
        self.agentConfig = agentConfig
        # system stats is generated by config.get_system_stats
        self.agentConfig["system_stats"] = systemStats
        # agent config is used during checks, system_stats can be accessed through the config
        self.os = get_os()
        self.plugins = None
        self.emitters = emitters
        self.metadata_interval = int(agentConfig.get("metadata_interval", 10 * 60))
        self.metadata_start = time.time()
        socket.setdefaulttimeout(15)
        self.run_count = 0
        self.continue_running = True
        self.metadata_cache = None
        self.initialized_checks_d = []
        self.init_failed_checks_d = []

        # Unix System Checks
        self._unix_system_checks = {
            "disk": u.Disk(log),
            "io": u.IO(log),
            "load": u.Load(log),
            "memory": u.Memory(log),
            "processes": u.Processes(log),
            "cpu": u.Cpu(log),
        }

        # Win32 System Checks
        self._win32_system_checks = {
            "disk": w32.Disk(log),
            "io": w32.IO(log),
            "proc": w32.Processes(log),
            "memory": w32.Memory(log),
            "network": w32.Network(log),
            "cpu": w32.Cpu(log),
        }

        # Old-style metric checks
        self._ganglia = Ganglia(log)
        self._dogstream = Dogstreams.init(log, self.agentConfig)
        self._ddforwarder = DdForwarder(log, self.agentConfig)

        # Agent Metrics
        self._agent_metrics = CollectorMetrics(log)

        self._metrics_checks = []

        # Custom metric checks
        for module_spec in [s.strip() for s in self.agentConfig.get("custom_checks", "").split(",")]:
            if len(module_spec) == 0:
                continue
            try:
                self._metrics_checks.append(modules.load(module_spec, "Check")(log))
                log.info("Registered custom check %s" % module_spec)
                log.warning(
                    "Old format custom checks are deprecated. They should be moved to the checks.d interface as old custom checks will be removed in a next version"
                )
            except Exception, e:
                log.exception("Unable to load custom check module %s" % module_spec)
Example #5
    def __init__(self, agentConfig, emitters, systemStats):
        self.emit_duration = None
        self.agentConfig = agentConfig
        # system stats is generated by config.get_system_stats
        self.agentConfig['system_stats'] = systemStats
        # agent config is used during checks, system_stats can be accessed through the config
        self.os = get_os()
        self.plugins = None
        self.emitters = emitters            
        self.metadata_interval = int(agentConfig.get('metadata_interval', 10 * 60))
        self.metadata_start = time.time()
        socket.setdefaulttimeout(15)
        self.run_count = 0
        self.continue_running = True
        self.metadata_cache = None
        self.checks_d = []
        
        # Unix System Checks
        self._unix_system_checks = {
            'disk': u.Disk(log),
            'io': u.IO(log),
            'load': u.Load(log),
            'memory': u.Memory(log),
            'processes': u.Processes(log),
            'cpu': u.Cpu(log)
        }

        # Win32 System Checks
        self._win32_system_checks = {
            'disk': w32.Disk(log),
            'io': w32.IO(log),
            'proc': w32.Processes(log),
            'memory': w32.Memory(log),
            'network': w32.Network(log),
            'cpu': w32.Cpu(log)
        }

        # Old-style metric checks
        self._ganglia = Ganglia(log)
        self._cassandra = Cassandra()
        self._dogstream = Dogstreams.init(log, self.agentConfig)
        self._ddforwarder = DdForwarder(log, self.agentConfig)

        # Agent Metrics
        self._agent_metrics = CollectorMetrics(log)

        # Metric Checks
        self._metrics_checks = [
            Memcache(log),
        ]

        # Custom metric checks
        for module_spec in [s.strip() for s in self.agentConfig.get('custom_checks', '').split(',')]:
            if len(module_spec) == 0: continue
            try:
                self._metrics_checks.append(modules.load(module_spec, 'Check')(log))
                log.info("Registered custom check %s" % module_spec)
            except Exception, e:
                log.exception('Unable to load custom check module %s' % module_spec)
Example #6
    def __init__(self, agentConfig, emitters, systemStats):
        self.emit_duration = None
        self.agentConfig = agentConfig
        # system stats is generated by config.get_system_stats
        self.agentConfig['system_stats'] = systemStats
        # agent config is used during checks, system_stats can be accessed through the config
        self.os = get_os()
        self.plugins = None
        self.emitters = emitters
        self.metadata_interval = int(agentConfig.get('metadata_interval', 10 * 60))
        self.metadata_start = time.time()
        socket.setdefaulttimeout(15)
        self.run_count = 0
        self.continue_running = True
        self.metadata_cache = None
        self.initialized_checks_d = []
        self.init_failed_checks_d = []

        # Unix System Checks
        self._unix_system_checks = {
            'disk': u.Disk(log),
            'io': u.IO(log),
            'load': u.Load(log),
            'memory': u.Memory(log),
            'processes': u.Processes(log),
            'cpu': u.Cpu(log)
        }

        # Win32 System Checks
        self._win32_system_checks = {
            'disk': w32.Disk(log),
            'io': w32.IO(log),
            'proc': w32.Processes(log),
            'memory': w32.Memory(log),
            'network': w32.Network(log),
            'cpu': w32.Cpu(log)
        }

        # Old-style metric checks
        self._ganglia = Ganglia(log)
        self._dogstream = Dogstreams.init(log, self.agentConfig)
        self._ddforwarder = DdForwarder(log, self.agentConfig)

        # Agent Metrics
        self._agent_metrics = CollectorMetrics(log)

        self._metrics_checks = []

        # Custom metric checks
        for module_spec in [s.strip() for s in self.agentConfig.get('custom_checks', '').split(',')]:
            if len(module_spec) == 0: continue
            try:
                self._metrics_checks.append(modules.load(module_spec, 'Check')(log))
                log.info("Registered custom check %s" % module_spec)
                log.warning("Old format custom checks are deprecated. They should be moved to the checks.d interface as old custom checks will be removed in a next version")
            except Exception, e:
                log.exception('Unable to load custom check module %s' % module_spec)
Example #7
    def setUp(self):
        TailTestCase.setUp(self)

        self.config = {
            'dogstreams': self.log_file.name,
            'check_freq': 5,
        }
        log.info("Test config: %s" % self.config)
        self.dogstream = Dogstreams.init(self.logger, self.config)
        self.maxDiff = None
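Note: as the other examples on this page show, the dogstreams value accepts more than a bare file path. The general shape, inferred from the init calls above rather than from official documentation, is a comma-separated list of path[:module:parser_or_class[:arg...]] specs:

# Bare path: tail the file with the built-in dogstream line parser.
config = {
    'dogstreams': '/var/log/web.log',
    'check_freq': 5,
}

# Custom parser function, or a class with extra constructor arguments
# (paths and module names here are hypothetical, for illustration only).
config['dogstreams'] = ','.join([
    '/var/log/web.log:my.parsers:parse_web_log',
    '/var/log/acc.log:my.parsers:ParseClassPlugin:foo:bar',
])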
Example #8
    def test_host_perfdata(self):
        from checks.datadog import NagiosHostPerfData

        self._write_nagios_config([
            "host_perfdata_file=%s" % self.log_file.name,
            "host_perfdata_file_template=DATATYPE::HOSTPERFDATA\tTIMET::$TIMET$\tHOSTNAME::$HOSTNAME$\tHOSTPERFDATA::$HOSTPERFDATA$\tHOSTCHECKCOMMAND::$HOSTCHECKCOMMAND$\tHOSTSTATE::$HOSTSTATE$\tHOSTSTATETYPE::$HOSTSTATETYPE$",
        ])

        dogstream = Dogstreams.init(self.logger, self.agent_config)
        self.assertEquals([NagiosHostPerfData],
                          [d.__class__ for d in dogstream.dogstreams])

        log_data = [
            (
                "DATATYPE::HOSTPERFDATA",
                "TIMET::1000000010",
                "HOSTNAME::myhost1",
                "HOSTPERFDATA::" + " ".join([
                    "rta=0.978000ms;5000.000000;5000.000000;0.000000",
                    "pl=0%;100;100;0",
                ]),
                "HOSTCHECKCOMMAND::check-host-alive",
                "HOSTSTATE::UP",
                "HOSTSTATETYPE::HARD",
            ),
        ]

        expected_output = [
            ('nagios.host.rta', 1000000010, 0.978, {
                'metric_type': 'gauge',
                'host_name': 'myhost1',
                'unit': 'ms',
                'warn': '5000.000000',
                'crit': '5000.000000',
                'min': '0.000000'
            }),
            ('nagios.host.pl', 1000000010, 0., {
                'metric_type': 'gauge',
                'host_name': 'myhost1',
                'unit': '%',
                'warn': '100',
                'crit': '100',
                'min': '0'
            }),
        ]
        expected_output.sort(key=point_sorter)

        self._write_log(('\t'.join(data) for data in log_data))

        actual_output = dogstream.check(self.agent_config,
                                        move_end=False)['dogstream']
        actual_output.sort(key=point_sorter)

        self.assertEquals(expected_output, actual_output)
Example #9
    def test_host_perfdata(self):
        from checks.datadog import NagiosHostPerfData

        self._write_nagios_config([
            "host_perfdata_file=%s" % self.log_file.name,
            "host_perfdata_file_template=DATATYPE::HOSTPERFDATA\tTIMET::$TIMET$\tHOSTNAME::$HOSTNAME$\tHOSTPERFDATA::$HOSTPERFDATA$\tHOSTCHECKCOMMAND::$HOSTCHECKCOMMAND$\tHOSTSTATE::$HOSTSTATE$\tHOSTSTATETYPE::$HOSTSTATETYPE$",
        ])

        dogstream = Dogstreams.init(self.logger, self.agent_config)
        self.assertEquals([NagiosHostPerfData], [d.__class__ for d in dogstream.dogstreams])

        log_data = [
            ("DATATYPE::HOSTPERFDATA", 
             "TIMET::1000000010", 
             "HOSTNAME::myhost1", 
             "HOSTPERFDATA::" + " ".join([
                "rta=0.978000ms;5000.000000;5000.000000;0.000000", 
                "pl=0%;100;100;0", 
             ]),
             "HOSTCHECKCOMMAND::check-host-alive",
             "HOSTSTATE::UP",   
             "HOSTSTATETYPE::HARD",
            ),
        ]
        
        expected_output = [
            ('nagios.host.rta', 1000000010, 0.978, {
                'metric_type': 'gauge',
                'host_name': 'myhost1',
                'unit': 'ms',
                'warn': '5000.000000',
                'crit': '5000.000000',
                'min': '0.000000'
            }),
            ('nagios.host.pl',  1000000010, 0., {
                'metric_type': 'gauge',
                'host_name': 'myhost1',
                'unit': '%',
                'warn': '100',
                'crit': '100',
                'min': '0'
            }),
        ]
        expected_output.sort(key=point_sorter)

        self._write_log(('\t'.join(data) for data in log_data))        

        actual_output = dogstream.check(self.agent_config, move_end=False)['dogstream']
        actual_output.sort(key=point_sorter)

        self.assertEquals(expected_output, actual_output)
Example #10
    def __init__(self, agentConfig, emitters):
        self.agentConfig = agentConfig
        self.plugins = None
        self.emitters = emitters
        self.os = None

        self.checksLogger = logging.getLogger("checks")
        socket.setdefaulttimeout(15)

        self._apache = Apache(self.checksLogger)
        self._nginx = Nginx(self.checksLogger)
        self._disk = Disk(self.checksLogger)
        self._io = IO()
        self._load = Load(self.checksLogger)
        self._memory = Memory(self.checksLogger)
        self._network = Network(self.checksLogger)
        self._processes = Processes()
        self._cpu = Cpu()
        self._couchdb = CouchDb(self.checksLogger)
        self._mongodb = MongoDb(self.checksLogger)
        self._mysql = MySql(self.checksLogger)
        self._pgsql = PostgreSql(self.checksLogger)
        self._rabbitmq = RabbitMq()
        self._ganglia = Ganglia(self.checksLogger)
        self._cassandra = Cassandra()
        self._redis = Redis(self.checksLogger)
        self._jvm = Jvm(self.checksLogger)
        self._tomcat = Tomcat(self.checksLogger)
        self._activemq = ActiveMQ(self.checksLogger)
        self._solr = Solr(self.checksLogger)
        self._memcache = Memcache(self.checksLogger)
        self._dogstream = Dogstreams.init(self.checksLogger, self.agentConfig)
        self._ddforwarder = DdForwarder(self.checksLogger, self.agentConfig)

        # All new checks should be metrics checks:
        self._metrics_checks = [
            Cacti(self.checksLogger),
            Redis(self.checksLogger),
            Varnish(self.checksLogger),
            ElasticSearch(self.checksLogger),
        ]

        for module_spec in [s.strip() for s in self.agentConfig.get("custom_checks", "").split(",")]:
            if len(module_spec) == 0:
                continue
            try:
                self._metrics_checks.append(modules.load(module_spec, "Check")(self.checksLogger))
                self.checksLogger.info("Registered custom check %s" % module_spec)
            except Exception, e:
                self.checksLogger.exception("Unable to load custom check module %s" % module_spec)
Example #11
    def test_alt_service_perfdata(self):
        from checks.datadog import NagiosServicePerfData

        self._write_nagios_config([
            "service_perfdata_file=%s" % NAGIOS_TEST_SVC,
            "service_perfdata_file_template=%s" % NAGIOS_TEST_SVC_TEMPLATE,
        ])

        dogstream = Dogstreams.init(self.logger, self.agent_config)
        self.assertEquals([NagiosServicePerfData], [d.__class__ for d in dogstream.dogstreams])
        actual_output = dogstream.check(self.agent_config, move_end=False)

        expected_output = {'dogstream': [('nagios.current_users.users', 1339511440, 1.0, {'metric_type': 'gauge', 'warn': '20', 'host_name': 'localhost', 'crit': '50', 'min': '0'}), ('nagios.ping.pl', 1339511500, 0.0, {'warn': '20', 'metric_type': 'gauge', 'host_name': 'localhost', 'min': '0', 'crit': '60', 'unit': '%'}), ('nagios.ping.rta', 1339511500, 0.065, {'warn': '100.000000', 'metric_type': 'gauge', 'host_name': 'localhost', 'min': '0.000000', 'crit': '500.000000', 'unit': 'ms'}), ('nagios.root_partition', 1339511560, 2470.0, {'min': '0', 'max': '7315', 'device_name': '/', 'warn': '5852', 'metric_type': 'gauge', 'host_name': 'localhost', 'crit': '6583', 'unit': 'MB'})]}
        self.assertEquals(expected_output, actual_output)
Example #12
    def test_alt_host_perfdata(self):
        from checks.datadog import NagiosHostPerfData

        self._write_nagios_config([
            "host_perfdata_file=%s" % NAGIOS_TEST_HOST,
            "host_perfdata_file_template=%s" % NAGIOS_TEST_HOST_TEMPLATE,
        ])

        dogstream = Dogstreams.init(self.logger, self.agent_config)
        self.assertEquals([NagiosHostPerfData], [d.__class__ for d in dogstream.dogstreams])
        actual_output = dogstream.check(self.agent_config, move_end=False)

        expected_output = {'dogstream': [('nagios.host.pl', 1339511440, 0.0, {'warn': '80', 'metric_type': 'gauge', 'host_name': 'localhost', 'min': '0', 'crit': '100', 'unit': '%'}), ('nagios.host.rta', 1339511440, 0.048, {'warn': '3000.000000', 'metric_type': 'gauge', 'host_name': 'localhost', 'min': '0.000000', 'crit': '5000.000000', 'unit': 'ms'})]}
        self.assertEquals(expected_output, actual_output)
Example #13
    def test_dogstream_ancient_function_plugin(self):
        """Ensure that pre-stateful plugins still work"""
        log_data = [
            'test.metric.simple 1000000000 1 metric_type=gauge',
            'test.metric.simple 1100000000 1 metric_type=gauge'
        ]
        expected_output = {
            "dogstream": [
                ('test.metric.simple', 1000000000, 1, self.gauge),
                ('test.metric.simple', 1100000000, 1, self.gauge)]
        }
        self._write_log(log_data)
        plugdog = Dogstreams.init(self.logger, {'dogstreams': '{0}:{1}:parse_ancient_function_plugin'.format(self.log_file.name, __name__)})
        actual_output = plugdog.check(self.config, move_end=False)
        self.assertEquals(expected_output, actual_output)
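Note: a sketch of the "ancient" pre-stateful parser this test exercises: a plain (logger, line) function with no state argument, returning a (metric, timestamp, value, attributes) tuple. The field layout is inferred from the expected output; self.gauge above is presumably {'metric_type': 'gauge'}:

def parse_ancient_function_plugin(logger, line):
    # Hypothetical pre-stateful parser: no state is threaded through,
    # so each line is parsed independently.
    metric, timestamp, value, attrs = line.split(' ', 3)
    key, val = attrs.split('=')
    return metric, int(timestamp), int(value), {key: val}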
Example #14
    def test_alt_service_perfdata(self):
        from checks.datadog import NagiosServicePerfData

        self._write_nagios_config([
            "service_perfdata_file=%s" % NAGIOS_TEST_SVC,
            "service_perfdata_file_template=%s" % NAGIOS_TEST_SVC_TEMPLATE,
        ])

        dogstream = Dogstreams.init(self.logger, self.agent_config)
        self.assertEquals([NagiosServicePerfData],
                          [d.__class__ for d in dogstream.dogstreams])
        actual_output = dogstream.check(self.agent_config, move_end=False)

        expected_output = {
            'dogstream': [('nagios.current_users.users', 1339511440, 1.0, {
                'metric_type': 'gauge',
                'warn': '20',
                'host_name': 'localhost',
                'crit': '50',
                'min': '0'
            }),
                          ('nagios.ping.pl', 1339511500, 0.0, {
                              'warn': '20',
                              'metric_type': 'gauge',
                              'host_name': 'localhost',
                              'min': '0',
                              'crit': '60',
                              'unit': '%'
                          }),
                          ('nagios.ping.rta', 1339511500, 0.065, {
                              'warn': '100.000000',
                              'metric_type': 'gauge',
                              'host_name': 'localhost',
                              'min': '0.000000',
                              'crit': '500.000000',
                              'unit': 'ms'
                          }),
                          ('nagios.root_partition', 1339511560, 2470.0, {
                              'min': '0',
                              'max': '7315',
                              'device_name': '/',
                              'warn': '5852',
                              'metric_type': 'gauge',
                              'host_name': 'localhost',
                              'crit': '6583',
                              'unit': 'MB'
                          })]
        }
        self.assertEquals(expected_output, actual_output)
Example #15
    def test_dogstream_stateful(self):
        log_data = [
            'test.metric.accumulator 1000000000 1 metric_type=counter',
            'test.metric.accumulator 1100000000 1 metric_type=counter'
        ]
        expected_output = {
            "dogstream": [
                ('test.metric.accumulator', 1000000000, 1, self.counter),
                ('test.metric.accumulator', 1100000000, 2, self.counter)]
        }
        self._write_log(log_data)

        statedog = Dogstreams.init(self.logger, {'dogstreams': '%s:tests.test_datadog:parse_stateful' % self.log_file.name})
        actual_output = statedog.check(self.config, move_end=False)
        self.assertEquals(expected_output, actual_output)
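Note: the second expected point has value 2, so the parser accumulates across lines. A minimal sketch, assuming the dogstream machinery detects the extra parameter and threads a persistent state mapping through successive calls (the exact protocol is not shown on this page):

def parse_stateful(logger, line, state):
    # Hypothetical stateful parser: the running total survives between
    # calls via the state mapping supplied by the dogstream machinery.
    metric, timestamp, value, attrs = line.split(' ', 3)
    state['total'] = state.get('total', 0) + int(value)
    key, val = attrs.split('=')
    return metric, int(timestamp), state['total'], {key: val}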
Example #16
    def test_dogstream_new_plugin(self):
        """Ensure that class-based stateful plugins work"""
        log_data = [
            'test.metric.accumulator 1000000000 1 metric_type=counter',
            'test.metric.accumulator 1100000000 1 metric_type=counter'
        ]
        expected_output = {
            "dogstream": [
                ('foo.bar:test.metric.accumulator', 1000000000, 1, self.counter),
                ('foo.bar:test.metric.accumulator', 1100000000, 2, self.counter)]
        }
        self._write_log(log_data)

        statedog = Dogstreams.init(self.logger, {'dogstreams': '{0}:{1}:ParseClassPlugin:foo:bar'.format(self.log_file.name, __name__)})
        actual_output = statedog.check(self.config, move_end=False)
        self.assertEquals(expected_output, actual_output)
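Note: judging by the expected output, ParseClassPlugin receives the two trailing spec arguments (foo, bar) at construction time and prefixes metric names with them. A hedged sketch; the constructor signature and the parse_line method name are guesses, not confirmed API:

class ParseClassPlugin(object):
    # Hypothetical class-based stateful plugin: extra ':'-separated
    # spec arguments arrive as constructor parameters.
    def __init__(self, logger=None, user_args=(), **kwargs):
        self.logger = logger
        self.prefix = '.'.join(user_args)  # e.g. 'foo.bar'
        self.total = 0

    def parse_line(self, line):
        # Accumulate like the stateful function parser, but keep the
        # state on the instance and prefix the metric name.
        metric, timestamp, value, attrs = line.split(' ', 3)
        self.total += int(value)
        key, val = attrs.split('=')
        return '%s:%s' % (self.prefix, metric), int(timestamp), self.total, {key: val}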
Example #17
    def test_dogstream_function_plugin(self):
        """Ensure that non-class-based stateful plugins work"""
        log_data = [
            'test.metric.accumulator 1000000000 1 metric_type=counter',
            'test.metric.accumulator 1100000000 1 metric_type=counter'
        ]
        expected_output = {
            "dogstream": [
                ('test.metric.accumulator', 1000000000, 1, self.counter),
                ('test.metric.accumulator', 1100000000, 2, self.counter)]
        }
        self._write_log(log_data)

        statedog = Dogstreams.init(self.logger, {'dogstreams': '%s:tests.test_datadog:parse_function_plugin' % self.log_file.name})
        actual_output = statedog.check(self.config, move_end=False)
        self.assertEquals(expected_output, actual_output)
Example #18
    def __init__(self, agentConfig, emitters):
        self.agentConfig = agentConfig
        self.plugins = None
        self.emitters = emitters
        self.os = None
        
        self.checksLogger = logging.getLogger('checks')
        socket.setdefaulttimeout(15)
        
        self._apache = Apache(self.checksLogger)
        self._nginx = Nginx(self.checksLogger)
        self._disk = Disk(self.checksLogger)
        self._io = IO()
        self._load = Load(self.checksLogger)
        self._memory = Memory(self.checksLogger)
        self._network = Network(self.checksLogger)
        self._processes = Processes()
        self._cpu = Cpu()
        self._couchdb = CouchDb(self.checksLogger)
        self._mongodb = MongoDb(self.checksLogger)
        self._mysql = MySql(self.checksLogger)
        self._pgsql = PostgreSql(self.checksLogger)
        self._rabbitmq = RabbitMq()
        self._ganglia = Ganglia(self.checksLogger)
        self._cassandra = Cassandra()
        self._redis = Redis(self.checksLogger)
        self._jvm = Jvm(self.checksLogger)
        self._tomcat = Tomcat(self.checksLogger)
        self._activemq = ActiveMQ(self.checksLogger)
        self._solr = Solr(self.checksLogger)
        self._memcache = Memcache(self.checksLogger)
        self._dogstream = Dogstreams.init(self.checksLogger, self.agentConfig)
        self._ddforwarder = DdForwarder(self.checksLogger, self.agentConfig)

        # All new checks should be metrics checks:
        self._metrics_checks = [
            Cacti(self.checksLogger),
            Redis(self.checksLogger),
            Varnish(self.checksLogger),
            ElasticSearch(self.checksLogger),
            ]
        self._event_checks = [Hudson(), Nagios(socket.gethostname())]
        self._resources_checks = [ResProcesses(self.checksLogger, self.agentConfig)]

        self._ec2 = EC2(self.checksLogger)
Example #19
    def test_supervisord_parser(self):
        from dogstream import supervisord_log
        log_data = """2012-07-16 22:30:48,335 INFO spawned: 'monitor' with pid 20216
2012-07-14 03:02:47,325 INFO success: foo_bar entered RUNNING state, process has stayed up for > than 2 seconds (startsecs)
2012-07-17 02:53:04,600 CRIT Server 'inet_http_server' running without any HTTP authentication checking
2012-07-14 04:54:34,193 WARN received SIGTERM indicating exit request
"""
        event_type = supervisord_log.EVENT_TYPE

        expected_output = {
            "dogstreamEvents": [
                {
                    "alert_type": "info", "event_type": event_type,
                    "aggregation_key": "monitor",
                    "event_object": "monitor",
                    "msg_title": "spawned: 'monitor' with pid 20216",
                    "timestamp": int(time.mktime(datetime(2012, 7, 16, 22, 30, 48).timetuple())),
                }, {
                    "alert_type": "success", "event_type": event_type,
                    "aggregation_key": "foo_bar",
                    "event_object": "foo_bar",
                    "msg_title": "success: foo_bar entered RUNNING state, "
                    "process has stayed up for > than 2 seconds (startsecs)",
                    "timestamp": int(time.mktime(datetime(2012, 7, 14, 3, 2, 47).timetuple())),
                }, {
                    "alert_type": "error", "event_type": event_type,
                    "aggregation_key": "inet_http_server",
                    "event_object": "inet_http_server",
                    "msg_title": "Server 'inet_http_server' running without any HTTP authentication checking",
                    "timestamp": int(time.mktime(datetime(2012, 7, 17, 2, 53, 4).timetuple())),
                }, {
                    "alert_type": "warning", "event_type": event_type,
                    "aggregation_key": "SIGTERM",
                    "event_object": "SIGTERM",
                    "msg_title": "received SIGTERM indicating exit request",
                    "timestamp": int(time.mktime(datetime(2012, 7, 14, 4, 54, 34).timetuple())),
                },
            ]
        }
        self._write_log(log_data.split("\n"))

        dogstream = Dogstreams.init(self.logger, {'dogstreams': '%s:dogstream.supervisord_log:parse_supervisord' % self.log_file.name})
        actual_output = dogstream.check(self.config, move_end=False)
        self.assertEquals(expected_output, actual_output)
Example #20
    def test_supervisord_parser(self):
        from dogstream import supervisord_log
        log_data = """2012-07-16 22:30:48,335 INFO spawned: 'monitor' with pid 20216
2012-07-14 03:02:47,325 INFO success: foo_bar entered RUNNING state, process has stayed up for > than 2 seconds (startsecs)
2012-07-17 02:53:04,600 CRIT Server 'inet_http_server' running without any HTTP authentication checking
2012-07-14 04:54:34,193 WARN received SIGTERM indicating exit request
"""
        event_type = supervisord_log.EVENT_TYPE

        expected_output = {
            "dogstreamEvents":[
                {
                    "alert_type": "info", "event_type": event_type,
                    "aggregation_key": "monitor",
                    "event_object": "monitor",
                    "msg_title": "spawned: 'monitor' with pid 20216",
                    "timestamp": int(time.mktime(datetime(2012, 7, 16, 22, 30, 48).timetuple())),
                }, {
                    "alert_type": "success", "event_type": event_type,
                    "aggregation_key": "foo_bar",
                    "event_object": "foo_bar",
                    "msg_title": "success: foo_bar entered RUNNING state, "
                    "process has stayed up for > than 2 seconds (startsecs)",
                    "timestamp": int(time.mktime(datetime(2012, 7, 14, 3, 2, 47).timetuple())),
                }, {
                    "alert_type": "error", "event_type": event_type,
                    "aggregation_key": "inet_http_server",
                    "event_object": "inet_http_server",
                    "msg_title": "Server 'inet_http_server' running without any HTTP authentication checking",
                    "timestamp": int(time.mktime(datetime(2012, 7, 17, 2, 53, 4).timetuple())),
                }, {
                    "alert_type": "warning", "event_type": event_type,
                    "aggregation_key": "SIGTERM",
                    "event_object": "SIGTERM",
                    "msg_title": "received SIGTERM indicating exit request",
                    "timestamp": int(time.mktime(datetime(2012, 7, 14, 4, 54, 34).timetuple())),
                },
            ]}
        self._write_log(log_data.split("\n"))

        dogstream = Dogstreams.init(self.logger, {'dogstreams': '%s:dogstream.supervisord_log:parse_supervisord' % self.log_file.name})
        actual_output = dogstream.check(self.config, move_end=False)
        self.assertEquals(expected_output, actual_output)
Example #21
    def test_cassandra_parser(self):
        from dogstream import cassandra, common

        log_data = """ INFO [CompactionExecutor:1594] 2012-05-12 21:05:12,924 Saved test_data-Encodings-KeyCache (86400 items) in 85 ms
 INFO [CompactionExecutor:1595] 2012-05-12 21:05:15,144 Saved test_data-Metrics-KeyCache (86400 items) in 96 ms
 INFO [CompactionExecutor:1596] 2012-05-12 21:10:48,058 Compacting [SSTableReader(path='/var/cassandra/data/test_data/series-hc-6528-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6531-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6529-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6530-Data.db')]
 INFO [CompactionExecutor:1596] 2012-05-12 21:10:54,851 Compacted to [/var/cassandra/a-hc-65-Data.db,].  102,079,134 to 101,546,397
 INFO [CompactionExecutor:1598] 2012-05-12 22:05:04,313 Saved test_data-ResourcesMetadata-KeyCache (1 items) in 10 ms
 INFO [CompactionExecutor:1599] 2012-05-12 22:05:14,813 Saved test_data-Encodings-KeyCache (86400 items) in 83 ms
 INFO [CompactionExecutor:1630] 2012-05-13 13:05:44,963 Saved test_data-Metrics-KeyCache (86400 items) in 77 ms
 INFO [CompactionExecutor:1631] 2012-05-13 13:15:01,923 Nothing to compact in data_log.  Use forceUserDefinedCompaction if you wish to force compaction of single sstables (e.g. for tombstone collection)
 INFO [CompactionExecutor:1632] 2012-05-13 13:15:01,927 Compacting [SSTableReader(path='/var/cassandra/data/test_data/series-hc-6527-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6522-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6532-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6517-Data.db')]
 INFO [CompactionExecutor:1632] 2012-05-13 13:27:17,685 Compacting large row test_data/series:6c6f677c32 (782001077 bytes) incrementally
 INFO [CompactionExecutor:34] 2012-05-14 18:00:41,281 Saved test_data-Encodings-KeyCache (86400 items) in 78 ms
 INFO 13:27:17,685 Compacting large row test_data/series:6c6f677c32 (782001077 bytes) incrementally
"""
        alert_type = cassandra.ALERT_TYPES["INFO"]
        event_type = cassandra.EVENT_TYPE
        event_object = EventDefaults.EVENT_OBJECT

        expected_output = {
            "dogstreamEvents": [
                {
                    "timestamp":
                    cassandra.parse_date("2012-05-12 21:10:48,058"),
                    "msg_title":
                    "Compacting [SSTableReader(path='/var/cassandra/data/test_data/series-hc-6528-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6531-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6529-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6530-Data.db')]"[
                        0:common.MAX_TITLE_LEN],
                    "msg_text":
                    "Compacting [SSTableReader(path='/var/cassandra/data/test_data/series-hc-6528-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6531-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6529-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6530-Data.db')]",
                    "alert_type":
                    alert_type,
                    "auto_priority":
                    0,
                    "event_type":
                    event_type,
                    "aggregation_key":
                    event_object,
                    "event_object":
                    event_object,
                },
                {
                    "timestamp":
                    cassandra.parse_date("2012-05-12 21:10:54,851"),
                    "msg_title":
                    "Compacted to [/var/cassandra/a-hc-65-Data.db,].  102,079,134 to 101,546,397",
                    "alert_type": alert_type,
                    "auto_priority": 0,
                    "event_type": event_type,
                    "aggregation_key": event_object,
                    "event_object": event_object,
                },
                {
                    "timestamp":
                    cassandra.parse_date("2012-05-13 13:15:01,927"),
                    "msg_title":
                    "Compacting [SSTableReader(path='/var/cassandra/data/test_data/series-hc-6527-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6522-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6532-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6517-Data.db')]"[
                        0:common.MAX_TITLE_LEN],
                    "msg_text":
                    "Compacting [SSTableReader(path='/var/cassandra/data/test_data/series-hc-6527-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6522-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6532-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6517-Data.db')]",
                    "alert_type":
                    alert_type,
                    "event_type":
                    event_type,
                    "auto_priority":
                    0,
                    "aggregation_key":
                    event_object,
                    "event_object":
                    event_object,
                },
                {
                    "timestamp":
                    cassandra.parse_date("2012-05-13 13:27:17,685"),
                    "msg_title":
                    "Compacting large row test_data/series:6c6f677c32 (782001077 bytes) incrementally",
                    "alert_type": alert_type,
                    "event_type": event_type,
                    "auto_priority": 0,
                    "aggregation_key": event_object,
                    "event_object": event_object,
                },
                {
                    "timestamp":
                    cassandra.parse_date(
                        datetime.utcnow().strftime("%Y-%m-%d") +
                        " 13:27:17,685"),
                    "msg_title":
                    "Compacting large row test_data/series:6c6f677c32 (782001077 bytes) incrementally",
                    "alert_type":
                    alert_type,
                    "event_type":
                    event_type,
                    "auto_priority":
                    0,
                    "aggregation_key":
                    event_object,
                    "event_object":
                    event_object,
                },
            ]
        }

        self._write_log(log_data.split("\n"))

        dogstream = Dogstreams.init(
            self.logger, {
                'dogstreams':
                '%s:dogstream.cassandra:parse_cassandra' % self.log_file.name
            })
        actual_output = dogstream.check(self.config, move_end=False)
        self.assertEquals(expected_output, actual_output)
Example #22
    def test_cassandra_parser(self):
        from dogstream import cassandra, common

        log_data = """ INFO [CompactionExecutor:1594] 2012-05-12 21:05:12,924 Saved test_data-Encodings-KeyCache (86400 items) in 85 ms
 INFO [CompactionExecutor:1595] 2012-05-12 21:05:15,144 Saved test_data-Metrics-KeyCache (86400 items) in 96 ms
 INFO [CompactionExecutor:1596] 2012-05-12 21:10:48,058 Compacting [SSTableReader(path='/var/cassandra/data/test_data/series-hc-6528-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6531-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6529-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6530-Data.db')]
 INFO [CompactionExecutor:1596] 2012-05-12 21:10:54,851 Compacted to [/var/cassandra/a-hc-65-Data.db,].  102,079,134 to 101,546,397
 INFO [CompactionExecutor:1598] 2012-05-12 22:05:04,313 Saved test_data-ResourcesMetadata-KeyCache (1 items) in 10 ms
 INFO [CompactionExecutor:1599] 2012-05-12 22:05:14,813 Saved test_data-Encodings-KeyCache (86400 items) in 83 ms
 INFO [CompactionExecutor:1630] 2012-05-13 13:05:44,963 Saved test_data-Metrics-KeyCache (86400 items) in 77 ms
 INFO [CompactionExecutor:1631] 2012-05-13 13:15:01,923 Nothing to compact in data_log.  Use forceUserDefinedCompaction if you wish to force compaction of single sstables (e.g. for tombstone collection)
 INFO [CompactionExecutor:1632] 2012-05-13 13:15:01,927 Compacting [SSTableReader(path='/var/cassandra/data/test_data/series-hc-6527-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6522-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6532-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6517-Data.db')]
 INFO [CompactionExecutor:1632] 2012-05-13 13:27:17,685 Compacting large row test_data/series:6c6f677c32 (782001077 bytes) incrementally
 INFO [CompactionExecutor:34] 2012-05-14 18:00:41,281 Saved test_data-Encodings-KeyCache (86400 items) in 78 ms
 INFO 13:27:17,685 Compacting large row test_data/series:6c6f677c32 (782001077 bytes) incrementally
"""
        alert_type = cassandra.ALERT_TYPES["INFO"]
        event_type = cassandra.EVENT_TYPE
        event_object = EventDefaults.EVENT_OBJECT

        expected_output = {
            "dogstreamEvents": [
                {
                    "timestamp": cassandra.parse_date("2012-05-12 21:10:48,058"),
                    "msg_title": "Compacting [SSTableReader(path='/var/cassandra/data/test_data/series-hc-6528-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6531-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6529-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6530-Data.db')]"[0:common.MAX_TITLE_LEN],
                    "msg_text": "Compacting [SSTableReader(path='/var/cassandra/data/test_data/series-hc-6528-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6531-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6529-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6530-Data.db')]",
                    "alert_type": alert_type,
                    "auto_priority": 0,
                    "event_type": event_type,
                    "aggregation_key": event_object,
                    "event_object": event_object,
                },  {
                    "timestamp": cassandra.parse_date("2012-05-12 21:10:54,851"),
                    "msg_title": "Compacted to [/var/cassandra/a-hc-65-Data.db,].  102,079,134 to 101,546,397",
                    "alert_type": alert_type,
                    "auto_priority": 0,
                    "event_type": event_type,
                    "aggregation_key": event_object,
                    "event_object": event_object,
                },  {
                    "timestamp": cassandra.parse_date("2012-05-13 13:15:01,927"),
                    "msg_title": "Compacting [SSTableReader(path='/var/cassandra/data/test_data/series-hc-6527-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6522-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6532-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6517-Data.db')]"[0:common.MAX_TITLE_LEN],
                    "msg_text": "Compacting [SSTableReader(path='/var/cassandra/data/test_data/series-hc-6527-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6522-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6532-Data.db'), SSTableReader(path='/var/cassandra/data/test_data/series-hc-6517-Data.db')]",
                    "alert_type": alert_type,
                    "event_type": event_type,
                    "auto_priority": 0,
                    "aggregation_key": event_object,
                    "event_object": event_object,
                },  {
                    "timestamp": cassandra.parse_date("2012-05-13 13:27:17,685"),
                    "msg_title": "Compacting large row test_data/series:6c6f677c32 (782001077 bytes) incrementally",
                    "alert_type": alert_type,
                    "event_type": event_type,
                    "auto_priority": 0,
                    "aggregation_key": event_object,
                    "event_object": event_object,
                },  {
                    "timestamp": cassandra.parse_date(datetime.utcnow().strftime("%Y-%m-%d") + " 13:27:17,685"),
                    "msg_title": "Compacting large row test_data/series:6c6f677c32 (782001077 bytes) incrementally",
                    "alert_type": alert_type,
                    "event_type": event_type,
                    "auto_priority": 0,
                    "aggregation_key": event_object,
                    "event_object": event_object,
                },
            ]
        }

        self._write_log(log_data.split("\n"))

        dogstream = Dogstreams.init(self.logger, {'dogstreams': '%s:dogstream.cassandra:parse_cassandra' % self.log_file.name})
        actual_output = dogstream.check(self.config, move_end=False)
        self.assertEquals(expected_output, actual_output)
Example #23
    def test_service_perfdata(self):
        from checks.datadog import NagiosServicePerfData

        self._write_nagios_config([
            "service_perfdata_file=%s" % self.log_file.name,
            "service_perfdata_file_template=DATATYPE::SERVICEPERFDATA\tTIMET::$TIMET$\tHOSTNAME::$HOSTNAME$\tSERVICEDESC::$SERVICEDESC$\tSERVICEPERFDATA::$SERVICEPERFDATA$\tSERVICECHECKCOMMAND::$SERVICECHECKCOMMAND$\tHOSTSTATE::$HOSTSTATE$\tHOSTSTATETYPE::$HOSTSTATETYPE$\tSERVICESTATE::$SERVICESTATE$\tSERVICESTATETYPE::$SERVICESTATETYPE$",
        ])

        dogstream = Dogstreams.init(self.logger, self.agent_config)
        self.assertEquals([NagiosServicePerfData],
                          [d.__class__ for d in dogstream.dogstreams])

        log_data = [
            (
                "DATATYPE::SERVICEPERFDATA",
                "TIMET::1000000000",
                "HOSTNAME::myhost0",
                "SERVICEDESC::Pgsql Backends",
                "SERVICEPERFDATA::" + " ".join([
                    "time=0.06", "db0=33;180;190;0;200", "db1=1;150;190;0;200",
                    "db2=0;120;290;1;200", "db3=0;110;195;5;100"
                ]),
                "SERVICECHECKCOMMAND::check_nrpe_1arg!check_postgres_backends",
                "HOSTSTATE::UP",
                "HOSTSTATETYPE::HARD",
                "SERVICESTATE::OK",
                "SERVICESTATETYPE::HARD",
            ),
        ]

        expected_output = [
            ('nagios.pgsql_backends.time', 1000000000, 0.06, {
                'metric_type': 'gauge',
                'host_name': 'myhost0',
            }),
            ('nagios.pgsql_backends.db0', 1000000000, 33., {
                'metric_type': 'gauge',
                'host_name': 'myhost0',
                'warn': '180',
                'crit': '190',
                'min': '0',
                'max': '200',
            }),
            ('nagios.pgsql_backends.db1', 1000000000, 1., {
                'metric_type': 'gauge',
                'host_name': 'myhost0',
                'warn': '150',
                'crit': '190',
                'min': '0',
                'max': '200',
            }),
            ('nagios.pgsql_backends.db2', 1000000000, 0., {
                'metric_type': 'gauge',
                'host_name': 'myhost0',
                'warn': '120',
                'crit': '290',
                'min': '1',
                'max': '200',
            }),
            ('nagios.pgsql_backends.db3', 1000000000, 0., {
                'metric_type': 'gauge',
                'host_name': 'myhost0',
                'warn': '110',
                'crit': '195',
                'min': '5',
                'max': '100',
            }),
        ]
        expected_output.sort(key=point_sorter)

        self._write_log(('\t'.join(data) for data in log_data))

        actual_output = dogstream.check(self.agent_config,
                                        move_end=False)['dogstream']
        actual_output.sort(key=point_sorter)

        self.assertEquals(expected_output, actual_output)
Example #24
    def test_service_perfdata_special_cases(self):
        from checks.datadog import NagiosServicePerfData

        self._write_nagios_config([
            "service_perfdata_file=%s" % self.log_file.name,
            "service_perfdata_file_template=DATATYPE::SERVICEPERFDATA\tTIMET::$TIMET$\tHOSTNAME::$HOSTNAME$\tSERVICEDESC::$SERVICEDESC$\tSERVICEPERFDATA::$SERVICEPERFDATA$\tSERVICECHECKCOMMAND::$SERVICECHECKCOMMAND$\tHOSTSTATE::$HOSTSTATE$\tHOSTSTATETYPE::$HOSTSTATETYPE$\tSERVICESTATE::$SERVICESTATE$\tSERVICESTATETYPE::$SERVICESTATETYPE$",
        ])

        dogstream = Dogstreams.init(self.logger, self.agent_config)
        self.assertEquals([NagiosServicePerfData],
                          [d.__class__ for d in dogstream.dogstreams])

        log_data = [(
            "DATATYPE::SERVICEPERFDATA",
            "TIMET::1000000000",
            "HOSTNAME::myhost2",
            "SERVICEDESC::Disk Space",
            "SERVICEPERFDATA::" + " ".join([
                "/=5477MB;6450;7256;0;8063",
                "/dev=0MB;2970;3341;0;3713",
                "/dev/shm=0MB;3080;3465;0;3851",
                "/var/run=0MB;3080;3465;0;3851",
                "/var/lock=0MB;3080;3465;0;3851",
                "/lib/init/rw=0MB;3080;3465;0;3851",
                "/mnt=290MB;338636;380966;0;423296",
                "/data=39812MB;40940;46057;0;51175",
            ]),
            "SERVICECHECKCOMMAND::check_all_disks!20%!10%",
            "HOSTSTATE::UP",
            "HOSTSTATETYPE::HARD",
            "SERVICESTATE::OK",
            "SERVICESTATETYPE::HARD",
        )]

        expected_output = [
            ('nagios.disk_space', 1000000000, 5477., {
                'metric_type': 'gauge',
                'host_name': 'myhost2',
                'device_name': '/',
                'unit': 'MB',
                'warn': '6450',
                'crit': '7256',
                'min': '0',
                'max': '8063',
            }),
            ('nagios.disk_space', 1000000000, 0., {
                'metric_type': 'gauge',
                'host_name': 'myhost2',
                'device_name': '/dev',
                'unit': 'MB',
                'warn': '2970',
                'crit': '3341',
                'min': '0',
                'max': '3713',
            }),
            ('nagios.disk_space', 1000000000, 0., {
                'metric_type': 'gauge',
                'host_name': 'myhost2',
                'device_name': '/dev/shm',
                'unit': 'MB',
                'warn': '3080',
                'crit': '3465',
                'min': '0',
                'max': '3851',
            }),
            ('nagios.disk_space', 1000000000, 0., {
                'metric_type': 'gauge',
                'host_name': 'myhost2',
                'device_name': '/var/run',
                'unit': 'MB',
                'warn': '3080',
                'crit': '3465',
                'min': '0',
                'max': '3851',
            }),
            ('nagios.disk_space', 1000000000, 0., {
                'metric_type': 'gauge',
                'host_name': 'myhost2',
                'device_name': '/var/lock',
                'unit': 'MB',
                'warn': '3080',
                'crit': '3465',
                'min': '0',
                'max': '3851',
            }),
            ('nagios.disk_space', 1000000000, 0., {
                'metric_type': 'gauge',
                'host_name': 'myhost2',
                'device_name': '/lib/init/rw',
                'unit': 'MB',
                'warn': '3080',
                'crit': '3465',
                'min': '0',
                'max': '3851',
            }),
            ('nagios.disk_space', 1000000000, 290., {
                'metric_type': 'gauge',
                'host_name': 'myhost2',
                'device_name': '/mnt',
                'unit': 'MB',
                'warn': '338636',
                'crit': '380966',
                'min': '0',
                'max': '423296',
            }),
            ('nagios.disk_space', 1000000000, 39812., {
                'metric_type': 'gauge',
                'host_name': 'myhost2',
                'device_name': '/data',
                'unit': 'MB',
                'warn': '40940',
                'crit': '46057',
                'min': '0',
                'max': '51175',
            }),
        ]
        expected_output.sort(key=point_sorter)

        self._write_log(('\t'.join(data) for data in log_data))

        actual_output = dogstream.check(self.agent_config,
                                        move_end=False)['dogstream']
        actual_output.sort(key=point_sorter)

        self.assertEquals(expected_output, actual_output)
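
What makes this the "special cases" test relative to the previous one: a label that is a filesystem path does not become a metric-name suffix; it surfaces as the device_name attribute instead, and the trailing unit on the value (MB here) is split off into the unit attribute. A hedged guess at that branch, inferred only from the expected output:

def metric_and_device(label, service_desc):
    # 'Disk Space' + '/dev/shm' -> ('nagios.disk_space', {'device_name': '/dev/shm'})
    base = 'nagios.%s' % service_desc.lower().replace(' ', '_')
    if label.startswith('/'):
        return base, {'device_name': label}
    return '%s.%s' % (base, label), {}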
Example #31
    def __init__(self, agentConfig, emitters, systemStats):
        self.agentConfig = agentConfig
        # system stats are generated by config.get_system_stats
        self.agentConfig['system_stats'] = systemStats
        # the agent config is used during checks; system_stats can be accessed through it
        self.os = getOS()
        self.plugins = None
        self.emitters = emitters
        self.metadata_interval = int(
            agentConfig.get('metadata_interval', 10 * 60))
        self.metadata_start = time.time()
        socket.setdefaulttimeout(15)
        self.run_count = 0
        self.continue_running = True

        # Unix System Checks
        self._unix_system_checks = {
            'disk': u.Disk(checks_logger),
            'io': u.IO(),
            'load': u.Load(checks_logger),
            'memory': u.Memory(checks_logger),
            'network': u.Network(checks_logger),
            'processes': u.Processes(),
            'cpu': u.Cpu(checks_logger)
        }

        # Win32 System Checks
        self._win32_system_checks = {
            'disk': w32.Disk(checks_logger),
            'io': w32.IO(checks_logger),
            'proc': w32.Processes(checks_logger),
            'memory': w32.Memory(checks_logger),
            'network': w32.Network(checks_logger),
            'cpu': w32.Cpu(checks_logger)
        }

        # Old-style metric checks
        self._couchdb = CouchDb(checks_logger)
        self._mongodb = MongoDb(checks_logger)
        self._mysql = MySql(checks_logger)
        self._rabbitmq = RabbitMq()
        self._ganglia = Ganglia(checks_logger)
        self._cassandra = Cassandra()
        self._dogstream = Dogstreams.init(checks_logger, self.agentConfig)
        self._ddforwarder = DdForwarder(checks_logger, self.agentConfig)
        self._ec2 = EC2(checks_logger)

        # Metric Checks
        self._metrics_checks = [
            ElasticSearch(checks_logger),
            Jvm(checks_logger),
            Tomcat(checks_logger),
            ActiveMQ(checks_logger),
            Solr(checks_logger),
            WMICheck(checks_logger),
            Memcache(checks_logger),
        ]

        # Custom metric checks
        for module_spec in [
                s.strip()
                for s in self.agentConfig.get('custom_checks', '').split(',')
        ]:
            if len(module_spec) == 0:
                continue
            try:
                self._metrics_checks.append(
                    modules.load(module_spec, 'Check')(checks_logger))
                logger.info("Registered custom check %s" % module_spec)
            except Exception:
                logger.exception('Unable to load custom check module %s' %
                                 module_spec)
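
modules.load(module_spec, 'Check') is not shown in this collection; the loop above implies it imports the module named by the comma-separated custom_checks setting and returns a Check class that takes a logger. A hypothetical module satisfying that contract (the module name and metric are invented for illustration):

import time

# my_custom_checks.py, loadable with custom_checks=my_custom_checks (hypothetical)
class Check(object):
    def __init__(self, logger):
        self.logger = logger

    def check(self, agentConfig):
        # return points shaped like the other metric checks:
        # (metric_name, timestamp, value, attributes)
        return [('custom.heartbeat', time.time(), 1, {'metric_type': 'gauge'})]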
Example #32
    def __init__(self, agentConfig, emitters, systemStats, hostname):
        self.emit_duration = None
        self.agentConfig = agentConfig
        self.hostname = hostname
        # system stats are generated by config.get_system_stats
        self.agentConfig["system_stats"] = systemStats
        # the agent config is used during checks; system_stats can be accessed through it
        self.os = get_os()
        self.plugins = None
        self.emitters = emitters
        self.check_timings = agentConfig.get("check_timings")
        self.push_times = {
            "host_metadata": {"start": time.time(), "interval": int(agentConfig.get("metadata_interval", 4 * 60 * 60))},
            "external_host_tags": {
                "start": time.time() - 3 * 60,  # Wait for the checks to init
                "interval": int(agentConfig.get("external_host_tags", 5 * 60)),
            },
            "agent_checks": {"start": time.time(), "interval": int(agentConfig.get("agent_checks_interval", 10 * 60))},
            "processes": {"start": time.time(), "interval": int(agentConfig.get("processes_interval", 60))},
        }
        socket.setdefaulttimeout(15)
        self.run_count = 0
        self.continue_running = True
        self.hostname_metadata_cache = None
        self.initialized_checks_d = []
        self.init_failed_checks_d = {}

        # Unix System Checks
        self._unix_system_checks = {
            "io": u.IO(log),
            "load": u.Load(log),
            "memory": u.Memory(log),
            "processes": u.Processes(log),
            "cpu": u.Cpu(log),
            "system": u.System(log),
        }

        # Win32 System Checks
        self._win32_system_checks = {
            "io": w32.IO(log),
            "proc": w32.Processes(log),
            "memory": w32.Memory(log),
            "network": w32.Network(log),
            "cpu": w32.Cpu(log),
            "system": w32.System(log),
        }

        # Old-style metric checks
        self._ganglia = Ganglia(log) if self.agentConfig.get("ganglia_host", "") != "" else None
        self._dogstream = None if self.agentConfig.get("dogstreams") is None else Dogstreams.init(log, self.agentConfig)

        # Agent performance metrics check
        self._agent_metrics = None

        self._metrics_checks = []

        # Custom metric checks
        for module_spec in [s.strip() for s in self.agentConfig.get("custom_checks", "").split(",")]:
            if len(module_spec) == 0:
                continue
            try:
                self._metrics_checks.append(modules.load(module_spec, "Check")(log))
                log.info("Registered custom check %s" % module_spec)
                log.warning(
                    "Old format custom checks are deprecated. They should be moved to the checks.d interface as old custom checks will be removed in a next version"
                )
            except Exception:
                log.exception("Unable to load custom check module %s" % module_spec)
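
Unlike the older constructors in this collection, this one only builds a dogstream when the dogstreams setting is present. Its value is a comma-separated list of log paths, each optionally followed by a parser module and function, hedged here from the Dogstreams usage elsewhere in this collection. A hypothetical configuration (the path and parser names are invented):

import logging

from checks.datadog import Dogstreams  # assumed location, matching the test imports

log = logging.getLogger(__name__)
agent_config = {
    # each entry: /path/to/log[:module:parser_function]
    'dogstreams': '/var/log/app.log:my_parsers:parse_app_line',
}
dogstream = None if agent_config.get('dogstreams') is None else Dogstreams.init(log, agent_config)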
Example #33
    def __init__(self, agentConfig, emitters, systemStats):
        self.emit_duration = None
        self.agentConfig = agentConfig
        # system stats are generated by config.get_system_stats
        self.agentConfig["system_stats"] = systemStats
        # the agent config is used during checks; system_stats can be accessed through it
        self.os = get_os()
        self.plugins = None
        self.emitters = emitters
        self.metadata_interval = int(agentConfig.get("metadata_interval", 10 * 60))
        self.metadata_start = time.time()
        socket.setdefaulttimeout(15)
        self.run_count = 0
        self.continue_running = True
        self.metadata_cache = None
        self.checks_d = []

        # Unix System Checks
        self._unix_system_checks = {
            "disk": u.Disk(log),
            "io": u.IO(log),
            "load": u.Load(log),
            "memory": u.Memory(log),
            "network": u.Network(log),
            "processes": u.Processes(log),
            "cpu": u.Cpu(log),
        }

        # Win32 System Checks
        self._win32_system_checks = {
            "disk": w32.Disk(log),
            "io": w32.IO(log),
            "proc": w32.Processes(log),
            "memory": w32.Memory(log),
            "network": w32.Network(log),
            "cpu": w32.Cpu(log),
        }

        # Old-style metric checks
        self._mongodb = MongoDb(log)
        self._mysql = MySql(log)
        self._rabbitmq = RabbitMq()
        self._ganglia = Ganglia(log)
        self._cassandra = Cassandra()
        self._dogstream = Dogstreams.init(log, self.agentConfig)
        self._ddforwarder = DdForwarder(log, self.agentConfig)
        self._ec2 = EC2(log)

        # Agent Metrics
        self._agent_metrics = CollectorMetrics(log)

        # Metric Checks
        self._metrics_checks = [Memcache(log)]

        # Custom metric checks
        for module_spec in [s.strip() for s in self.agentConfig.get("custom_checks", "").split(",")]:
            if len(module_spec) == 0:
                continue
            try:
                self._metrics_checks.append(modules.load(module_spec, "Check")(log))
                log.info("Registered custom check %s" % module_spec)
            except Exception:
                log.exception("Unable to load custom check module %s" % module_spec)
Example #34
    def run(self, checksd=None, start_event=True, configs_reloaded=False):
        """
        Collect data from each check and submit their data.
        """
        log.debug("Found {num_checks} checks".format(
            num_checks=len(checksd['initialized_checks'])))
        timer = Timer()
        if not Platform.is_windows():
            cpu_clock = time.clock()
        self.run_count += 1
        log.debug("Starting collection run #%s" % self.run_count)

        if checksd:
            self.initialized_checks_d = checksd[
                'initialized_checks']  # is a list of AgentCheck instances
            self.init_failed_checks_d = checksd[
                'init_failed_checks']  # is of type {check_name: {error, traceback}}

        payload = AgentPayload()

        # Find the AgentMetrics check and pop it out
        # This check must run at the end of the loop to collect info on agent performance
        if not self._agent_metrics or configs_reloaded:
            for check in self.initialized_checks_d:
                if check.name == AGENT_METRICS_CHECK_NAME:
                    self._agent_metrics = check
                    self.initialized_checks_d.remove(check)
                    break

        # Initialize payload
        self._build_payload(payload)

        metrics = payload['metrics']
        events = payload['events']
        service_checks = payload['service_checks']

        # Run the system checks. Checks will depend on the OS
        if Platform.is_windows():
            # Win32 system checks
            for check_name in ['memory', 'cpu', 'io', 'proc', 'system']:
                try:
                    metrics.extend(self._win32_system_checks[check_name].check(
                        self.agentConfig))
                except Exception:
                    log.exception('Unable to get %s metrics', check_name)
        else:
            # Unix system checks
            sys_checks = self._unix_system_checks

            for check_name in ['load', 'system', 'cpu', 'file_handles']:
                try:
                    result_check = sys_checks[check_name].check(
                        self.agentConfig)
                    if result_check:
                        payload.update(result_check)
                except Exception:
                    log.exception('Unable to get %s metrics', check_name)

            try:
                memory = sys_checks['memory'].check(self.agentConfig)
            except Exception:
                log.exception('Unable to get memory metrics')
            else:
                if memory:
                    memstats = {
                        'memPhysUsed': memory.get('physUsed'),
                        'memPhysPctUsable': memory.get('physPctUsable'),
                        'memPhysFree': memory.get('physFree'),
                        'memPhysTotal': memory.get('physTotal'),
                        'memPhysUsable': memory.get('physUsable'),
                        'memSwapUsed': memory.get('swapUsed'),
                        'memSwapFree': memory.get('swapFree'),
                        'memSwapPctFree': memory.get('swapPctFree'),
                        'memSwapTotal': memory.get('swapTotal'),
                        'memCached': memory.get('physCached'),
                        'memBuffers': memory.get('physBuffers'),
                        'memShared': memory.get('physShared'),
                        'memSlab': memory.get('physSlab'),
                        'memPageTables': memory.get('physPageTables'),
                        'memSwapCached': memory.get('swapCached')
                    }
                    payload.update(memstats)

            try:
                ioStats = sys_checks['io'].check(self.agentConfig)
            except Exception:
                log.exception('Unable to get io metrics')
            else:
                if ioStats:
                    payload['ioStats'] = ioStats

            try:
                processes = sys_checks['processes'].check(self.agentConfig)
            except Exception:
                log.exception('Unable to get processes metrics')
            else:
                payload.update({'processes': processes})

        # Run old-style checks
        if self._ganglia is not None:
            payload['ganglia'] = self._ganglia.check(self.agentConfig)
        if self._dogstream is not None:
            # Every 10 runs (~2 min), reload the list of files
            # watched by dogstream
            if (self.run_count % 10) == 0:
                log.info("reloading list of files watched by Dogstreams")
                self._dogstream = Dogstreams.init(log, self.agentConfig)
            dogstreamData = self._dogstream.check(self.agentConfig)
            dogstreamEvents = dogstreamData.get('dogstreamEvents', None)
            if dogstreamEvents:
                if 'dogstream' in payload['events']:
                    events['dogstream'].extend(dogstreamEvents)
                else:
                    events['dogstream'] = dogstreamEvents
                del dogstreamData['dogstreamEvents']

            payload.update(dogstreamData)

        # gohai process collector (payload-compatible with the legacy "resources checks")
        if not Platform.is_windows() and self._should_send_additional_data(
                'processes'):
            gohai_processes = self._run_gohai_processes()
            if gohai_processes:
                try:
                    gohai_processes_json = json.loads(gohai_processes)
                    processes_snaps = gohai_processes_json.get('processes')
                    if processes_snaps:
                        processes_payload = {'snaps': [processes_snaps]}

                        payload['resources'] = {
                            'processes': processes_payload,
                            'meta': {
                                'host': self.hostname,
                            }
                        }
                except Exception:
                    log.exception("Error running gohai processes collection")

        # newer-style checks (not checks.d style)
        for metrics_check in self._metrics_checks:
            res = metrics_check.check(self.agentConfig)
            if res:
                metrics.extend(res)

        # Use `info` log level for some messages on the first run only, then `debug`
        log_at_first_run = log.info if self._is_first_run() else log.debug

        # checks.d checks
        check_statuses = []
        for check in self.initialized_checks_d:
            if not self.continue_running:
                return
            log_at_first_run("Running check %s", check.name)
            instance_statuses = []
            metric_count = 0
            event_count = 0
            service_check_count = 0
            check_start_time = time.time()
            check_stats = None
            current_check_metadata = None  # keep defined even if check.run() raises

            try:
                # Run the check.
                instance_statuses = check.run()

                # Collect the metrics and events.
                current_check_metrics = check.get_metrics()
                current_check_events = check.get_events()
                check_stats = check._get_internal_profiling_stats()

                # Collect metadata
                current_check_metadata = check.get_service_metadata()

                # Save metrics & events for the payload.
                metrics.extend(current_check_metrics)
                if current_check_events:
                    if check.name not in events:
                        events[check.name] = current_check_events
                    else:
                        events[check.name] += current_check_events

                # Save the status of the check.
                metric_count = len(current_check_metrics)
                event_count = len(current_check_events)

            except Exception:
                log.exception("Error running check %s" % check.name)

            check_status = CheckStatus(
                check.name,
                instance_statuses,
                metric_count,
                event_count,
                service_check_count,
                service_metadata=current_check_metadata,
                library_versions=check.get_library_info(),
                source_type_name=check.SOURCE_TYPE_NAME or check.name,
                check_stats=check_stats,
                check_version=check.check_version)

            # Service check for Agent checks failures
            service_check_tags = ["check:%s" % check.name]
            if check_status.status == STATUS_OK:
                status = AgentCheck.OK
            elif check_status.status == STATUS_ERROR:
                status = AgentCheck.CRITICAL
            check.service_check('datadog.agent.check_status',
                                status,
                                tags=service_check_tags)

            # Collect the service checks and save them in the payload
            current_check_service_checks = check.get_service_checks()
            if current_check_service_checks:
                service_checks.extend(current_check_service_checks)
            # -1 because the user doesn't care about the service check for check failure
            service_check_count = len(current_check_service_checks) - 1

            # Update the check status with the correct service_check_count
            check_status.service_check_count = service_check_count
            check_statuses.append(check_status)

            check_run_time = time.time() - check_start_time
            log.debug("Check %s ran in %.2f s" % (check.name, check_run_time))

            # Instrument check run timings if enabled.
            if self.check_timings:
                metric = 'datadog.agent.check_run_time'
                meta = {'tags': ["check:%s" % check.name]}
                metrics.append((metric, time.time(), check_run_time, meta))

        for check_name, info in self.init_failed_checks_d.iteritems():
            if not self.continue_running:
                return
            check_status = CheckStatus(check_name,
                                       None,
                                       None,
                                       None,
                                       None,
                                       check_version=info.get(
                                           'version', 'unknown'),
                                       init_failed_error=info['error'],
                                       init_failed_traceback=info['traceback'])
            check_statuses.append(check_status)

        # Add a service check for the agent
        service_checks.append(
            create_service_check('datadog.agent.up',
                                 AgentCheck.OK,
                                 hostname=self.hostname))

        # Store the metrics and events in the payload.
        payload['metrics'] = metrics
        payload['events'] = events
        payload['service_checks'] = service_checks

        # Populate metadata
        self._populate_payload_metadata(payload, check_statuses, start_event)

        collect_duration = timer.step()

        if self._agent_metrics:
            metric_context = {
                'collection_time': collect_duration,
                'emit_time': self.emit_duration,
            }
            if not Platform.is_windows():
                metric_context['cpu_time'] = time.clock() - cpu_clock

            self._agent_metrics.set_metric_context(payload, metric_context)
            self._agent_metrics.run()
            agent_stats = self._agent_metrics.get_metrics()
            payload['metrics'].extend(agent_stats)
            if self.agentConfig.get('developer_mode'):
                log.debug("\n Agent developer mode stats: \n {0}".format(
                    Collector._stats_for_display(agent_stats)))
            # Flush metadata for the Agent Metrics check. Otherwise they'll just accumulate and leak.
            self._agent_metrics.get_service_metadata()

        # Let's send our payload
        emitter_statuses = payload.emit(log, self.agentConfig, self.emitters,
                                        self.continue_running)
        self.emit_duration = timer.step()

        if self._is_first_run():
            # This is not the exact payload sent to the backend as minor post
            # processing is done, but this will give us a good idea of what is sent
            # to the backend.
            data = payload.payload  # deep copy and merge of meta and metric data
            data['apiKey'] = '*************************' + data.get(
                'apiKey', '')[-5:]
            # removing unused keys for the metadata payload
            del data['metrics']
            del data['events']
            del data['service_checks']
            if data.get('processes'):
                data['processes'][
                    'apiKey'] = '*************************' + data[
                        'processes'].get('apiKey', '')[-5:]
            log.debug("Metadata payload: %s", json.dumps(data))

        # Persist the status of the collection run.
        try:
            CollectorStatus(check_statuses, emitter_statuses,
                            self.hostname_metadata_cache).persist()
        except Exception:
            log.exception("Error persisting collector status")

        if self.run_count <= FLUSH_LOGGING_INITIAL or self.run_count % FLUSH_LOGGING_PERIOD == 0:
            log.info("Finished run #%s. Collection time: %ss. Emit time: %ss" %
                     (self.run_count, round(collect_duration,
                                            2), round(self.emit_duration, 2)))
            if self.run_count == FLUSH_LOGGING_INITIAL:
                log.info(
                    "First flushes done, next flushes will be logged every %s flushes."
                    % FLUSH_LOGGING_PERIOD)
        else:
            log.debug(
                "Finished run #%s. Collection time: %ss. Emit time: %ss" %
                (self.run_count, round(collect_duration,
                                       2), round(self.emit_duration, 2)))

        return payload
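
Timer is outside this excerpt; the usage above (timer.step() once after collection, again after emitting) implies a stopwatch whose step() returns the seconds elapsed since the previous step. A minimal stand-in under that assumption:

import time

class Timer(object):
    def __init__(self):
        self._last = time.time()

    def step(self):
        # seconds since construction or since the previous step() call
        now = time.time()
        elapsed, self._last = now - self._last, now
        return elapsed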
Example #35
    def __init__(self, agentConfig, emitters, systemStats):
        self.agentConfig = agentConfig
        # system stats are generated by config.get_system_stats
        self.agentConfig['system_stats'] = systemStats
        # the agent config is used during checks; system_stats can be accessed through it
        self.os = getOS()
        self.plugins = None
        self.emitters = emitters            
        self.metadata_interval = int(agentConfig.get('metadata_interval', 10 * 60))
        self.metadata_start = time.time()
        socket.setdefaulttimeout(15)
        self.run_count = 0
        self.continue_running = True
        
        # Unix System Checks
        self._unix_system_checks = {
            'disk': u.Disk(checks_logger),
            'io': u.IO(),
            'load': u.Load(checks_logger),
            'memory': u.Memory(checks_logger),
            'network': u.Network(checks_logger),
            'processes': u.Processes(),
            'cpu': u.Cpu(checks_logger)
        }

        # Win32 System Checks
        self._win32_system_checks = {
            'disk': w32.Disk(checks_logger),
            'io': w32.IO(checks_logger),
            'proc': w32.Processes(checks_logger),
            'memory': w32.Memory(checks_logger),
            'network': w32.Network(checks_logger),
            'cpu': w32.Cpu(checks_logger)
        }

        # Old-style metric checks
        self._couchdb = CouchDb(checks_logger)
        self._mongodb = MongoDb(checks_logger)
        self._mysql = MySql(checks_logger)
        self._rabbitmq = RabbitMq()
        self._ganglia = Ganglia(checks_logger)
        self._cassandra = Cassandra()
        self._dogstream = Dogstreams.init(checks_logger, self.agentConfig)
        self._ddforwarder = DdForwarder(checks_logger, self.agentConfig)
        self._ec2 = EC2(checks_logger)

        # Metric Checks
        self._metrics_checks = [
            ElasticSearch(checks_logger),
            WMICheck(checks_logger),
            Memcache(checks_logger),
        ]

        # Custom metric checks
        for module_spec in [s.strip() for s in self.agentConfig.get('custom_checks', '').split(',')]:
            if len(module_spec) == 0:
                continue
            try:
                self._metrics_checks.append(modules.load(module_spec, 'Check')(checks_logger))
                logger.info("Registered custom check %s" % module_spec)
            except Exception:
                logger.exception('Unable to load custom check module %s' % module_spec)
Example #36
    def test_service_perfdata(self):
        from checks.datadog import NagiosServicePerfData

        self._write_nagios_config([
            "service_perfdata_file=%s" % self.log_file.name,
            "service_perfdata_file_template=DATATYPE::SERVICEPERFDATA\tTIMET::$TIMET$\tHOSTNAME::$HOSTNAME$\tSERVICEDESC::$SERVICEDESC$\tSERVICEPERFDATA::$SERVICEPERFDATA$\tSERVICECHECKCOMMAND::$SERVICECHECKCOMMAND$\tHOSTSTATE::$HOSTSTATE$\tHOSTSTATETYPE::$HOSTSTATETYPE$\tSERVICESTATE::$SERVICESTATE$\tSERVICESTATETYPE::$SERVICESTATETYPE$",
        ])

        dogstream = Dogstreams.init(self.logger, self.agent_config)
        self.assertEquals([NagiosServicePerfData], [d.__class__ for d in dogstream.dogstreams])

        log_data = [
            ("DATATYPE::SERVICEPERFDATA", 
             "TIMET::1000000000", 
             "HOSTNAME::myhost0", 
             "SERVICEDESC::Pgsql Backends", 
             "SERVICEPERFDATA::" + " ".join([
                "time=0.06", 
                "db0=33;180;190;0;200", 
                "db1=1;150;190;0;200",
                "db2=0;120;290;1;200", 
                "db3=0;110;195;5;100"
             ]),
             "SERVICECHECKCOMMAND::check_nrpe_1arg!check_postgres_backends",
             "HOSTSTATE::UP",   
             "HOSTSTATETYPE::HARD",
             "SERVICESTATE::OK",
             "SERVICESTATETYPE::HARD",
            ),
        ]
        
        expected_output = [
            ('nagios.pgsql_backends.time', 1000000000, 0.06, {
                'metric_type': 'gauge',
                'host_name': 'myhost0',
            }),
            ('nagios.pgsql_backends.db0',  1000000000,   33., {
                'metric_type': 'gauge',
                'host_name': 'myhost0',
                'warn': '180',
                'crit': '190',
                'min':    '0',
                'max':  '200',
            }),
            ('nagios.pgsql_backends.db1',  1000000000,    1., {
                'metric_type': 'gauge',
                'host_name': 'myhost0',
                'warn': '150',
                'crit': '190',
                'min':    '0',
                'max':  '200',
            }),
            ('nagios.pgsql_backends.db2',  1000000000,    0., {
                'metric_type': 'gauge',
                'host_name': 'myhost0',
                'warn': '120',
                'crit': '290',
                'min':    '1',
                'max':  '200',
            }),
            ('nagios.pgsql_backends.db3',  1000000000,    0., {
                'metric_type': 'gauge',
                'host_name': 'myhost0',
                'warn': '110',
                'crit': '195',
                'min':    '5',
                'max':  '100',
            }),
        ]
        expected_output.sort(key=point_sorter)

        self._write_log(('\t'.join(data) for data in log_data))        

        actual_output = dogstream.check(self.agent_config, move_end=False)['dogstream']
        actual_output.sort(key=point_sorter)

        self.assertEquals(expected_output, actual_output)
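
point_sorter is also outside this excerpt; it is used as a sort key over (metric, timestamp, value, attributes) tuples, where attributes may carry a device_name. Something like the following stand-in would make the comparisons in these tests deterministic (an assumption, not the real helper):

def point_sorter(point):
    # sort by metric name, then timestamp, then device, then value
    name, timestamp, value, attributes = point
    device = (attributes or {}).get('device_name', '')
    return (name, timestamp, device, value)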
Example #37
    def __init__(self, agentConfig, emitters, systemStats, hostname):
        self.emit_duration = None
        self.agentConfig = agentConfig
        self.hostname = hostname
        # system stats are generated by config.get_system_stats
        self.agentConfig['system_stats'] = systemStats
        # the agent config is used during checks; system_stats can be accessed through it
        self.os = get_os()
        self.plugins = None
        self.emitters = emitters
        self.check_timings = agentConfig.get('check_timings')
        self.push_times = {
            'host_metadata': {
                'start': time.time(),
                'interval': int(agentConfig.get('metadata_interval', 4 * 60 * 60))
            },
            'external_host_tags': {
                'start': time.time() - 3 * 60,  # Wait for the checks to init
                'interval': int(agentConfig.get('external_host_tags', 5 * 60))
            },
            'agent_checks': {
                'start': time.time(),
                'interval': int(agentConfig.get('agent_checks_interval', 10 * 60))
            },
            'processes': {
                'start': time.time(),
                'interval': int(agentConfig.get('processes_interval', 60))
            }
        }
        socket.setdefaulttimeout(15)
        self.run_count = 0
        self.continue_running = True
        self.hostname_metadata_cache = None
        self.initialized_checks_d = []
        self.init_failed_checks_d = {}

        if Platform.is_linux() and psutil is not None:
            procfs_path = agentConfig.get('procfs_path', '/proc').rstrip('/')
            psutil.PROCFS_PATH = procfs_path

        # Unix System Checks
        self._unix_system_checks = {
            'io': u.IO(log),
            'load': u.Load(log),
            'memory': u.Memory(log),
            'processes': u.Processes(log),
            'cpu': u.Cpu(log),
            'system': u.System(log)
        }

        # Win32 System Checks
        self._win32_system_checks = {
            'io': w32.IO(log),
            'proc': w32.Processes(log),
            'memory': w32.Memory(log),
            'network': w32.Network(log),
            'cpu': w32.Cpu(log),
            'system': w32.System(log)
        }

        # Old-style metric checks
        self._ganglia = Ganglia(log) if self.agentConfig.get('ganglia_host', '') != '' else None
        self._dogstream = None if self.agentConfig.get('dogstreams') is None else Dogstreams.init(log, self.agentConfig)

        # Agent performance metrics check
        self._agent_metrics = None

        self._metrics_checks = []

        # Custom metric checks
        for module_spec in [s.strip() for s in self.agentConfig.get('custom_checks', '').split(',')]:
            if len(module_spec) == 0:
                continue
            try:
                self._metrics_checks.append(modules.load(module_spec, 'Check')(log))
                log.info("Registered custom check %s" % module_spec)
                log.warning("Old format custom checks are deprecated. They should be moved to the checks.d interface as old custom checks will be removed in a next version")
            except Exception:
                log.exception('Unable to load custom check module %s' % module_spec)
Example #38
    def test_service_perfdata_special_cases(self):
        from checks.datadog import NagiosServicePerfData

        self._write_nagios_config([
            "service_perfdata_file=%s" % self.log_file.name,
            "service_perfdata_file_template=DATATYPE::SERVICEPERFDATA\tTIMET::$TIMET$\tHOSTNAME::$HOSTNAME$\tSERVICEDESC::$SERVICEDESC$\tSERVICEPERFDATA::$SERVICEPERFDATA$\tSERVICECHECKCOMMAND::$SERVICECHECKCOMMAND$\tHOSTSTATE::$HOSTSTATE$\tHOSTSTATETYPE::$HOSTSTATETYPE$\tSERVICESTATE::$SERVICESTATE$\tSERVICESTATETYPE::$SERVICESTATETYPE$",
        ])

        dogstream = Dogstreams.init(self.logger, self.agent_config)
        self.assertEquals([NagiosServicePerfData], [d.__class__ for d in dogstream.dogstreams])

        log_data = [
            (   "DATATYPE::SERVICEPERFDATA",
                "TIMET::1000000000",
                "HOSTNAME::myhost2",
                "SERVICEDESC::Disk Space",
                "SERVICEPERFDATA::" + " ".join([
                    "/=5477MB;6450;7256;0;8063",
                    "/dev=0MB;2970;3341;0;3713",
                    "/dev/shm=0MB;3080;3465;0;3851",
                    "/var/run=0MB;3080;3465;0;3851",
                    "/var/lock=0MB;3080;3465;0;3851",
                    "/lib/init/rw=0MB;3080;3465;0;3851",
                    "/mnt=290MB;338636;380966;0;423296",
                    "/data=39812MB;40940;46057;0;51175",
                ]),
                "SERVICECHECKCOMMAND::check_all_disks!20%!10%",
                "HOSTSTATE::UP",
                "HOSTSTATETYPE::HARD",
                "SERVICESTATE::OK",
                "SERVICESTATETYPE::HARD",
            )
        ]
        
        expected_output = [
            ('nagios.disk_space', 1000000000, 5477., {
                'metric_type': 'gauge',
                'host_name': 'myhost2',
                'device_name': '/',
                'unit': 'MB',
                'warn': '6450',
                'crit': '7256',
                'min': '0',
                'max': '8063',
            }),
            ('nagios.disk_space', 1000000000, 0., {
                'metric_type': 'gauge',
                'host_name': 'myhost2',
                'device_name': '/dev',
                'unit': 'MB',
                'warn': '2970',
                'crit': '3341',
                'min': '0',
                'max': '3713',
            }),
            ('nagios.disk_space', 1000000000, 0., {
                'metric_type': 'gauge',
                'host_name': 'myhost2',
                'device_name': '/dev/shm',
                'unit': 'MB',
                'warn': '3080',
                'crit': '3465',
                'min': '0',
                'max': '3851',
            }),
            ('nagios.disk_space', 1000000000, 0., {
                'metric_type': 'gauge',
                'host_name': 'myhost2',
                'device_name': '/var/run',
                'unit': 'MB',
                'warn': '3080',
                'crit': '3465',
                'min': '0',
                'max': '3851',
            }),
            ('nagios.disk_space', 1000000000, 0., {
                'metric_type': 'gauge',
                'host_name': 'myhost2',
                'device_name': '/var/lock',
                'unit': 'MB',
                'warn': '3080',
                'crit': '3465',
                'min': '0',
                'max': '3851',
            }),
            ('nagios.disk_space', 1000000000, 0., {
                'metric_type': 'gauge',
                'host_name': 'myhost2',
                'device_name': '/lib/init/rw',
                'unit': 'MB',
                'warn': '3080',
                'crit': '3465',
                'min': '0',
                'max': '3851',
            }),
            ('nagios.disk_space', 1000000000, 290., {
                'metric_type': 'gauge',
                'host_name': 'myhost2',
                'device_name': '/mnt',
                'unit': 'MB',
                'warn': '338636',
                'crit': '380966',
                'min': '0',
                'max': '423296',
            }),
            ('nagios.disk_space', 1000000000, 39812., {
                'metric_type': 'gauge',
                'host_name': 'myhost2',
                'device_name': '/data',
                'unit': 'MB',
                'warn': '40940',
                'crit': '46057',
                'min': '0',
                'max': '51175',
            }),
        ]
        expected_output.sort(key=point_sorter)

        self._write_log(('\t'.join(data) for data in log_data))        

        actual_output = dogstream.check(self.agent_config, move_end=False)['dogstream']
        actual_output.sort(key=point_sorter)

        self.assertEquals(expected_output, actual_output)
Example #39
    def __init__(self, agentConfig, emitters, systemStats, hostname):
        self.emit_duration = None
        self.agentConfig = agentConfig
        self.hostname = hostname
        # system stats are generated by config.get_system_stats
        self.agentConfig['system_stats'] = systemStats
        # the agent config is used during checks; system_stats can be accessed through it
        self.os = get_os()
        self.plugins = None
        self.emitters = emitters
        self.check_timings = agentConfig.get('check_timings')
        self.push_times = {
            'host_metadata': {
                'start': time.time(),
                'interval': int(agentConfig.get('metadata_interval', 4 * 60 * 60))
            },
            'external_host_tags': {
                'start': time.time() - 3 * 60,  # Wait for the checks to init
                'interval': int(agentConfig.get('external_host_tags', 5 * 60))
            },
            'agent_checks': {
                'start': time.time(),
                'interval': int(agentConfig.get('agent_checks_interval', 10 * 60))
            },
            'processes': {
                'start': time.time(),
                'interval': int(agentConfig.get('processes_interval', 60))
            }
        }
        socket.setdefaulttimeout(15)
        self.run_count = 0
        self.continue_running = True
        self.hostname_metadata_cache = None
        self.initialized_checks_d = []
        self.init_failed_checks_d = {}

        # Unix System Checks
        self._unix_system_checks = {
            'io': u.IO(log),
            'load': u.Load(log),
            'memory': u.Memory(log),
            'processes': u.Processes(log),
            'cpu': u.Cpu(log),
            'system': u.System(log)
        }

        # Win32 System Checks
        self._win32_system_checks = {
            'io': w32.IO(log),
            'proc': w32.Processes(log),
            'memory': w32.Memory(log),
            'network': w32.Network(log),
            'cpu': w32.Cpu(log),
            'system': w32.System(log)
        }

        # Old-style metric checks
        self._ganglia = Ganglia(log)
        self._dogstream = Dogstreams.init(log, self.agentConfig)
        self._ddforwarder = DdForwarder(log, self.agentConfig)

        # Agent performance metrics check
        self._agent_metrics = None

        self._metrics_checks = []

        # Custom metric checks
        for module_spec in [s.strip() for s in self.agentConfig.get('custom_checks', '').split(',')]:
            if len(module_spec) == 0:
                continue
            try:
                self._metrics_checks.append(modules.load(module_spec, 'Check')(log))
                log.info("Registered custom check %s" % module_spec)
                log.warning("Old format custom checks are deprecated. They should be moved to the checks.d interface as old custom checks will be removed in a next version")
            except Exception:
                log.exception('Unable to load custom check module %s' % module_spec)
Example #40
    def __init__(self, agentConfig, emitters):
        self.agentConfig = agentConfig
        self.os = getOS()
        self.plugins = None
        self.emitters = emitters            
        self.checksLogger = logging.getLogger('checks')
        self.metadata_interval = int(agentConfig.get('metadata_interval', 10 * 60))
        self.metadata_start = time.time()
        socket.setdefaulttimeout(15)
        
        # Unix System Checks
        self._unix_system_checks = {
            'disk': u.Disk(self.checksLogger),
            'io': u.IO(),
            'load': u.Load(self.checksLogger),
            'memory': u.Memory(self.checksLogger),
            'network': u.Network(self.checksLogger),
            'processes': u.Processes(),
            'cpu': u.Cpu()
        }

        # Win32 System Checks
        self._win32_system_checks = {
            'disk': w32.Disk(self.checksLogger),
            'io': w32.IO(self.checksLogger),
            'proc': w32.Processes(self.checksLogger),
            'memory': w32.Memory(self.checksLogger),
            'network': w32.Network(self.checksLogger),
            'cpu': w32.Cpu(self.checksLogger)
        }

        # Old-style metric checks
        self._apache = Apache(self.checksLogger)
        self._couchdb = CouchDb(self.checksLogger)
        self._mongodb = MongoDb(self.checksLogger)
        self._mysql = MySql(self.checksLogger)
        self._rabbitmq = RabbitMq()
        self._ganglia = Ganglia(self.checksLogger)
        self._cassandra = Cassandra()
        self._dogstream = Dogstreams.init(self.checksLogger, self.agentConfig)
        self._ddforwarder = DdForwarder(self.checksLogger, self.agentConfig)
        self._ec2 = EC2(self.checksLogger)

        # Metric Checks
        self._metrics_checks = [
            Varnish(self.checksLogger),
            ElasticSearch(self.checksLogger),
            Jvm(self.checksLogger),
            Tomcat(self.checksLogger),
            ActiveMQ(self.checksLogger),
            Solr(self.checksLogger),
            WMICheck(self.checksLogger),
            Nginx(self.checksLogger),
            Memcache(self.checksLogger),
        ]

        # Custom metric checks
        for module_spec in [s.strip() for s in self.agentConfig.get('custom_checks', '').split(',')]:
            if len(module_spec) == 0:
                continue
            try:
                self._metrics_checks.append(modules.load(module_spec, 'Check')(self.checksLogger))
                self.checksLogger.info("Registered custom check %s" % module_spec)
            except Exception:
                self.checksLogger.exception('Unable to load custom check module %s' % module_spec)