Example #1
    def _postMetrics(self):
        if len(self._metrics) > 0:
            self._metrics['uuid'] = getUuid()
            self._metrics['internalHostname'] = gethostname(self._agentConfig)
            self._metrics['apiKey'] = self._agentConfig['api_key']
            MetricTransaction(self._metrics)
            self._metrics = {}
Example #2
def init(config_path=None, use_watchdog=False, use_forwarder=False):
    c = get_config(parse_args=False, cfg_path=config_path, init_logging=True)

    logger.debug("Configuration dogstatsd")

    port      = c['dogstatsd_port']
    interval  = int(c['dogstatsd_interval'])
    normalize = c['dogstatsd_normalize']
    api_key   = c['api_key']

    target = c['dd_url']
    if use_forwarder:
        target = c['dogstatsd_target'] 

    hostname = gethostname(c)

    # Create the aggregator (which is the point of communication between the
    # server and reporting threads).
    normalization_factor = 1.0
    if normalize:
        normalization_factor = 1.0 / interval
    aggregator = MetricsAggregator(hostname, normalization_factor)

    # Start the reporting thread.
    reporter = Reporter(interval, aggregator, target, api_key, use_watchdog)

    # Start the server.
    server_host = ''
    server = Server(aggregator, server_host, port)

    return reporter, server
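
For context: init() only constructs the reporter and the server; the caller is expected to start both, as Example #5 below does with its own instances. A minimal usage sketch under that assumption (the __main__ wrapper is hypothetical):

# Hypothetical wrapper: start the pieces returned by init(), mirroring
# the reporter.start()/server.start() sequence in Example #5.
if __name__ == '__main__':
    reporter, server = init(use_forwarder=True)
    reporter.start()  # reporting thread that flushes the aggregator
    server.start()    # blocks, reading dogstatsd packets off the socket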
Example #3
    def check(self, logger, agentConfig):
        logger.debug('getProcesses: start')

        # Get output from ps
        try:
            ps = subprocess.Popen(['ps', 'auxww'],
                                  stdout=subprocess.PIPE,
                                  close_fds=True).communicate()[0]
        except:
            logger.exception('getProcesses')
            return False

        # Split out each process
        processLines = ps.split('\n')

        del processLines[0]  # Removes the headers
        processLines.pop()  # Removes a trailing empty line

        processes = []

        logger.debug('getProcesses: Popen success, parsing, looping')

        for line in processLines:
            line = line.split(None, 10)
            processes.append(map(lambda s: s.strip(), line))

        logger.debug('getProcesses: completed, returning')

        return {
            'processes': processes,
            'apiKey': agentConfig['api_key'],
            'host': gethostname(agentConfig)
        }
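
The ps-parsing technique above is easy to lift out on its own. A standalone sketch, assuming only the standard library and a POSIX ps; list_processes is a hypothetical helper name, not part of the agent:

import subprocess

def list_processes():
    # Capture `ps auxww`; on failure return an empty list instead of raising.
    try:
        out = subprocess.Popen(['ps', 'auxww'],
                               stdout=subprocess.PIPE,
                               close_fds=True).communicate()[0]
    except OSError:
        return []
    # Drop the header row and blank lines, then split each row into at
    # most 11 columns so the command field keeps its embedded spaces.
    return [line.split(None, 10) for line in out.splitlines()[1:] if line]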
Example #4
    def _process_data(self, data, tags=None):
        for node in data['nodes']:
            node_data = data['nodes'][node]

            def process_metric(metric, xtype, path, xform=None):
                # closure over node_data
                self._process_metric(node_data, metric, path, xform, tags=tags)

            if 'hostname' in node_data:
                # For ES >= 0.19
                hostnames = (
                    gethostname(self.agentConfig).decode('utf-8'),
                    socket.gethostname().decode('utf-8'),
                    socket.getfqdn().decode('utf-8')
                )
                if node_data['hostname'].decode('utf-8') in hostnames:
                    self._map_metric(process_metric)
            else:
                # ES < 0.19
                # Fetch interface address from ifconfig or ip addr and check
                # against the primary IP from ES
                try:
                    base_url = self._base_es_url(self.agentConfig['elasticsearch'])
                    url = "%s%s" % (base_url, NODES_URL)
                    primary_addr = self._get_primary_addr(self.agentConfig, url, node)
                except NodeNotFound:
                    # Skip any nodes that aren't found
                    continue
                if self._host_matches_node(primary_addr):
                    self._map_metric(process_metric)
Example #5
def main(config_path=None):

    c = get_config(parse_args=False, cfg_path=config_path, init_logging=True)

    port = c["dogstatsd_port"]
    target = c["dogstatsd_target"]
    interval = c["dogstatsd_interval"]
    api_key = c["api_key"]
    host = "localhost"

    hostname = gethostname(c)
    rollup_interval = 10

    # Create the aggregator (which is the point of communication between the
    # server and reporting threads).
    aggregator = MetricsAggregator(hostname, rollup_interval)

    # Start the reporting thread.
    reporter = Reporter(interval, aggregator, target, api_key)
    reporter.start()

    # Start the server.
    server_host = ""
    server = Server(aggregator, server_host, port)
    server.start()

    # If we're here, we're done.
    logger.info("Shutting down ...")
Example #6
    def _create_event(self, status):
        hostname = gethostname(self.agentConfig).decode('utf-8')
        if status == "red":
            alert_type = "error"
            msg_title = "%s is %s" % (hostname, status)

        elif status == "yellow":
            alert_type = "warning"
            msg_title = "%s is %s" % (hostname, status)

        else:
            # then it should be green
            alert_type = "info"
            msg_title = "%s recovered as %s" % (hostname, status)

        msg = "ElasticSearch: %s just reported as %s" % (hostname, status)

        return {
            'timestamp': int(time.mktime(datetime.utcnow().timetuple())),
            'event_type': 'elasticsearch',
            'host': hostname,
            'api_key': self.agentConfig['api_key'],
            'msg_text': msg,
            'msg_title': msg_title,
            'alert_type': alert_type,
            'source_type_name': 'elasticsearch',
            'event_object': hostname
        }
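
One caveat: time.mktime() interprets its argument as local time, so pairing it with datetime.utcnow().timetuple() (as above) shifts the result by the machine's UTC offset; int(time.time()) is the unambiguous epoch value. A minimal sketch of the difference:

import time
from datetime import datetime

# mktime() treats the tuple as local time, so the utcnow() variant is
# off from the true epoch by the local UTC offset (zero only in UTC).
epoch = int(time.time())
local_based = int(time.mktime(datetime.now().timetuple()))   # ~= epoch
utc_based = int(time.mktime(datetime.utcnow().timetuple()))  # epoch - utc_offset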
Example #7
def init(config_path=None, use_watchdog=False, use_forwarder=False):
    c = get_config(parse_args=False, cfg_path=config_path, init_logging=True)

    logger.debug("Configuration dogstatsd")

    port = c['dogstatsd_port']
    interval = int(c['dogstatsd_interval'])
    normalize = c['dogstatsd_normalize']
    api_key = c['api_key']

    target = c['dd_url']
    if use_forwarder:
        target = c['dogstatsd_target']

    hostname = gethostname(c)

    # Create the aggregator (which is the point of communication between the
    # server and reporting threads).
    normalization_factor = 1.0
    if normalize:
        normalization_factor = 1.0 / interval
    aggregator = MetricsAggregator(hostname, normalization_factor)

    # Start the reporting thread.
    reporter = Reporter(interval, aggregator, target, api_key, use_watchdog)

    # Start the server.
    server_host = ''
    server = Server(aggregator, server_host, port)

    return reporter, server
Example #8
    def check(self, logger, agentConfig):
        if self.high_watermarks is None:
            # On the first run of check(), prime the high_watermarks dict
            # so that we only send events that occurred after the agent
            # started.
            # (Setting high_watermarks in the next statement prevents
            #  any kind of infinite loop (assuming nothing ever sets
            #  high_watermarks to None again!))
            self.high_watermarks = defaultdict(lambda: 0)
            self.check(logger, agentConfig)

        hudson_home = agentConfig.get('hudson_home', None)

        if not hudson_home:
            return False

        job_dirs = glob(os.path.join(hudson_home, 'jobs', '*'))

        build_events = []

        for job_dir in job_dirs:
            for output in self._get_build_results(logger, job_dir):
                output['api_key'] = agentConfig['api_key']
                output['host'] = gethostname(agentConfig)
                build_events.append(output)

        return build_events
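
A side note on the watermark store: defaultdict(lambda: 0) behaves exactly like defaultdict(int). A minimal sketch, with a hypothetical job name:

from collections import defaultdict

# Unknown jobs implicitly start at watermark 0, so only builds recorded
# after the agent starts will produce events.
high_watermarks = defaultdict(int)  # equivalent to defaultdict(lambda: 0)
high_watermarks['some_job'] = 1349000000  # hypothetical job name / epoch seconds
assert high_watermarks['never_seen_job'] == 0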
Example #9
    def check(self, logger, agentConfig):
        if self.high_watermarks is None:
            # On the first run of check(), prime the high_watermarks dict
            # so that we only send events that occurred after the agent
            # started.
            # (Setting high_watermarks in the next statement prevents
            #  any kind of infinite loop (assuming nothing ever sets
            #  high_watermarks to None again!))
            self.high_watermarks = defaultdict(lambda: 0)
            self.check(logger, agentConfig)

        hudson_home = agentConfig.get('hudson_home', None)

        if not hudson_home:
            return False

        job_dirs = glob(os.path.join(hudson_home, 'jobs', '*'))

        build_events = []

        for job_dir in job_dirs:
            for output in self._get_build_results(logger, job_dir):
                output['api_key'] = agentConfig['api_key']
                output['host'] = gethostname(agentConfig)
                build_events.append(output)

        return build_events
Example #10
def init(config_path=None, use_watchdog=False, use_forwarder=False):
    c = get_config(parse_args=False, cfg_path=config_path)
    log.debug("Configuration dogstatsd")

    port = c["dogstatsd_port"]
    interval = int(c["dogstatsd_interval"])
    normalize = c["dogstatsd_normalize"]
    api_key = c["api_key"]

    target = c["dd_url"]
    if use_forwarder:
        target = c["dogstatsd_target"]

    hostname = gethostname(c)

    # Create the aggregator (which is the point of communication between the
    # server and reporting threads).
    assert 0 < interval
    aggregator = MetricsAggregator(hostname, interval)

    # Start the reporting thread.
    reporter = Reporter(interval, aggregator, target, api_key, use_watchdog)

    # Start the server.
    server_host = ""
    server = Server(aggregator, server_host, port)

    return reporter, server
Example #11
    def check(self, logger, agentConfig):
        logger.debug('getProcesses: start')

        # Get output from ps
        try:
            ps = subprocess.Popen(['ps', 'auxww'], stdout=subprocess.PIPE, close_fds=True).communicate()[0]
        except:
            logger.exception('getProcesses')
            return False

        # Split out each process
        processLines = ps.split('\n')

        del processLines[0]  # Removes the headers
        processLines.pop()  # Removes a trailing empty line

        processes = []

        logger.debug('getProcesses: Popen success, parsing, looping')

        for line in processLines:
            line = line.split(None, 10)
            processes.append(map(lambda s: s.strip(), line))

        logger.debug('getProcesses: completed, returning')

        return { 'processes':   processes,
                 'apiKey':      agentConfig['api_key'],
                 'host':        gethostname(agentConfig) }
Example #12
def init(config_path=None, use_watchdog=False, use_forwarder=False):
    c = get_config(parse_args=False, cfg_path=config_path)
    log.debug("Configuration dogstatsd")

    port      = c['dogstatsd_port']
    interval  = int(c['dogstatsd_interval'])
    normalize = c['dogstatsd_normalize']
    api_key   = c['api_key']
    non_local_traffic = c['non_local_traffic']

    target = c['dd_url']
    if use_forwarder:
        target = c['dogstatsd_target'] 

    hostname = gethostname(c)

    # Create the aggregator (which is the point of communication between the
    # server and reporting threads).
    assert 0 < interval
    aggregator = MetricsAggregator(hostname, interval)

    # Start the reporting thread.
    reporter = Reporter(interval, aggregator, target, api_key, use_watchdog)

    # Start the server on an IPv4 stack
    # Default to loopback
    server_host = '127.0.0.1'
    # If specified, bind to all addresses
    if non_local_traffic:
        server_host = ''

    server = Server(aggregator, server_host, port)

    return reporter, server
Example #13
    def _postMetrics(self):

        if len(self._metrics) > 0:
            self._metrics["uuid"] = getUuid()
            self._metrics["internalHostname"] = gethostname(self._agentConfig)
            self._metrics["apiKey"] = self._agentConfig["api_key"]
            MetricTransaction(self._metrics, {})
            self._metrics = {}
Example #14
    def _postMetrics(self):

        if len(self._metrics) > 0:
            self._metrics['uuid'] = get_uuid()
            self._metrics['internalHostname'] = gethostname(self._agentConfig)
            self._metrics['apiKey'] = self._agentConfig['api_key']
            MetricTransaction(self._metrics, {})
            self._metrics = {}
Example #15
    def run(self):

        handlers = [
            (r"/intake/?", AgentInputHandler),
            (r"/api/v1/series/?", ApiInputHandler),
            (r"/status/?", StatusHandler),
        ]

        settings = dict(
            cookie_secret="12oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=",
            xsrf_cookies=False,
            debug=False,
        )

        non_local_traffic = self._agentConfig.get("non_local_traffic", False)

        tornado.web.Application.__init__(self, handlers, **settings)
        http_server = tornado.httpserver.HTTPServer(self)
        # non_local_traffic must be == True to match, not just some non-false value
        if non_local_traffic is True:
            http_server.listen(self._port)
        else:
            # localhost in lieu of 127.0.0.1 to support IPv6
            http_server.listen(self._port, address = "localhost")
        logging.info("Listening on port %d" % self._port)

        # Register callbacks
        self.mloop = tornado.ioloop.IOLoop.instance()

        def flush_trs():
            if self._watchdog:
                self._watchdog.reset()
            self._postMetrics()
            self._tr_manager.flush()

        tr_sched = tornado.ioloop.PeriodicCallback(flush_trs,
                                                   TRANSACTION_FLUSH_INTERVAL,
                                                   io_loop=self.mloop)

        # Register optional Graphite listener
        gport = self._agentConfig.get("graphite_listen_port", None)
        if gport is not None:
            logging.info("Starting graphite listener on port %s" % gport)
            from graphite import GraphiteServer
            gs = GraphiteServer(self, gethostname(self._agentConfig), io_loop=self.mloop)
            if non_local_traffic is True:
                gs.listen(gport)
            else:
                gs.listen(gport, address = "localhost")

        # Start everything
        if self._watchdog:
            self._watchdog.reset()
        tr_sched.start()

        self.mloop.start()
        logging.info("Stopped")
Example #16
    def run(self):

        handlers = [
            (r"/intake/?", AgentInputHandler),
            (r"/api/v1/series/?", ApiInputHandler),
            (r"/status/?", StatusHandler),
        ]

        settings = dict(
            cookie_secret="12oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=",
            xsrf_cookies=False,
            debug=True,
        )

        tornado.web.Application.__init__(self, handlers, **settings)
        http_server = tornado.httpserver.HTTPServer(self)
        http_server.listen(self._port)
        logging.info("Listening on port %d" % self._port)

        # Register callbacks
        self.mloop = tornado.ioloop.IOLoop.instance()

        def flush_trs():
            if self._watchdog:
                self._watchdog.reset()
            self._postMetrics()
            self._tr_manager.flush()

        tr_sched = tornado.ioloop.PeriodicCallback(flush_trs,
                                                   TRANSACTION_FLUSH_INTERVAL,
                                                   io_loop=self.mloop)

        # Register optional Graphite listener
        gport = self._agentConfig.get("graphite_listen_port", None)
        if gport is not None:
            logging.info("Starting graphite listener on port %s" % gport)
            from graphite import GraphiteServer
            gs = GraphiteServer(self,
                                gethostname(self._agentConfig),
                                io_loop=self.mloop)
            gs.listen(gport)

        # Start everything
        if self._watchdog:
            self._watchdog.reset()
        tr_sched.start()

        self.mloop.start()
        logging.info("Stopped")
Example #17
    def _build_payload(self, start_event=True):
        """
        Return a dictionary that contains all of the generic payload data.
        """
        now = time.time()
        payload = {
            "collection_timestamp": now,
            "os": self.os,
            "python": sys.version,
            "agentVersion": self.agentConfig["version"],
            "apiKey": self.agentConfig["api_key"],
            "events": {},
            "metrics": [],
            "resources": {},
            "internalHostname": gethostname(self.agentConfig),
            "uuid": get_uuid(),
        }

        # Include system stats on first postback
        if start_event and self._is_first_run():
            payload["systemStats"] = self.agentConfig.get("system_stats", {})
            # Also post an event in the newsfeed
            payload["events"]["System"] = [
                {
                    "api_key": self.agentConfig["api_key"],
                    "host": payload["internalHostname"],
                    "timestamp": now,
                    "event_type": "Agent Startup",
                    "msg_text": "Version %s" % get_version(),
                }
            ]

        # Periodically send the host metadata.
        if self._is_first_run() or self._should_send_metadata():
            payload["meta"] = self._get_metadata()
            self.metadata_cache = payload["meta"]
            # Add static tags from the configuration file
            if self.agentConfig["tags"] is not None:
                payload["tags"] = self.agentConfig["tags"]

            # Log the metadata on the first run
            if self._is_first_run():
                if self.agentConfig["tags"] is not None:
                    log.info(u"Hostnames: %s, tags: %s" % (repr(self.metadata_cache), self.agentConfig["tags"]))
                else:
                    log.info(u"Hostnames: %s" % repr(self.metadata_cache))

        return payload
Example #18
    def run(self):

        handlers = [
            (r"/intake/?", AgentInputHandler),
            (r"/api/v1/series/?", ApiInputHandler),
            (r"/status/?", StatusHandler),
        ]

        settings = dict(
            cookie_secret="12oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=",
            xsrf_cookies=False,
            debug=True,
        )

        tornado.web.Application.__init__(self, handlers, **settings)
        http_server = tornado.httpserver.HTTPServer(self)
        http_server.listen(self._port)
        logging.info("Listening on port %d" % self._port)

        # Register callbacks
        self.mloop = tornado.ioloop.IOLoop.instance()

        def flush_trs():
            if self._watchdog:
                self._watchdog.reset()
            self._postMetrics()
            self._tr_manager.flush()

        tr_sched = tornado.ioloop.PeriodicCallback(flush_trs,
                                                   TRANSACTION_FLUSH_INTERVAL,
                                                   io_loop=self.mloop)

        # Register optional Graphite listener
        gport = self._agentConfig.get("graphite_listen_port", None)
        if gport is not None:
            logging.info("Starting graphite listener on port %s" % gport)
            from graphite import GraphiteServer
            gs = GraphiteServer(self, gethostname(self._agentConfig), io_loop=self.mloop)
            gs.listen(gport)

        # Start everything
        if self._watchdog:
            self._watchdog.reset()
        tr_sched.start()
        self.mloop.start()
Example #19
    def _build_payload(self):
        """
        Return a dictionary that contains all of the generic payload data.
        """

        payload = {
            'collection_timestamp': time.time(),
            'os': self.os,
            'python': sys.version,
            'agentVersion': self.agentConfig['version'],
            'apiKey': self.agentConfig['api_key'],
            'events': {},
            'metrics': [],
            'resources': {},
            'internalHostname': gethostname(self.agentConfig),
            'uuid': get_uuid(),
        }

        # Include system stats on first postback
        if self._is_first_run():
            payload['systemStats'] = self.agentConfig.get('systemStats', {})
            # Also post an event in the newsfeed
            payload['events']['System'] = [{
                'api_key': self.agentConfig['api_key'],
                'host': payload['internalHostname'],
                'timestamp': int(time.mktime(datetime.datetime.now().timetuple())),
                'event_type': 'Agent Startup',
                'msg_text': 'Version %s' % get_version()
            }]

        # Periodically send the host metadata.
        if self._is_first_run() or self._should_send_metadata():
            payload['meta'] = self._get_metadata()
            # Add static tags from the configuration file
            if self.agentConfig['tags'] is not None:
                payload['tags'] = self.agentConfig['tags']

        return payload
Example #20
    def check(self, agentConfig):
        # Get output from ps
        try:
            ps = subprocess.Popen(["ps", "auxww"], stdout=subprocess.PIPE, close_fds=True).communicate()[0]
        except StandardError:
            self.logger.exception("getProcesses")
            return False

        # Split out each process
        processLines = ps.split("\n")

        del processLines[0]  # Removes the headers
        processLines.pop()  # Removes a trailing empty line

        processes = []

        for line in processLines:
            line = line.split(None, 10)
            processes.append(map(lambda s: s.strip(), line))

        return {"processes": processes, "apiKey": agentConfig["api_key"], "host": gethostname(agentConfig)}
Example #21
    def _fetch_rrd_meta(self, agentConfig, whitelist):
        ''' Return a list of dicts with host_name, device_name, and rrd_path '''
        def _in_whitelist(rrd):
            path = rrd.replace('<path_rra>/','')
            for p in whitelist:
                if fnmatch(path, p):
                    return True
            return False

        c = self.db.cursor()
        c.execute("""
                SELECT
                    h.hostname as host_name,
                    dl.snmp_index as device_name,
                    dt.data_source_path as rrd_path
                FROM data_local dl
                    JOIN host h on dl.host_id = h.id
                    JOIN data_template_data dt on dt.local_data_id = dl.id
                WHERE dt.data_source_path IS NOT NULL
                AND dt.data_source_path != ''
            """)
        res = []
        for host_name, device_name, rrd_path in c.fetchall():
            if not whitelist or _in_whitelist(rrd_path):
                if host_name in ('localhost', '127.0.0.1'):
                    host_name = gethostname(agentConfig)
                res.append({
                    'host_name': host_name,
                    'device_name': device_name or None,
                    'rrd_path': rrd_path.replace('<path_rra>', self.rrd_path)
                })

        # Collect stats
        self._add_stat('cacti.rrd.count', len(res), agentConfig)
        num_hosts = len(set([r['host_name'] for r in res]))
        self._add_stat('cacti.hosts.count', num_hosts, agentConfig)

        return res
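
The whitelist test in _in_whitelist generalizes to a one-liner with any(). A standalone sketch; in_whitelist is a hypothetical helper name:

from fnmatch import fnmatch

# A path passes if the whitelist is empty or any glob pattern matches,
# mirroring the `not whitelist or _in_whitelist(rrd_path)` logic above.
def in_whitelist(path, whitelist):
    return not whitelist or any(fnmatch(path, pat) for pat in whitelist)

assert in_whitelist('host/disk.rrd', [])              # empty whitelist passes all
assert in_whitelist('host/disk.rrd', ['*/disk.rrd'])  # glob match
assert not in_whitelist('host/load.rrd', ['*/disk.rrd'])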
Example #22
    def _build_payload(self, start_event=True):
        """
        Return a dictionary that contains all of the generic payload data.
        """

        payload = {
            'collection_timestamp': time.time(),
            'os' : self.os,
            'python': sys.version,
            'agentVersion' : self.agentConfig['version'],
            'apiKey': self.agentConfig['api_key'],
            'events': {},
            'metrics': [],
            'resources': {},
            'internalHostname' : gethostname(self.agentConfig),
            'uuid' : get_uuid(),
        }

        # Include system stats on first postback
        if start_event and self._is_first_run():
            payload['systemStats'] = self.agentConfig.get('system_stats', {})
            # Also post an event in the newsfeed
            payload['events']['System'] = [{'api_key': self.agentConfig['api_key'],
                                 'host': payload['internalHostname'],
                                 'timestamp': int(time.mktime(datetime.datetime.now().timetuple())),
                                 'event_type':'Agent Startup',
                                 'msg_text': 'Version %s' % get_version()
                                 }]

        # Periodically send the host metadata.
        if self._is_first_run() or self._should_send_metadata():
            payload['meta'] = self._get_metadata()
            self.metadata_cache = payload['meta']
            # Add static tags from the configuration file
            if self.agentConfig['tags'] is not None:
                payload['tags'] = self.agentConfig['tags']

        return payload
Example #23
    def check(self, agentConfig):
        # Get output from ps
        try:
            ps = subprocess.Popen(['ps', 'auxww'], stdout=subprocess.PIPE, close_fds=True).communicate()[0]
        except StandardError:
            self.logger.exception('getProcesses')
            return False

        # Split out each process
        processLines = ps.split('\n')

        del processLines[0]  # Removes the headers
        processLines.pop()  # Removes a trailing empty line

        processes = []

        for line in processLines:
            line = line.split(None, 10)
            processes.append(map(lambda s: s.strip(), line))

        return { 'processes':   processes,
                 'apiKey':      agentConfig['api_key'],
                 'host':        gethostname(agentConfig) }
Example #24
    def testCheck(self):
        config = {
            'instances': [{
                'url': 'http://localhost:3834/stats',
                'username': '******',
                'password': '******'
            }]
        }
        self.start_server(HAPROXY_CFG, config)

        # Run the check against our running server
        self.check.check(config['instances'][0])
        # Sleep for 1 second so the rate interval >=1
        time.sleep(1)
        # Run the check again so we get the rates
        self.check.check(config['instances'][0])

        # Metric assertions
        metrics = self.check.get_metrics()
        assert metrics
        self.assertTrue(isinstance(metrics, list))
        self.assertTrue(len(metrics) > 0)

        self.assertEquals(len([t for t in metrics
            if t[0] == "haproxy.backend.bytes.in_rate"]), 2, metrics)
        self.assertEquals(len([t for t in metrics
            if t[0] == "haproxy.frontend.session.current"]), 1, metrics)

        inst = config['instances'][0]
        data = self.check._fetch_data(inst['url'], inst['username'], inst['password'])
        new_data = [l.replace("OPEN", "DOWN") for l in data]

        self.check._process_data(new_data, gethostname(self.agentConfig),
            event_cb=self.check._process_events)

        assert self.check.has_events()
        assert len(self.check.get_events()) == 1
Example #25
    def _add_stat(self, name, value, agentConfig):
        ''' For collecting stats on Cacti checks '''
        self.stats.append(
            (name, time.time(), value, {'host_name': gethostname(agentConfig)})
        )
Example #26
    def doChecks(self, firstRun=False, systemStats=False):
        """Actual work
        """
        self.checksLogger.info("Starting checks")

        apacheStatus = self.getApacheStatus()
        diskUsage = self.getDiskUsage()
        loadAvrgs = self.getLoadAvrgs()
        memory = self.getMemoryUsage()
        mysqlStatus = self.getMySQLStatus()
        pgsqlStatus = self.getPgSQLStatus()
        networkTraffic = self.getNetworkTraffic()
        nginxStatus = self.getNginxStatus()
        processes = self.getProcesses()
        rabbitmq = self.getRabbitMQStatus()
        mongodb = self.getMongoDBStatus()
        couchdb = self.getCouchDBStatus()
        ioStats = self.getIOStats()
        cpuStats = self.getCPUStats()
        gangliaData = self.getGangliaData()
        cassandraData = self.getCassandraData()
        jvmData = self.getJvmData()
        tomcatData = self.getTomcatData()
        activeMQData = self.getActiveMQData()
        solrData = self.getSolrData()
        memcacheData = self.getMemcacheData()
        dogstreamData = self.getDogstreamData()
        ddforwarderData = self.getDdforwarderData()

        checksData = {
            'collection_timestamp': time.time(),
            'os' : self.os,
            'python': sys.version,
            'agentVersion' : self.agentConfig['version'], 
            'loadAvrg1' : loadAvrgs['1'], 
            'loadAvrg5' : loadAvrgs['5'], 
            'loadAvrg15' : loadAvrgs['15'], 
            'memPhysUsed' : memory.get('physUsed'), 
            'memPhysFree' : memory.get('physFree'), 
            'memPhysTotal' : memory.get('physTotal'), 
            'memPhysUsable' : memory.get('physUsable'), 
            'memSwapUsed' : memory.get('swapUsed'), 
            'memSwapFree' : memory.get('swapFree'), 
            'memSwapTotal' : memory.get('swapTotal'), 
            'memCached' : memory.get('physCached'), 
            'memBuffers': memory.get('physBuffers'),
            'memShared': memory.get('physShared'),
            'networkTraffic' : networkTraffic, 
            'processes' : processes,
            'apiKey': self.agentConfig['apiKey'],
            'events': {},
            'resources': {},
        }

        if diskUsage is not False and len(diskUsage) == 2:
            checksData["diskUsage"] = diskUsage[0]
            checksData["inodes"] = diskUsage[1]
            
        if cpuStats is not False and cpuStats is not None:
            checksData.update(cpuStats)

        if gangliaData is not False and gangliaData is not None:
            checksData['ganglia'] = gangliaData
           
        if cassandraData is not False and cassandraData is not None:
            checksData['cassandra'] = cassandraData
 
        # Apache Status
        if apacheStatus: 
            checksData.update(apacheStatus)
            
        # MySQL Status
        if mysqlStatus:
            checksData.update(mysqlStatus)
       
        # PostgreSQL status
        if pgsqlStatus: 
            checksData['postgresql'] = pgsqlStatus

        # Nginx Status
        if nginxStatus:
            checksData.update(nginxStatus)
            
        # RabbitMQ
        if rabbitmq:
            checksData['rabbitMQ'] = rabbitmq
        
        # MongoDB
        if mongodb:
            if 'events' in mongodb:
                checksData['events']['Mongo'] = mongodb['events']['Mongo']
                del mongodb['events']
            checksData['mongoDB'] = mongodb
            
        # CouchDB
        if couchdb:
            checksData['couchDB'] = couchdb
        
        if ioStats:
            checksData['ioStats'] = ioStats
            
        if jvmData:
            checksData['jvm'] = jvmData

        if tomcatData:
            checksData['tomcat'] = tomcatData

        if activeMQData:
            checksData['activemq'] = activeMQData

        if solrData:
            checksData['solr'] = solrData

        if memcacheData:
            checksData['memcache'] = memcacheData
        
        if dogstreamData:
            dogstreamEvents = dogstreamData.get('dogstreamEvents', None)
            if dogstreamEvents:
                if 'dogstream' in checksData['events']:
                    checksData['events']['dogstream'].extend(dogstreamEvents)
                else:
                    checksData['events']['dogstream'] = dogstreamEvents
                del dogstreamData['dogstreamEvents']

            checksData.update(dogstreamData)

        if ddforwarderData:
            checksData['datadog'] = ddforwarderData
 
        # Include server identifiers
        checksData['internalHostname'] = gethostname(self.agentConfig)
        checksData['uuid'] = getUuid()
        self.checksLogger.debug('doChecks: added uuid %s' % checksData['uuid'])
        
        # Process the event checks. 
        for event_check in self._event_checks:
            event_data = event_check.check(self.checksLogger, self.agentConfig)
            if event_data:
                checksData['events'][event_check.key] = event_data
       
        # Include system stats on first postback
        if firstRun:
            checksData['systemStats'] = systemStats
            # Add static tags from the configuration file
            if self.agentConfig['tags'] is not None:
                checksData['tags'] = self.agentConfig['tags']
            # Also post an event in the newsfeed
            checksData['events']['System'] = [{'api_key': self.agentConfig['apiKey'],
                                               'host': checksData['internalHostname'],
                                               'timestamp': int(time.mktime(datetime.datetime.now().timetuple())),
                                               'event_type':'Agent Startup',
                                               'msg_text': 'Version %s' % get_version()
                                            }]

            # Collect metadata
            checksData['meta'] = self.get_metadata()

        # Resources checks
        has_resource = False
        for resources_check in self._resources_checks:
            resources_check.check()
            snaps = resources_check.pop_snapshots()
            if snaps:
                has_resource = True
                res_value = { 'snaps': snaps,
                              'format_version': resources_check.get_format_version() }                              
                res_format = resources_check.describe_format_if_needed()
                if res_format is not None:
                    res_value['format_description'] = res_format
                checksData['resources'][resources_check.RESOURCE_KEY] = res_value
 
        if has_resource:
            checksData['resources']['meta'] = {
                        'api_key': self.agentConfig['apiKey'],
                        'host': checksData['internalHostname'],
                    }

        metrics = []
        for metrics_check in self._metrics_checks:
            res = metrics_check.check(self.agentConfig)
            if res:
                metrics.extend(res)
        checksData['metrics'] = metrics

        # Send back data
        self.checksLogger.debug("checksData: %s" % checksData)
        for emitter in self.emitters:
            emitter(checksData, self.checksLogger, self.agentConfig)
        self.checksLogger.info("Checks done")
Example #27
    def doChecks(self, firstRun=False, systemStats=False):
        """Actual work
        """
        self.checksLogger.info("Starting checks")

        apacheStatus = self._apache.check(self.agentConfig)
        diskUsage = self._disk.check(self.agentConfig)
        loadAvrgs = self._load.check(self.agentConfig)
        memory = self._memory.check(self.agentConfig)
        mysqlStatus = self._mysql.check(self.agentConfig)
        pgsqlStatus = self._pgsql.check(self.agentConfig)
        networkTraffic = self._network.check(self.agentConfig)
        nginxStatus = self._nginx.check(self.agentConfig)
        processes = self._processes.check(self.checksLogger, self.agentConfig)
        rabbitmq = self._rabbitmq.check(self.checksLogger, self.agentConfig)
        mongodb = self._mongodb.check(self.agentConfig)
        couchdb = self._couchdb.check(self.agentConfig)
        ioStats = self._io.check(self.checksLogger, self.agentConfig)
        cpuStats = self._cpu.check(self.checksLogger, self.agentConfig)
        gangliaData = self._ganglia.check(self.agentConfig)
        cassandraData = self._cassandra.check(self.checksLogger, self.agentConfig)
        jvmData = self._jvm.check(self.agentConfig)
        tomcatData = self._tomcat.check(self.agentConfig)
        activeMQData = self._activemq.check(self.agentConfig)
        solrData = self._solr.check(self.agentConfig)
        memcacheData = self._memcache.check(self.agentConfig)
        dogstreamData = self._dogstream.check(self.agentConfig)
        ddforwarderData = self._ddforwarder.check(self.agentConfig)

        checksData = {
            "collection_timestamp": time.time(),
            "os": self.os,
            "python": sys.version,
            "agentVersion": self.agentConfig["version"],
            "loadAvrg1": loadAvrgs["1"],
            "loadAvrg5": loadAvrgs["5"],
            "loadAvrg15": loadAvrgs["15"],
            "memPhysUsed": memory.get("physUsed"),
            "memPhysFree": memory.get("physFree"),
            "memPhysTotal": memory.get("physTotal"),
            "memPhysUsable": memory.get("physUsable"),
            "memSwapUsed": memory.get("swapUsed"),
            "memSwapFree": memory.get("swapFree"),
            "memSwapTotal": memory.get("swapTotal"),
            "memCached": memory.get("physCached"),
            "memBuffers": memory.get("physBuffers"),
            "memShared": memory.get("physShared"),
            "networkTraffic": networkTraffic,
            "processes": processes,
            "apiKey": self.agentConfig["api_key"],
            "events": {},
            "resources": {},
        }

        if diskUsage is not False and len(diskUsage) == 2:
            checksData["diskUsage"] = diskUsage[0]
            checksData["inodes"] = diskUsage[1]

        if cpuStats is not False and cpuStats is not None:
            checksData.update(cpuStats)

        if gangliaData is not False and gangliaData is not None:
            checksData["ganglia"] = gangliaData

        if cassandraData is not False and cassandraData is not None:
            checksData["cassandra"] = cassandraData

        # Apache Status
        if apacheStatus:
            checksData.update(apacheStatus)

        # MySQL Status
        if mysqlStatus:
            checksData.update(mysqlStatus)

        # PostgreSQL status
        if pgsqlStatus:
            checksData["postgresql"] = pgsqlStatus

        # Nginx Status
        if nginxStatus:
            checksData.update(nginxStatus)

        # RabbitMQ
        if rabbitmq:
            checksData["rabbitMQ"] = rabbitmq

        # MongoDB
        if mongodb:
            if mongodb.has_key("events"):
                checksData["events"]["Mongo"] = mongodb["events"]["Mongo"]
                del mongodb["events"]
            checksData["mongoDB"] = mongodb

        # CouchDB
        if couchdb:
            checksData["couchDB"] = couchdb

        if ioStats:
            checksData["ioStats"] = ioStats

        if jvmData:
            checksData["jvm"] = jvmData

        if tomcatData:
            checksData["tomcat"] = tomcatData

        if activeMQData:
            checksData["activemq"] = activeMQData

        if solrData:
            checksData["solr"] = solrData

        if memcacheData:
            checksData["memcache"] = memcacheData

        if dogstreamData:
            dogstreamEvents = dogstreamData.get("dogstreamEvents", None)
            if dogstreamEvents:
                if "dogstream" in checksData["events"]:
                    checksData["events"]["dogstream"].extend(dogstreamEvents)
                else:
                    checksData["events"]["dogstream"] = dogstreamEvents
                del dogstreamData["dogstreamEvents"]

            checksData.update(dogstreamData)

        if ddforwarderData:
            checksData["datadog"] = ddforwarderData

        # Include server identifiers
        checksData["internalHostname"] = gethostname(self.agentConfig)
        checksData["uuid"] = getUuid()
        self.checksLogger.debug("doChecks: added uuid %s" % checksData["uuid"])

        # Process the event checks.
        for event_check in self._event_checks:
            event_data = event_check.check(self.checksLogger, self.agentConfig)
            if event_data:
                checksData["events"][event_check.key] = event_data

        # Include system stats on first postback
        if firstRun:
            checksData["systemStats"] = systemStats
            # Add static tags from the configuration file
            if self.agentConfig["tags"] is not None:
                checksData["tags"] = self.agentConfig["tags"]
            # Also post an event in the newsfeed
            checksData["events"]["System"] = [
                {
                    "api_key": self.agentConfig["api_key"],
                    "host": checksData["internalHostname"],
                    "timestamp": int(time.mktime(datetime.datetime.now().timetuple())),
                    "event_type": "Agent Startup",
                    "msg_text": "Version %s" % get_version(),
                }
            ]

            # Collect metadata
            checksData["meta"] = self.get_metadata()

        # Resources checks
        has_resource = False
        for resources_check in self._resources_checks:
            resources_check.check()
            snaps = resources_check.pop_snapshots()
            if snaps:
                has_resource = True
                res_value = {"snaps": snaps, "format_version": resources_check.get_format_version()}
                res_format = resources_check.describe_format_if_needed()
                if res_format is not None:
                    res_value["format_description"] = res_format
                checksData["resources"][resources_check.RESOURCE_KEY] = res_value

        if has_resource:
            checksData["resources"]["meta"] = {
                "api_key": self.agentConfig["api_key"],
                "host": checksData["internalHostname"],
            }

        metrics = []
        for metrics_check in self._metrics_checks:
            res = metrics_check.check(self.agentConfig)
            if res:
                metrics.extend(res)
        checksData["metrics"] = metrics

        # Send back data
        self.checksLogger.debug("checksData: %s" % checksData)
        for emitter in self.emitters:
            emitter(checksData, self.checksLogger, self.agentConfig)
        self.checksLogger.info("Checks done")
Example #28
    def doChecks(self, firstRun=False, systemStats=False, checksd=None):
        """Actual work
        """
        self.checksLogger.info("Starting checks")
        checksData = {
            'collection_timestamp': time.time(),
            'os' : self.os,
            'python': sys.version,
            'agentVersion' : self.agentConfig['version'],             
            'apiKey': self.agentConfig['api_key'],
            'events': {},
            'resources': {}
        }
        metrics = []
        events = {}

        # Run the system checks. Checks will depend on the OS
        if self.os == 'windows':
            # Win32 system checks
            metrics.extend(self._win32_system_checks['disk'].check(self.agentConfig))
            metrics.extend(self._win32_system_checks['memory'].check(self.agentConfig))
            metrics.extend(self._win32_system_checks['cpu'].check(self.agentConfig))
            metrics.extend(self._win32_system_checks['network'].check(self.agentConfig))
            metrics.extend(self._win32_system_checks['io'].check(self.agentConfig))
            metrics.extend(self._win32_system_checks['proc'].check(self.agentConfig))
        else:
            # Unix system checks
            sys_checks = self._unix_system_checks

            diskUsage = sys_checks['disk'].check(self.agentConfig)
            if diskUsage is not False and len(diskUsage) == 2:
                checksData["diskUsage"] = diskUsage[0]
                checksData["inodes"] = diskUsage[1]

            loadAvrgs = sys_checks['load'].check(self.agentConfig)
            checksData.update({
                'loadAvrg1': loadAvrgs['1'],
                'loadAvrg5': loadAvrgs['5'],
                'loadAvrg15': loadAvrgs['15']
            })

            memory = sys_checks['memory'].check(self.agentConfig)
            checksData.update({
                'memPhysUsed' : memory.get('physUsed'), 
                'memPhysFree' : memory.get('physFree'), 
                'memPhysTotal' : memory.get('physTotal'), 
                'memPhysUsable' : memory.get('physUsable'), 
                'memSwapUsed' : memory.get('swapUsed'), 
                'memSwapFree' : memory.get('swapFree'), 
                'memSwapTotal' : memory.get('swapTotal'), 
                'memCached' : memory.get('physCached'), 
                'memBuffers': memory.get('physBuffers'),
                'memShared': memory.get('physShared')
            })

            ioStats = sys_checks['io'].check(self.checksLogger, self.agentConfig)
            if ioStats:
                checksData['ioStats'] = ioStats

            processes = sys_checks['processes'].check(self.checksLogger, self.agentConfig)
            checksData.update({'processes': processes})

            networkTraffic = sys_checks['network'].check(self.agentConfig)
            checksData.update({'networkTraffic': networkTraffic})

            cpuStats = sys_checks['cpu'].check(self.checksLogger, self.agentConfig)
            if cpuStats is not False and cpuStats is not None:
                checksData.update(cpuStats)

        # Run old-style checks
        apacheStatus = self._apache.check(self.agentConfig)
        mysqlStatus = self._mysql.check(self.agentConfig)
        rabbitmq = self._rabbitmq.check(self.checksLogger, self.agentConfig)
        mongodb = self._mongodb.check(self.agentConfig)
        couchdb = self._couchdb.check(self.agentConfig)
        gangliaData = self._ganglia.check(self.agentConfig)
        cassandraData = self._cassandra.check(self.checksLogger, self.agentConfig)
        dogstreamData = self._dogstream.check(self.agentConfig)
        ddforwarderData = self._ddforwarder.check(self.agentConfig)

        if gangliaData is not False and gangliaData is not None:
            checksData['ganglia'] = gangliaData
           
        if cassandraData is not False and cassandraData is not None:
            checksData['cassandra'] = cassandraData
 
        # Apache Status
        if apacheStatus: 
            checksData.update(apacheStatus)
            
        # MySQL Status
        if mysqlStatus:
            checksData.update(mysqlStatus)
       
        # RabbitMQ
        if rabbitmq:
            checksData['rabbitMQ'] = rabbitmq
        
        # MongoDB
        if mongodb:
            if 'events' in mongodb:
                events['Mongo'] = mongodb['events']['Mongo']
                del mongodb['events']
            checksData['mongoDB'] = mongodb
            
        # CouchDB
        if couchdb:
            checksData['couchDB'] = couchdb
            
        if dogstreamData:
            dogstreamEvents = dogstreamData.get('dogstreamEvents', None)
            if dogstreamEvents:
                if 'dogstream' in events:
                    events['dogstream'].extend(dogstreamEvents)
                else:
                    events['dogstream'] = dogstreamEvents
                del dogstreamData['dogstreamEvents']

            checksData.update(dogstreamData)

        if ddforwarderData:
            checksData['datadog'] = ddforwarderData
 
        # Include server identifiers
        checksData['internalHostname'] = gethostname(self.agentConfig)
        checksData['uuid'] = getUuid()
        self.checksLogger.debug('doChecks: added uuid %s' % checksData['uuid'])
        
        # Process the event checks. 
        for event_check in self._event_checks:
            event_data = event_check.check(self.checksLogger, self.agentConfig)
            if event_data:
                events[event_check.key] = event_data
       
        # Include system stats on first postback
        if firstRun:
            checksData['systemStats'] = systemStats
            # Also post an event in the newsfeed
            events['System'] = [{'api_key': self.agentConfig['api_key'],
                                 'host': checksData['internalHostname'],
                                 'timestamp': int(time.mktime(datetime.datetime.now().timetuple())),
                                 'event_type': 'Agent Startup',
                                 'msg_text': 'Version %s' % get_version()
                                 }]

        if firstRun or self.should_send_metadata():
            # Collect metadata
            checksData['meta'] = self.get_metadata()
            # Add static tags from the configuration file
            if self.agentConfig['tags'] is not None:
                checksData['tags'] = self.agentConfig['tags']

        # Resources checks
        if self.os != 'windows':
            has_resource = False
            for resources_check in self._resources_checks:
                resources_check.check()
                snaps = resources_check.pop_snapshots()
                if snaps:
                    has_resource = True
                    res_value = { 'snaps': snaps,
                                  'format_version': resources_check.get_format_version() }                              
                    res_format = resources_check.describe_format_if_needed()
                    if res_format is not None:
                        res_value['format_description'] = res_format
                    checksData['resources'][resources_check.RESOURCE_KEY] = res_value
     
            if has_resource:
                checksData['resources']['meta'] = {
                            'api_key': self.agentConfig['api_key'],
                            'host': checksData['internalHostname'],
                        }

        for metrics_check in self._metrics_checks:
            res = metrics_check.check(self.agentConfig)
            if res:
                metrics.extend(res)

        # checks.d checks
        checksd = checksd or []
        for check in checksd:
            check_cls = check['class']
            for instance in check['instances']:
                try:
                    # Run the check for each configuration
                    check_cls.check(instance)
                    metrics.extend(check_cls.get_metrics())
                    if check_cls.has_events():
                        if check['name'] not in events:
                            events[check['name']] = []
                        for ev in check_cls.get_events():
                            events[check['name']].append(ev)
                except Exception:
                    self.checksLogger.exception("Check %s failed" % check_cls.name)


        # Store the metrics in the payload
        checksData['metrics'] = metrics

        # Store the events in the payload
        checksData['events'] = events

        # Send back data
        self.checksLogger.debug("checksData: %s" % checksData)
        for emitter in self.emitters:
            emitter(checksData, self.checksLogger, self.agentConfig)
        self.checksLogger.info("Checks done")
Example #29
    def run(self):
        handlers = [
            (r"/intake/?", AgentInputHandler),
            (r"/api/v1/series/?", ApiInputHandler),
            (r"/status/?", StatusHandler),
        ]

        settings = dict(
            cookie_secret="12oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=",
            xsrf_cookies=False,
            debug=False,
        )

        non_local_traffic = self._agentConfig.get("non_local_traffic", False)

        tornado.web.Application.__init__(self, handlers, **settings)
        http_server = tornado.httpserver.HTTPServer(self)

        # set the root logger to warn so tornado is less chatty
        logging.getLogger().setLevel(logging.WARNING)

        # but keep the forwarder logger at the original level
        forwarder_logger = logging.getLogger('forwarder')
        log_config = get_logging_config()
        forwarder_logger.setLevel(log_config['log_level'] or logging.INFO)

        # non_local_traffic must be == True to match, not just some non-false value
        if non_local_traffic is True:
            http_server.listen(self._port)
        else:
            # localhost in lieu of 127.0.0.1 to support IPv6
            try:
                http_server.listen(self._port, address = "localhost")
            except gaierror:
                log.warning("Warning localhost seems undefined in your host file, using 127.0.0.1 instead")
                http_server.listen(self._port, address = "127.0.0.1")

        log.info("Listening on port %d" % self._port)

        # Register callbacks
        self.mloop = tornado.ioloop.IOLoop.instance()

        def flush_trs():
            if self._watchdog:
                self._watchdog.reset()
            self._postMetrics()
            self._tr_manager.flush()

        tr_sched = tornado.ioloop.PeriodicCallback(flush_trs,
                                                   TRANSACTION_FLUSH_INTERVAL,
                                                   io_loop=self.mloop)

        # Register optional Graphite listener
        gport = self._agentConfig.get("graphite_listen_port", None)
        if gport is not None:
            log.info("Starting graphite listener on port %s" % gport)
            from graphite import GraphiteServer
            gs = GraphiteServer(self, gethostname(self._agentConfig), io_loop=self.mloop)
            if non_local_traffic is True:
                gs.listen(gport)
            else:
                gs.listen(gport, address = "localhost")

        # Start everything
        if self._watchdog:
            self._watchdog.reset()
        tr_sched.start()

        self.mloop.start()
        log.info("Stopped")