def setUp(self): aggregator = MetricsAggregator("test_host") self.server = Server(aggregator, "localhost", STATSD_PORT) self.reporter = DummyReporter(aggregator) self.t1 = threading.Thread(target=self.server.start) self.t1.start() confd_path = os.path.realpath(os.path.join(os.path.abspath(__file__), "..", "jmx_yamls")) JMXFetch.init(confd_path, {'dogstatsd_port': STATSD_PORT}, get_logging_config(), 15)
def run(self): handlers = [ (r"/intake/?", AgentInputHandler), (r"/api/v1/series/?", AgentInputHandler), (r"/status/?", StatusHandler), ] settings = dict( cookie_secret="12oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=", xsrf_cookies=False, debug=False, log_function=self.log_request ) non_local_traffic = self._agentConfig.get("non_local_traffic", False) tornado.web.Application.__init__(self, handlers, **settings) http_server = tornado.httpserver.HTTPServer(self) try: # non_local_traffic must be == True to match, not just some non-false value if non_local_traffic is True: http_server.listen(self._port) else: # localhost in lieu of 127.0.0.1 to support IPv6 try: http_server.listen(self._port, address="localhost") except gaierror: log.warning( "localhost seems undefined in your host file, using 127.0.0.1 instead") http_server.listen(self._port, address="127.0.0.1") except socket_error as e: if "Errno 99" in str(e): log.warning("IPv6 doesn't seem to be fully supported. Falling back to IPv4") http_server.listen(self._port, address="127.0.0.1") else: raise except socket_error as e: log.exception( "Socket error %s. Is another application listening on the same port ? Exiting", e) sys.exit(1) except Exception: log.exception("Uncaught exception. Forwarder is exiting.") sys.exit(1) log.info("Listening on port %d" % self._port) # Register callbacks self.mloop = get_tornado_ioloop() logging.getLogger().setLevel(get_logging_config()['log_level'] or logging.INFO) def flush_trs(): if self._watchdog: self._watchdog.reset() self._post_metrics() self._tr_manager.flush() tr_sched = tornado.ioloop.PeriodicCallback( flush_trs, TRANSACTION_FLUSH_INTERVAL, io_loop=self.mloop) # Start everything if self._watchdog: self._watchdog.reset() tr_sched.start() self.mloop.start() log.info("Stopped")
import tornado.httpclient import tornado.httpserver import tornado.ioloop import tornado.web from tornado.escape import json_decode from tornado.options import define, parse_command_line, options # agent import from monagent.common.check_status import ForwarderStatus from monagent.common.config import get_config from monagent.common.metrics import Measurement from monagent.common.util import Watchdog, get_tornado_ioloop from transaction import Transaction, TransactionManager log = logging.getLogger('forwarder') log.setLevel(get_logging_config()['log_level'] or logging.INFO) TRANSACTION_FLUSH_INTERVAL = 5000 # Every 5 seconds WATCHDOG_INTERVAL_MULTIPLIER = 10 # 10x flush interval # Maximum delay before replaying a transaction MAX_WAIT_FOR_REPLAY = timedelta(seconds=90) # Maximum queue size in bytes (when this is reached, old messages are dropped) MAX_QUEUE_SIZE = 30 * 1024 * 1024 # 30MB THROTTLING_DELAY = timedelta(microseconds=1000000 / 2) # 2 msg/second class MetricTransaction(Transaction):
def run(self): handlers = [ (r"/intake/?", AgentInputHandler), (r"/api/v1/series/?", AgentInputHandler), (r"/status/?", StatusHandler), ] settings = dict( cookie_secret="12oETzKXQAGaYdkL5gEmGeJJFuYh7EQnp2XdTP1o/Vo=", xsrf_cookies=False, debug=False, log_function=self.log_request) non_local_traffic = self._agentConfig.get("non_local_traffic", False) tornado.web.Application.__init__(self, handlers, **settings) http_server = tornado.httpserver.HTTPServer(self) try: # non_local_traffic must be == True to match, not just some non-false value if non_local_traffic is True: http_server.listen(self._port) else: # localhost in lieu of 127.0.0.1 to support IPv6 try: http_server.listen(self._port, address="localhost") except gaierror: log.warning( "localhost seems undefined in your host file, using 127.0.0.1 instead" ) http_server.listen(self._port, address="127.0.0.1") except socket_error as e: if "Errno 99" in str(e): log.warning( "IPv6 doesn't seem to be fully supported. Falling back to IPv4" ) http_server.listen(self._port, address="127.0.0.1") else: raise except socket_error as e: log.exception( "Socket error %s. Is another application listening on the same port ? Exiting", e) sys.exit(1) except Exception: log.exception("Uncaught exception. Forwarder is exiting.") sys.exit(1) log.info("Listening on port %d" % self._port) # Register callbacks self.mloop = get_tornado_ioloop() logging.getLogger().setLevel(get_logging_config()['log_level'] or logging.INFO) def flush_trs(): if self._watchdog: self._watchdog.reset() self._post_metrics() self._tr_manager.flush() tr_sched = tornado.ioloop.PeriodicCallback(flush_trs, TRANSACTION_FLUSH_INTERVAL, io_loop=self.mloop) # Start everything if self._watchdog: self._watchdog.reset() tr_sched.start() self.mloop.start() log.info("Stopped")
def main(): options, args = get_parsed_args() agentConfig = get_config(options=options) # todo autorestart isn't used remove autorestart = agentConfig.get('autorestart', False) COMMANDS = [ 'start', 'stop', 'restart', 'foreground', 'status', 'info', 'check', 'configcheck', 'jmx', ] if len(args) < 1: sys.stderr.write("Usage: %s %s\n" % (sys.argv[0], "|".join(COMMANDS))) return 2 command = args[0] if command not in COMMANDS: sys.stderr.write("Unknown command: %s\n" % command) return 3 pid_file = PidFile('mon-agent') if options.clean: pid_file.clean() agent = CollectorDaemon(pid_file.get_path(), autorestart) if command in START_COMMANDS: log.info('Agent version %s' % get_version()) if 'start' == command: log.info('Start daemon') agent.start() elif 'stop' == command: log.info('Stop daemon') agent.stop() elif 'restart' == command: log.info('Restart daemon') agent.restart() elif 'status' == command: agent.status() elif 'info' == command: return agent.info(verbose=options.verbose) elif 'foreground' == command: logging.info('Running in foreground') if autorestart: # Set-up the supervisor callbacks and fork it. logging.info('Running Agent with auto-restart ON') def child_func(): agent.run() def parent_func(): agent.start_event = False AgentSupervisor.start(parent_func, child_func) else: # Run in the standard foreground. agent.run(config=agentConfig) elif 'check' == command: check_name = args[1] try: # Try the old-style check first print getattr(collector.checks.collector, check_name)(log).check(agentConfig) except Exception: # If not an old-style check, try checks_d checks = load_check_directory(agentConfig) for check in checks['initialized_checks']: if check.name == check_name: check.run() print check.get_metrics() print check.get_events() if len(args) == 3 and args[2] == 'check_rate': print "Running 2nd iteration to capture rate metrics" time.sleep(1) check.run() print check.get_metrics() print check.get_events() elif 'configcheck' == command or 'configtest' == command: osname = get_os() all_valid = True for conf_path in glob.glob(os.path.join(get_confd_path(osname), "*.yaml")): basename = os.path.basename(conf_path) try: check_yaml(conf_path) except Exception as e: all_valid = False print "%s contains errors:\n %s" % (basename, e) else: print "%s is valid" % basename if all_valid: print "All yaml files passed. You can now run the Monitoring agent." return 0 else: print("Fix the invalid yaml files above in order to start the Monitoring agent. " "A useful external tool for yaml parsing can be found at " "http://yaml-online-parser.appspot.com/") return 1 elif 'jmx' == command: from collector.jmxfetch import JMX_LIST_COMMANDS, JMXFetch if len(args) < 2 or args[1] not in JMX_LIST_COMMANDS.keys(): print "#" * 80 print "JMX tool to be used to help configuring your JMX checks." print "See http://docs.datadoghq.com/integrations/java/ for more information" print "#" * 80 print "\n" print "You have to specify one of the following command:" for command, desc in JMX_LIST_COMMANDS.iteritems(): print " - %s [OPTIONAL: LIST OF CHECKS]: %s" % (command, desc) print "Example: sudo /etc/init.d/mon-agent jmx list_matching_attributes tomcat jmx solr" print "\n" else: jmx_command = args[1] checks_list = args[2:] confd_directory = get_confd_path(get_os()) should_run = JMXFetch.init( confd_directory, agentConfig, get_logging_config(), 15, jmx_command, checks_list, reporter="console") if not should_run: print "Couldn't find any valid JMX configuration in your conf.d directory: %s" % confd_directory print "Have you enabled any JMX check ?" return 0