def init_dogstatsd(config, forwarder=None):
    """Build the Dogstatsd components from the agent configuration.

    Args:
        config: agent configuration mapping; must contain 'api_key',
            'dd_url' and a 'dogstatsd' sub-mapping.
        forwarder: an already-started Forwarder to reuse. When None
            (backward-compatible default), a new Forwarder is created and
            started here. Callers embedding Dogstatsd in the main agent
            pass their own forwarder to avoid running two of them.

    Returns:
        (reporter, server, forwarder) tuple.
    """
    api_key = config['api_key']
    recent_point_threshold = config.get('recent_point_threshold', None)
    server_host = config['dogstatsd']['bind_host']
    dd_url = config['dd_url']
    port = config['dogstatsd']['port']
    forward_to_host = config['dogstatsd'].get('forward_host')
    forward_to_port = config['dogstatsd'].get('forward_port')
    non_local_traffic = config['dogstatsd'].get('non_local_traffic')
    so_rcvbuf = config['dogstatsd'].get('so_rcvbuf')
    utf8_decoding = config['dogstatsd'].get('utf8_decoding')

    interval = DOGSTATSD_FLUSH_INTERVAL
    aggregator_interval = DOGSTATSD_AGGREGATOR_BUCKET_SIZE

    hostname = get_hostname()

    # Only create (and start) a forwarder when the caller did not supply one.
    # Previously this function always built its own forwarder, which broke
    # callers invoking `init_dogstatsd(config, forwarder=forwarder)`.
    if forwarder is None:
        # get proxy settings
        proxies = get_proxy()

        forwarder = Forwarder(
            api_key,
            dd_url,
            proxies=proxies,
        )
        forwarder.start()

    aggregator = MetricsBucketAggregator(
        hostname,
        aggregator_interval,
        recent_point_threshold=recent_point_threshold,
        formatter=get_formatter(config),
        histogram_aggregates=config.get('histogram_aggregates'),
        histogram_percentiles=config.get('histogram_percentiles'),
        utf8_decoding=utf8_decoding
    )

    # serializer
    serializer = Serializer(
        aggregator,
        forwarder,
    )

    reporter = Reporter(interval, aggregator, serializer, api_key,
                        use_watchdog=False, hostname=hostname)

    # NOTICE: when `non_local_traffic` is passed we need to bind to any interface on the box. The forwarder uses
    # Tornado which takes care of sockets creation (more than one socket can be used at once depending on the
    # network settings), so it's enough to just pass an empty string '' to the library.
    # In Dogstatsd we use a single, fullstack socket, so passing '' as the address doesn't work and we default to
    # '0.0.0.0'. If someone needs to bind Dogstatsd to the IPv6 '::', they need to turn off `non_local_traffic` and
    # use the '::' meta address as `bind_host`.
    if non_local_traffic:
        server_host = '0.0.0.0'

    server = Server(aggregator, server_host, port,
                    forward_to_host=forward_to_host,
                    forward_to_port=forward_to_port,
                    so_rcvbuf=so_rcvbuf)

    return reporter, server, forwarder
def test_forwarder_start_stop():
    """Starting a Forwarder spawns its worker threads; stopping tears
    them all down and clears the references."""
    forwarder = Forwarder("api_key", "https://datadog.com", 2)
    forwarder.start()

    # Both payload workers and the retry worker must be running.
    assert len(forwarder.workers) == 2
    for worker in forwarder.workers:
        assert worker.is_alive()
    assert forwarder.retry_worker.is_alive()

    # Keep our own references: stop() releases the forwarder's, and we
    # still want to verify the underlying threads actually exited.
    saved_workers = forwarder.workers
    saved_retry_worker = forwarder.retry_worker

    forwarder.stop()

    assert len(forwarder.workers) == 0
    assert forwarder.retry_worker is None
    for worker in saved_workers:
        assert not worker.is_alive()
    assert not saved_retry_worker.is_alive()
async def run(is_debug_mode):
    """ Runs the cluster discovery & forwarder. """
    # Fire-and-forget background watcher for configuration changes.
    asyncio.create_task(_epsagon_conf_watcher(is_debug_mode))

    events_manager = InMemoryEventsManager()
    epsagon_client = await EpsagonClient.create(EPSAGON_TOKEN)
    events_sender = EventsSender(
        epsagon_client,
        COLLECTOR_URL,
        CLUSTER_NAME,
        EPSAGON_TOKEN
    )
    # Discovery pushes events into the manager; the forwarder drains them.
    cluster_discovery = ClusterDiscovery(
        events_manager.write_event,
        should_collect_resources=SHOULD_COLLECT_RESOURCES,
        should_collect_events=SHOULD_COLLECT_EVENTS,
    )
    forwarder = Forwarder(
        events_manager,
        events_sender
    )

    # Supervision loop: run forwarder + discovery concurrently. On
    # connection-level errors, cancel both tasks, drop buffered events and
    # retry after a back-off; on any other error, shut down for good.
    while True:
        try:
            tasks = [
                asyncio.create_task(forwarder.start()),
                asyncio.create_task(cluster_discovery.start())
            ]
            await asyncio.gather(*tasks)
        except (
            client_exceptions.ClientError,
            socket.gaierror,
            ConnectionRefusedError,
            EpsagonClientException
        ):
            # Transient network failure: restart both tasks after a delay.
            logging.error(
                "Connection error, restarting agent in %d seconds",
                RESTART_WAIT_TIME_SECONDS
            )
            _cancel_tasks(tasks)
            # Buffered events may be stale/partial after a failure; start clean.
            events_manager.clean()
            await asyncio.sleep(RESTART_WAIT_TIME_SECONDS)
        except Exception as exception:
            # Unexpected failure: log the traceback and exit the loop
            # instead of retrying forever.
            logging.error(str(exception))
            logging.error(format_exc())
            logging.info("Agent is exiting due to an unexpected error")
            _cancel_tasks(tasks)
            await epsagon_client.close()
            break
def start():
    """ Dummy start until we have a collector """
    init_agent()

    hostname = get_hostname()
    logging.info("Starting the agent, hostname: %s", hostname)

    # init Forwarder
    logging.info("Starting the Forwarder")
    api_key = config.get('api_key')
    dd_url = config.get('dd_url')

    # Both the endpoint and the key are mandatory; bail out early otherwise.
    if not dd_url:
        logging.error('No Datadog URL configured - cannot continue')
        sys.exit(1)
    if not api_key:
        logging.error('No API key configured - cannot continue')
        sys.exit(1)

    forwarder = Forwarder(api_key, dd_url)
    forwarder.start()

    # aggregator
    aggregator = MetricsAggregator(
        hostname,
        interval=config.get('aggregator_interval'),
        expiry_seconds=(config.get('min_collection_interval')
                        + config.get('aggregator_expiry_seconds')),
        recent_point_threshold=config.get('recent_point_threshold'),
        histogram_aggregates=config.get('histogram_aggregates'),
        histogram_percentiles=config.get('histogram_percentiles'),
    )

    # serializer
    serializer = Serializer(aggregator, forwarder)

    # instantiate collector
    collector = Collector(config, aggregator)
    collector.load_check_classes()
    collector.instantiate_checks()

    def signal_handler(signum, frame):
        # Graceful shutdown on Ctrl-C.
        logging.info("SIGINT received: stopping the agent")
        logging.info("Stopping the forwarder")
        forwarder.stop()
        logging.info("See you !")
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)

    # update the metadata periodically?
    metadata = get_metadata(hostname)
    serializer.submit_metadata(metadata)

    # Main check loop: run checks, flush, sleep, repeat.
    while True:
        collector.run_checks()
        serializer.serialize_and_push()
        time.sleep(config.get('min_collection_interval'))
def run(self):
    """Start every agent component (forwarder, collector runner, API
    server and — when enabled — Dogstatsd), wire up signal handling, and
    block until shutdown.

    Exits the process: status 1 on hostname/configuration errors, 0 on a
    clean shutdown.
    """
    try:
        hostname = get_hostname()
    except HostnameException as e:
        logging.critical(
            "{} - You can define one in datadog.yaml or in your hosts file"
            .format(e))
        sys.exit(1)

    logging.info("Starting the agent, hostname: %s", hostname)

    # init Forwarder
    logging.info("Starting the Forwarder")
    api_key = config.get('api_key')
    dd_url = config.get('dd_url')
    if not dd_url:
        logging.error('No Datadog URL configured - cannot continue')
        sys.exit(1)
    if not api_key:
        logging.error('No API key configured - cannot continue')
        sys.exit(1)

    # get proxy settings
    proxies = get_proxy()
    logging.debug('Proxy configuration used: %s', proxies)

    # get site url
    forwarder = Forwarder(
        api_key,
        get_site_url(dd_url, site=config.get('site')),
        proxies=proxies,
    )
    forwarder.start()

    # agent aggregator
    aggregator = MetricsAggregator(
        hostname,
        interval=config.get('aggregator_interval'),
        expiry_seconds=(config.get('min_collection_interval') +
                        config.get('aggregator_expiry_seconds')),
        recent_point_threshold=config.get('recent_point_threshold'),
        histogram_aggregates=config.get('histogram_aggregates'),
        histogram_percentiles=config.get('histogram_percentiles'),
    )

    # serializer
    serializer = Serializer(
        aggregator,
        forwarder,
    )

    # instantiate collector
    collector = Collector(config, aggregator)
    collector.load_check_classes()
    collector.instantiate_checks()

    # instantiate AgentRunner
    runner = AgentRunner(collector, serializer, config)

    # instantiate Dogstatsd
    reporter = None
    dsd_server = None
    dsd_enable = config['dogstatsd'].get('enable', False)
    if dsd_enable:
        # Reuse this agent's forwarder so we don't run two of them.
        reporter, dsd_server, _ = init_dogstatsd(config, forwarder=forwarder)
        dsd = DogstatsdRunner(dsd_server)

    # instantiate API
    status = {
        'agent': aggregator.stats,
        'forwarder': forwarder.stats,
        'collector': collector.status,
    }
    if dsd_server:
        status['dogstatsd'] = dsd_server.aggregator.stats
    api = APIServer(config, status=status)

    handler = SignalHandler()
    # components
    handler.register('runner', runner)
    handler.register('forwarder', forwarder)
    handler.register('api', api)
    if dsd_enable:
        handler.register('reporter', reporter)
        handler.register('dsd_server', dsd_server)
    # signals
    handler.handle(signal.SIGTERM)
    handler.handle(signal.SIGINT)
    # start signal handler
    handler.start()

    runner.start()
    api.start()

    if dsd_enable:
        reporter.start()
        dsd.start()
        dsd.join()
        logging.info("Dogstatsd server done...")
        try:
            dsd.raise_for_status()
        except Exception as e:
            # FIX: was `log.error(...)` — `log` is not defined here; every
            # other call in this function uses the `logging` module.
            logging.error("There was a problem with the dogstatsd server: %s", e)
        reporter.stop()

    runner.join()
    logging.info("Collector done...")

    api.join()
    logging.info("API done...")

    handler.stop()
    handler.join()
    logging.info("Signal handler done...")

    logging.info("Thank you for shopping at DataDog! Come back soon!")
    sys.exit(0)
def run(self):
    """Bring up the forwarder, the collector runner, the API server and
    the signal handler, then block until every component has finished."""
    try:
        hostname = get_hostname()
    except HostnameException as err:
        logging.critical(
            "{} - You can define one in datadog.yaml or in your hosts file"
            .format(err))
        sys.exit(1)

    logging.info("Starting the agent, hostname: %s", hostname)

    # init Forwarder
    logging.info("Starting the Forwarder")
    api_key = config.get('api_key')
    dd_url = config.get('dd_url')

    # Endpoint and key are both required; refuse to start without them.
    if not dd_url:
        logging.error('No Datadog URL configured - cannot continue')
        sys.exit(1)
    if not api_key:
        logging.error('No API key configured - cannot continue')
        sys.exit(1)

    # get proxy settings
    proxies = get_proxy()
    logging.debug('Proxy configuration used: %s', proxies)

    forwarder = Forwarder(api_key, dd_url, proxies=proxies)
    forwarder.start()

    # aggregator
    aggregator = MetricsAggregator(
        hostname,
        interval=config.get('aggregator_interval'),
        expiry_seconds=(config.get('min_collection_interval')
                        + config.get('aggregator_expiry_seconds')),
        recent_point_threshold=config.get('recent_point_threshold'),
        histogram_aggregates=config.get('histogram_aggregates'),
        histogram_percentiles=config.get('histogram_percentiles'),
    )

    # serializer
    serializer = Serializer(aggregator, forwarder)

    # instantiate collector
    collector = Collector(config, aggregator)
    collector.load_check_classes()
    collector.instantiate_checks()

    # instantiate AgentRunner
    runner = AgentRunner(collector, serializer, config)

    # instantiate API
    api = APIServer(config, aggregator.stats)

    handler = SignalHandler()
    # components
    for name, component in (('runner', runner),
                            ('forwarder', forwarder),
                            ('api', api)):
        handler.register(name, component)
    # signals
    for sig in (signal.SIGTERM, signal.SIGINT):
        handler.handle(sig)
    # start signal handler
    handler.start()

    runner.start()
    api.start()

    runner.join()
    logging.info("Agent done...")

    api.join()
    logging.info("API done...")

    handler.stop()
    handler.join()
    logging.info("Signal handler done...")

    logging.info("Thank you for shopping at DataDog! Come back soon!")
    sys.exit(0)
def run(self):
    """Start the forwarder, collector runner and API server, install a
    SIGINT handler for graceful shutdown, and run the (blocking) API
    server in the main thread.

    Exits the process: status 1 on hostname/configuration errors, 0 when
    the SIGINT handler completes a clean shutdown.
    """
    try:
        hostname = get_hostname()
    except HostnameException as e:
        logging.critical(
            "{} - You can define one in datadog.yaml or in your hosts file"
            .format(e))
        sys.exit(1)

    logging.info("Starting the agent, hostname: %s", hostname)

    # init Forwarder
    logging.info("Starting the Forwarder")
    api_key = config.get('api_key')
    dd_url = config.get('dd_url')
    if not dd_url:
        logging.error('No Datadog URL configured - cannot continue')
        sys.exit(1)
    if not api_key:
        logging.error('No API key configured - cannot continue')
        sys.exit(1)

    # get proxy settings
    proxies = get_proxy()
    logging.debug('Proxy configuration used: %s', proxies)

    forwarder = Forwarder(
        api_key,
        dd_url,
        proxies=proxies,
    )
    forwarder.start()

    # aggregator
    aggregator = MetricsAggregator(
        hostname,
        interval=config.get('aggregator_interval'),
        expiry_seconds=(config.get('min_collection_interval') +
                        config.get('aggregator_expiry_seconds')),
        recent_point_threshold=config.get('recent_point_threshold'),
        histogram_aggregates=config.get('histogram_aggregates'),
        histogram_percentiles=config.get('histogram_percentiles'),
    )

    # serializer
    serializer = Serializer(
        aggregator,
        forwarder,
    )

    # instantiate collector
    collector = Collector(config, aggregator)
    collector.load_check_classes()
    collector.instantiate_checks()

    # instantiate AgentRunner
    runner = AgentRunner(collector, serializer, config)

    # instantiate API
    api = APIServer(8888, aggregator.stats)

    # FIX: the handler's first parameter was named `signal`, shadowing the
    # `signal` module; and it used `log.info(...)` while the rest of this
    # function uses the `logging` module (`log` is not defined here).
    def signal_handler(signum, frame):
        logging.info("SIGINT received: stopping the agent")
        logging.info("Stopping the forwarder")
        runner.stop()
        forwarder.stop()
        api.stop()
        logging.info("See you !")
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)

    runner.start()
    api.run()  # blocking tornado in main thread