def init_dogstatsd(config):
    api_key = config['api_key']
    recent_point_threshold = config.get('recent_point_threshold')
    server_host = config['dogstatsd']['bind_host']
    dd_url = config['dd_url']
    port = config['dogstatsd']['port']
    forward_to_host = config['dogstatsd'].get('forward_host')
    forward_to_port = config['dogstatsd'].get('forward_port')
    non_local_traffic = config['dogstatsd'].get('non_local_traffic')
    so_rcvbuf = config['dogstatsd'].get('so_rcvbuf')
    utf8_decoding = config['dogstatsd'].get('utf8_decoding')

    interval = DOGSTATSD_FLUSH_INTERVAL
    aggregator_interval = DOGSTATSD_AGGREGATOR_BUCKET_SIZE

    hostname = get_hostname()

    # get proxy settings
    proxies = get_proxy()

    forwarder = Forwarder(
        api_key,
        dd_url,
        proxies=proxies,
    )
    forwarder.start()

    aggregator = MetricsBucketAggregator(
        hostname,
        aggregator_interval,
        recent_point_threshold=recent_point_threshold,
        formatter=get_formatter(config),
        histogram_aggregates=config.get('histogram_aggregates'),
        histogram_percentiles=config.get('histogram_percentiles'),
        utf8_decoding=utf8_decoding
    )
    # serializer
    serializer = Serializer(
        aggregator,
        forwarder,
    )

    reporter = Reporter(interval, aggregator, serializer, api_key,
                        use_watchdog=False, hostname=hostname)

    # NOTICE: when `non_local_traffic` is passed we need to bind to any interface on the box. The forwarder uses
    # Tornado which takes care of socket creation (more than one socket can be used at once depending on the
    # network settings), so it's enough to just pass an empty string '' to the library.
    # In Dogstatsd we use a single, fullstack socket, so passing '' as the address doesn't work and we default to
    # '0.0.0.0'. If someone needs to bind Dogstatsd to the IPv6 '::', they need to turn off `non_local_traffic` and
    # use the '::' meta address as `bind_host`.
    if non_local_traffic:
        server_host = '0.0.0.0'

    server = Server(aggregator, server_host, port, forward_to_host=forward_to_host,
                    forward_to_port=forward_to_port, so_rcvbuf=so_rcvbuf)

    return reporter, server, forwarder
Example 2
def test_forwarder_start_stop():
    f = Forwarder("api_key", "https://datadog.com", 2)
    f.start()

    assert len(f.workers) == 2
    assert f.workers[0].is_alive()
    assert f.workers[1].is_alive()
    assert f.retry_worker.is_alive()

    tmp_workers = f.workers
    tmp_retry_worker = f.retry_worker

    f.stop()

    assert len(f.workers) == 0
    assert f.retry_worker is None
    assert not tmp_workers[0].is_alive()
    assert not tmp_workers[1].is_alive()
    assert not tmp_retry_worker.is_alive()
Example 3
async def run(is_debug_mode):
    """
    Runs the cluster discovery & forwarder.
    """
    asyncio.create_task(_epsagon_conf_watcher(is_debug_mode))
    events_manager = InMemoryEventsManager()
    epsagon_client = await EpsagonClient.create(EPSAGON_TOKEN)
    events_sender = EventsSender(
        epsagon_client,
        COLLECTOR_URL,
        CLUSTER_NAME,
        EPSAGON_TOKEN
    )
    cluster_discovery = ClusterDiscovery(
        events_manager.write_event,
        should_collect_resources=SHOULD_COLLECT_RESOURCES,
        should_collect_events=SHOULD_COLLECT_EVENTS,
    )
    forwarder = Forwarder(
        events_manager,
        events_sender
    )
    while True:
        try:
            tasks = [
                asyncio.create_task(forwarder.start()),
                asyncio.create_task(cluster_discovery.start())
            ]
            await asyncio.gather(*tasks)
        except (
                client_exceptions.ClientError,
                socket.gaierror,
                ConnectionRefusedError,
                EpsagonClientException
        ):
            logging.error(
                "Connection error, restarting agent in %d seconds",
                RESTART_WAIT_TIME_SECONDS
            )
            _cancel_tasks(tasks)
            events_manager.clean()
            await asyncio.sleep(RESTART_WAIT_TIME_SECONDS)
        except Exception as exception:
            logging.error(str(exception))
            logging.error(format_exc())
            logging.info("Agent is exiting due to an unexpected error")
            _cancel_tasks(tasks)
            await epsagon_client.close()
            break
Example 4
def start():
    """
    Dummy start until we have a collector
    """
    init_agent()

    hostname = get_hostname()

    logging.info("Starting the agent, hostname: %s", hostname)

    # init Forwarder
    logging.info("Starting the Forwarder")
    api_key = config.get('api_key')
    dd_url = config.get('dd_url')
    if not dd_url:
        logging.error('No Datadog URL configured - cannot continue')
        sys.exit(1)
    if not api_key:
        logging.error('No API key configured - cannot continue')
        sys.exit(1)

    forwarder = Forwarder(api_key, dd_url)
    forwarder.start()

    # aggregator
    aggregator = MetricsAggregator(
        hostname,
        interval=config.get('aggregator_interval'),
        expiry_seconds=(config.get('min_collection_interval') +
                        config.get('aggregator_expiry_seconds')),
        recent_point_threshold=config.get('recent_point_threshold'),
        histogram_aggregates=config.get('histogram_aggregates'),
        histogram_percentiles=config.get('histogram_percentiles'),
    )

    # serializer
    serializer = Serializer(
        aggregator,
        forwarder,
    )

    # instantiate collector
    collector = Collector(config, aggregator)
    collector.load_check_classes()
    collector.instantiate_checks()

    def signal_handler(sig, frame):
        logging.info("SIGINT received: stopping the agent")
        logging.info("Stopping the forwarder")
        forwarder.stop()
        logging.info("See you!")
        sys.exit(0)

    signal.signal(signal.SIGINT, signal_handler)

    # update the metadata periodically?
    metadata = get_metadata(hostname)
    serializer.submit_metadata(metadata)
    while True:
        collector.run_checks()
        serializer.serialize_and_push()
        time.sleep(config.get('min_collection_interval'))
Example 5
    def run(self):
        try:
            hostname = get_hostname()
        except HostnameException as e:
            logging.critical(
                "{} - You can define one in datadog.yaml or in your hosts file"
                .format(e))
            sys.exit(1)

        logging.info("Starting the agent, hostname: %s", hostname)

        # init Forwarder
        logging.info("Starting the Forwarder")
        api_key = config.get('api_key')
        dd_url = config.get('dd_url')
        if not dd_url:
            logging.error('No Datadog URL configured - cannot continue')
            sys.exit(1)
        if not api_key:
            logging.error('No API key configured - cannot continue')
            sys.exit(1)

        # get proxy settings
        proxies = get_proxy()
        logging.debug('Proxy configuration used: %s', proxies)

        # get site url
        forwarder = Forwarder(
            api_key,
            get_site_url(dd_url, site=config.get('site')),
            proxies=proxies,
        )
        forwarder.start()

        # agent aggregator
        aggregator = MetricsAggregator(
            hostname,
            interval=config.get('aggregator_interval'),
            expiry_seconds=(config.get('min_collection_interval') +
                            config.get('aggregator_expiry_seconds')),
            recent_point_threshold=config.get('recent_point_threshold'),
            histogram_aggregates=config.get('histogram_aggregates'),
            histogram_percentiles=config.get('histogram_percentiles'),
        )

        # serializer
        serializer = Serializer(
            aggregator,
            forwarder,
        )

        # instantiate collector
        collector = Collector(config, aggregator)
        collector.load_check_classes()
        collector.instantiate_checks()

        # instantiate AgentRunner
        runner = AgentRunner(collector, serializer, config)

        # instantiate Dogstatsd
        reporter = None
        dsd_server = None
        dsd_enable = config['dogstatsd'].get('enable', False)
        if dsd_enable:
            reporter, dsd_server, _ = init_dogstatsd(config,
                                                     forwarder=forwarder)
            dsd = DogstatsdRunner(dsd_server)

        # instantiate API
        status = {
            'agent': aggregator.stats,
            'forwarder': forwarder.stats,
            'collector': collector.status,
        }
        if dsd_server:
            status['dogstatsd'] = dsd_server.aggregator.stats

        api = APIServer(config, status=status)

        handler = SignalHandler()
        # components
        handler.register('runner', runner)
        handler.register('forwarder', forwarder)
        handler.register('api', api)
        if dsd_enable:
            handler.register('reporter', reporter)
            handler.register('dsd_server', dsd_server)

        # signals
        handler.handle(signal.SIGTERM)
        handler.handle(signal.SIGINT)

        # start signal handler
        handler.start()

        runner.start()
        api.start()

        if dsd_enable:
            reporter.start()
            dsd.start()

            dsd.join()
            logging.info("Dogstatsd server done...")
            try:
                dsd.raise_for_status()
            except Exception as e:
                logging.error("There was a problem with the dogstatsd server: %s",
                              e)
                reporter.stop()

        runner.join()
        logging.info("Collector done...")

        api.join()
        logging.info("API done...")

        handler.stop()
        handler.join()
        logging.info("Signal handler done...")

        logging.info("Thank you for shopping at DataDog! Come back soon!")

        sys.exit(0)
Example 6
    def run(self):
        try:
            hostname = get_hostname()
        except HostnameException as e:
            logging.critical(
                "{} - You can define one in datadog.yaml or in your hosts file"
                .format(e))
            sys.exit(1)

        logging.info("Starting the agent, hostname: %s", hostname)

        # init Forwarder
        logging.info("Starting the Forwarder")
        api_key = config.get('api_key')
        dd_url = config.get('dd_url')
        if not dd_url:
            logging.error('No Datadog URL configured - cannot continue')
            sys.exit(1)
        if not api_key:
            logging.error('No API key configured - cannot continue')
            sys.exit(1)

        # get proxy settings
        proxies = get_proxy()
        logging.debug('Proxy configuration used: %s', proxies)

        forwarder = Forwarder(
            api_key,
            dd_url,
            proxies=proxies,
        )
        forwarder.start()

        # aggregator
        aggregator = MetricsAggregator(
            hostname,
            interval=config.get('aggregator_interval'),
            expiry_seconds=(config.get('min_collection_interval') +
                            config.get('aggregator_expiry_seconds')),
            recent_point_threshold=config.get('recent_point_threshold'),
            histogram_aggregates=config.get('histogram_aggregates'),
            histogram_percentiles=config.get('histogram_percentiles'),
        )

        # serializer
        serializer = Serializer(
            aggregator,
            forwarder,
        )

        # instantiate collector
        collector = Collector(config, aggregator)
        collector.load_check_classes()
        collector.instantiate_checks()

        # instantiate AgentRunner
        runner = AgentRunner(collector, serializer, config)

        # instantiate API
        api = APIServer(config, aggregator.stats)

        handler = SignalHandler()
        # components
        handler.register('runner', runner)
        handler.register('forwarder', forwarder)
        handler.register('api', api)
        # signals
        handler.handle(signal.SIGTERM)
        handler.handle(signal.SIGINT)

        # start signal handler
        handler.start()

        runner.start()
        api.start()

        runner.join()
        logging.info("Agent done...")

        api.join()
        logging.info("API done...")

        handler.stop()
        handler.join()
        logging.info("Signal handler done...")

        logging.info("Thank you for shopping at DataDog! Come back soon!")

        sys.exit(0)
Example 7
    def run(self):
        try:
            hostname = get_hostname()
        except HostnameException as e:
            logging.critical(
                "{} - You can define one in datadog.yaml or in your hosts file"
                .format(e))
            sys.exit(1)

        logging.info("Starting the agent, hostname: %s", hostname)

        # init Forwarder
        logging.info("Starting the Forwarder")
        api_key = config.get('api_key')
        dd_url = config.get('dd_url')
        if not dd_url:
            logging.error('No Datadog URL configured - cannot continue')
            sys.exit(1)
        if not api_key:
            logging.error('No API key configured - cannot continue')
            sys.exit(1)

        # get proxy settings
        proxies = get_proxy()
        logging.debug('Proxy configuration used: %s', proxies)

        forwarder = Forwarder(
            api_key,
            dd_url,
            proxies=proxies,
        )
        forwarder.start()

        # aggregator
        aggregator = MetricsAggregator(
            hostname,
            interval=config.get('aggregator_interval'),
            expiry_seconds=(config.get('min_collection_interval') +
                            config.get('aggregator_expiry_seconds')),
            recent_point_threshold=config.get('recent_point_threshold'),
            histogram_aggregates=config.get('histogram_aggregates'),
            histogram_percentiles=config.get('histogram_percentiles'),
        )

        # serializer
        serializer = Serializer(
            aggregator,
            forwarder,
        )

        # instantiate collector
        collector = Collector(config, aggregator)
        collector.load_check_classes()
        collector.instantiate_checks()

        # instantiate AgentRunner
        runner = AgentRunner(collector, serializer, config)

        # instantiate API
        api = APIServer(8888, aggregator.stats)

        def signal_handler(sig, frame):
            logging.info("SIGINT received: stopping the agent")
            logging.info("Stopping the forwarder")
            runner.stop()
            forwarder.stop()
            api.stop()
            logging.info("See you!")
            sys.exit(0)

        signal.signal(signal.SIGINT, signal_handler)

        runner.start()
        api.run()  # blocking tornado in main thread