Example 1
class TestTomcat(unittest.TestCase):
    def setUp(self):
        aggregator = MetricsAggregator("test_host")
        self.server = Server(aggregator, "localhost", STATSD_PORT)
        pid_file = PidFile('dogstatsd')
        self.reporter = DummyReporter(aggregator)

        self.t1 = threading.Thread(target=self.server.start)
        self.t1.start()

        confd_path = os.path.join(os.environ['VOLATILE_DIR'], 'jmx_yaml')
        self.jmx_daemon = JMXFetch(confd_path, {'dogstatsd_port': STATSD_PORT})
        self.t2 = threading.Thread(target=self.jmx_daemon.run)
        self.t2.start()

    def tearDown(self):
        self.server.stop()
        self.reporter.finished = True
        self.jmx_daemon.terminate()

    def test_tomcat_metrics(self):
        count = 0
        while self.reporter.metrics is None:
            time.sleep(1)
            count += 1
            if count > 25:
                raise Exception("No metrics were received in 25 seconds")

        metrics = self.reporter.metrics

        self.assertTrue(isinstance(metrics, list))
        self.assertTrue(len(metrics) > 0)
        self.assertEquals(len([t for t in metrics if t['metric'] == "tomcat.threads.busy" and "instance:tomcat_instance" in t['tags']]), 2, metrics)
        self.assertEquals(len([t for t in metrics if t['metric'] == "tomcat.bytes_sent" and "instance:tomcat_instance" in t['tags']]), 0, metrics)
        self.assertTrue(len([t for t in metrics if "jvm." in t['metric'] and "instance:tomcat_instance" in t['tags']]) > 4, metrics)
Example 2
class JMXTestCase(unittest.TestCase):
    def setUp(self):
        aggregator = MetricsAggregator("test_host")
        self.server = Server(aggregator, "localhost", STATSD_PORT)
        self.reporter = DummyReporter(aggregator)

        self.t1 = threading.Thread(target=self.server.start)
        self.t1.start()

        confd_path = Fixtures.directory()

        self.jmx_daemon = JMXFetch(confd_path, {'dogstatsd_port': STATSD_PORT})
        self.t2 = threading.Thread(target=self.jmx_daemon.run)
        self.t2.start()

    def tearDown(self):
        self.server.stop()
        self.reporter.finished = True
        self.jmx_daemon.terminate()

    def testCustomJMXMetric(self):
        count = 0
        while self.reporter.metrics is None:
            time.sleep(1)
            count += 1
            if count > 25:
                raise Exception("No metrics were received in 25 seconds")

        metrics = self.reporter.metrics

        self.assertTrue(isinstance(metrics, ListType))
        self.assertTrue(len(metrics) > 0)
        self.assertTrue(len([t for t in metrics if "cassandra.db." in t['metric'] and "instance:cassandra_instance" in t['tags']]) > 40, metrics)
Example 3
class JMXTestCase(unittest.TestCase):
    def setUp(self):
        aggregator = MetricsAggregator("test_host")
        self.server = Server(aggregator, "localhost", STATSD_PORT)
        self.reporter = DummyReporter(aggregator)

        self.t1 = threading.Thread(target=self.server.start)
        self.t1.start()

        confd_path = os.path.join(os.path.dirname(__file__))
        self.jmx_daemon = JMXFetch(confd_path, {'dogstatsd_port': STATSD_PORT})
        self.t2 = threading.Thread(target=self.jmx_daemon.run)
        self.t2.start()

    def tearDown(self):
        self.server.stop()
        self.reporter.finished = True
        self.jmx_daemon.terminate()

    def testTomcatMetrics(self):
        count = 0
        while self.reporter.metrics is None:
            time.sleep(1)
            count += 1
            if count > 25:
                raise Exception("No metrics were received in 25 seconds")

        metrics = self.reporter.metrics

        self.assertTrue(isinstance(metrics, ListType))
        self.assertTrue(len(metrics) > 8, metrics)
        self.assertEquals(len([t for t in metrics if 'instance:solr_instance' in t['tags'] and t['metric'] == "jvm.thread_count"]), 1, metrics)
        self.assertTrue(len([t for t in metrics if "jvm." in t['metric'] and 'instance:solr_instance' in t['tags']]) > 4, metrics)
        self.assertTrue(len([t for t in metrics if "solr." in t['metric'] and 'instance:solr_instance' in t['tags']]) > 4, metrics)
Example 4
class JMXFetchProcess(multiprocessing.Process):
    def __init__(self, agentConfig, hostname):
        multiprocessing.Process.__init__(self, name='jmxfetch')
        self.config = agentConfig
        self.hostname = hostname

        try:
            confd_path = get_confd_path()
            self.jmx_daemon = JMXFetch(confd_path, agentConfig)
            self.jmx_daemon.configure()
            self.is_enabled = self.jmx_daemon.should_run()

        except PathNotFound:
            self.is_enabled = False

    def run(self):
        if self.is_enabled:
            JMXFiles.clean_exit_file()
            self.jmx_daemon.run()

    def terminate(self):
        """
        Override `terminate` method to properly exit JMXFetch.
        """
        JMXFiles.write_exit_file()
        self.join()
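A minimal usage sketch for the process wrapper above, assuming the standard multiprocessing lifecycle: start() forks and invokes run() in the child, while the overridden terminate() asks JMXFetch to exit via its exit file and then joins. The caller, agent_config, hostname and do_agent_work are hypothetical stand-ins:

jmx = JMXFetchProcess(agent_config, hostname)
if jmx.is_enabled:
    jmx.start()           # multiprocessing.Process.start() -> run() in the child
    try:
        do_agent_work()   # placeholder for the rest of the agent's main loop
    finally:
        jmx.terminate()   # writes the exit file, then joins the child process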
Example 5
class JMXTestCase(unittest.TestCase):
    def setUp(self):
        aggregator = MetricsAggregator("test_host")
        self.server = Server(aggregator, "localhost", STATSD_PORT)
        self.reporter = DummyReporter(aggregator)

        self.t1 = threading.Thread(target=self.server.start)
        self.t1.start()

        confd_path = os.path.join(os.environ['VOLATILE_DIR'], 'jmx_yaml')
        self.jmx_daemon = JMXFetch(confd_path, {'sdstatsd_port': STATSD_PORT})
        self.t2 = threading.Thread(target=self.jmx_daemon.run)
        self.t2.start()

    def tearDown(self):
        self.server.stop()
        self.reporter.finished = True
        self.jmx_daemon.terminate()

    def testCustomJMXMetric(self):
        count = 0
        while self.reporter.metrics is None:
            time.sleep(1)
            count += 1
            if count > 20:
                raise Exception("No metrics were received in 20 seconds")

        metrics = self.reporter.metrics

        self.assertTrue(isinstance(metrics, ListType))
        self.assertTrue(len(metrics) > 0)
        self.assertEquals(len([t for t in metrics if t['metric'] == "my.metric.buf" and "instance:jmx_instance1" in t['tags']]), 2, metrics)
        self.assertTrue(len([t for t in metrics if 'type:ThreadPool' in t['tags'] and "instance:jmx_instance1" in t['tags'] and "jmx.catalina" in t['metric']]) > 8, metrics)
        self.assertTrue(len([t for t in metrics if "jvm." in t['metric'] and "instance:jmx_instance1" in t['tags']]) == 13, metrics)
Example 6
class JMXFetchProcess(multiprocessing.Process):
    def __init__(self, agentConfig, hostname, **options):
        multiprocessing.Process.__init__(self, name='jmxfetch')
        self.config = agentConfig
        self.hostname = hostname
        self.options = options

        try:
            confd_path = get_confd_path()
            self.jmx_daemon = JMXFetch(confd_path, agentConfig)
            self.jmx_daemon.configure()
            self.is_enabled = self.jmx_daemon.should_run()

        except PathNotFound:
            self.is_enabled = False

    def run(self):
        from config import initialize_logging
        initialize_logging('jmxfetch')
        if self.is_enabled:
            log.debug("Windows Service - Starting JMXFetch")
            JMXFiles.clean_exit_file()
            self.jmx_daemon.run()
        else:
            log.info("Windows Service - Not starting JMXFetch: no valid configuration found")

    def terminate(self):
        """
        Override `terminate` method to properly exit JMXFetch.
        """
        JMXFiles.write_exit_file()
        self.join()
Example 7
    def _handle_sigterm(self, signum, frame):
        log.debug("Caught sigterm. Stopping run loop.")
        self.run_forever = False

        if JMXFetch.is_running():
            JMXFetch.stop()

        if self.collector:
            self.collector.stop()
        log.debug("Collector is stopped.")
Example 8
    def setUp(self):
        aggregator = MetricsAggregator("test_host")
        self.server = Server(aggregator, "localhost", STATSD_PORT)
        pid_file = PidFile('dogstatsd')
        self.reporter = DummyReporter(aggregator)

        self.t1 = threading.Thread(target=self.server.start)
        self.t1.start()

        confd_path = os.path.join(os.environ['VOLATILE_DIR'], 'jmx_yaml')
        JMXFetch.init(confd_path, {'dogstatsd_port':STATSD_PORT}, get_logging_config(), 15, JMX_COLLECT_COMMAND)
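The class-level JMXFetch.init() used here is paired with a class-level stop in the matching tearDown, as in Examples 21 and 23; a sketch:

    def tearDown(self):
        self.server.stop()
        self.reporter.finished = True
        JMXFetch.stop()  # stops the JMXFetch started by JMXFetch.init() above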
Example 9
    def _get_jmxfetch_subprocess_args(self, yaml_jmx_conf, mock_subprocess_call):
        # Helper function
        # Returns the Java JMX subprocess_args called from a YAML configuration
        tmp_dir = tempfile.mkdtemp()
        filename = "jmx.yaml"
        with open(os.path.join(tmp_dir, filename), 'wb') as temp_file:
            temp_file.write(yaml.dump(yaml_jmx_conf))

        jmx = JMXFetch(tmp_dir, {})
        jmx.run(reporter="console")
        return mock_subprocess_call.call_args[0][0]
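A hedged example of driving this helper, assuming the usual dd-agent JMX YAML layout (an init_config mapping plus an instances list with host/port) and assuming subprocess.call has been patched so that mock_subprocess_call is the resulting MagicMock; the host, port and instance name are illustrative only:

        conf = {
            'init_config': {},
            'instances': [{'host': 'localhost', 'port': 7199, 'name': 'jmx_instance'}],
        }
        subprocess_args = self._get_jmxfetch_subprocess_args(conf, mock_subprocess_call)
        # subprocess_args is the argv list JMXFetch would have been launched with,
        # typically starting with the java binary path.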
Example 10
    def setUp(self):
        aggregator = MetricsAggregator("test_host")
        self.server = Server(aggregator, "localhost", STATSD_PORT)
        pid_file = PidFile('dogstatsd')
        self.reporter = DummyReporter(aggregator)
        
        self.t1 = threading.Thread(target=self.server.start)
        self.t1.start()

        confd_path = os.path.realpath(os.path.join(os.path.abspath(__file__), "..", "jmx_yamls"))
        JMXFetch.init(confd_path, {'dogstatsd_port':STATSD_PORT}, get_logging_config(), 15)
Example 11
class TestKafka(unittest.TestCase):
    """Basic Test for kafka integration."""
    def setUp(self):
        aggregator = MetricsAggregator("test_host")
        self.server = Server(aggregator, "localhost", STATSD_PORT)
        self.reporter = DummyReporter(aggregator)

        self.t1 = threading.Thread(target=self.server.start)
        self.t1.start()

        confd_path = os.path.join(os.path.dirname(__file__), 'resources/')

        self.jmx_daemon = JMXFetch(confd_path, {'dogstatsd_port': STATSD_PORT})
        self.t2 = threading.Thread(target=self.jmx_daemon.run)
        self.t2.start()

    def tearDown(self):
        self.server.stop()
        self.reporter.finished = True
        self.jmx_daemon.terminate()

    def testCustomJMXMetric(self):
        count = 0
        while self.reporter.metrics is None:
            time.sleep(1)
            count += 1
            if count > 25:
                raise Exception("No metrics were received in 25 seconds")

        metrics = self.reporter.metrics
        # expected_tags = ['env:test', 'instance:kafka-172.17.0.1-9999', 'kafka:broker']

        self.assertTrue(isinstance(metrics, ListType))
        self.assertTrue(len(metrics) > 0)
        log.info(metrics)
        log.info(len(metrics))
        self.assertTrue(
            len([t for t in metrics if "jvm." in t['metric'] and "instance:kafka-172.17.0.1-9999" in t['tags']]) >= 13, metrics)
        self.assertTrue(
            len([t for t in metrics if "kafka.request." in t['metric'] and "instance:kafka-172.17.0.1-9999" in t['tags']]) == 12, metrics)
        self.assertTrue(
            len([t for t in metrics if "kafka.replication." in t['metric'] and "instance:kafka-172.17.0.1-9999" in t['tags']]) == 6, metrics)

        # CLIENT metrics.
        # kafka.producer.request_latency_avg
        self.assertTrue(
            len([t for t in metrics if "kafka.producer." in t['metric'] and "instance:kafka-172.17.0.1-7777" in t['tags']]) == 1, metrics)
        # kafka.consumer.fetch_rate, kafka.consumer.max_lag
        self.assertTrue(
            len([t for t in metrics if "kafka.consumer." in t['metric'] and "instance:kafka-172.17.0.1-7777" in t['tags']]) == 2, metrics)
Example 12
    def _add_jmxinfo_tar(self):
        _, _, should_run_jmx = self._capture_output(self._should_run_jmx)
        if should_run_jmx:
            # status files (before listing beans because executing jmxfetch overwrites status files)
            for file_name, file_path in [
                (JMXFiles._STATUS_FILE, JMXFiles.get_status_file_path()),
                (JMXFiles._PYTHON_STATUS_FILE, JMXFiles.get_python_status_file_path())
            ]:
                if self._can_read(file_path, warn=False):
                    self._add_file_tar(
                        file_path,
                        os.path.join('jmxinfo', file_name)
                    )

            # beans lists
            for command in ['list_matching_attributes', 'list_everything']:
                log.info("  * datadog-agent jmx {0} output".format(command))
                self._add_command_output_tar(
                    os.path.join('jmxinfo', '{0}.log'.format(command)),
                    partial(self._jmx_command_call, command)
                )

            # java version
            log.info("  * java -version output")
            _, _, java_bin_path = self._capture_output(
                lambda: JMXFetch.get_configuration(get_confd_path())[2] or 'java')
            self._add_command_output_tar(
                os.path.join('jmxinfo', 'java_version.log'),
                lambda: self._java_version(java_bin_path),
                command_desc="{0} -version".format(java_bin_path)
            )
Example 13
    def setUp(self):
        aggregator = MetricsAggregator("test_host")
        self.server = Server(aggregator, "localhost", STATSD_PORT)
        self.reporter = DummyReporter(aggregator)

        self.t1 = threading.Thread(target=self.server.start)
        self.t1.start()
        confd_path = os.path.join(os.path.dirname(__file__))

        self.jmx_daemon = JMXFetch(confd_path, {'dogstatsd_port': STATSD_PORT})
        self.t2 = threading.Thread(target=self.jmx_daemon.run)
        self.t2.start()
Example 14
    def setUp(self):
        aggregator = MetricsAggregator("test_host")
        self.server = Server(aggregator, "localhost", STATSD_PORT)
        self.reporter = DummyReporter(aggregator)

        self.t1 = threading.Thread(target=self.server.start)
        self.t1.start()

        confd_path = os.path.join(os.environ['VOLATILE_DIR'], 'jmx_yaml')
        self.jmx_daemon = JMXFetch(confd_path, {'sdstatsd_port': STATSD_PORT})
        self.t2 = threading.Thread(target=self.jmx_daemon.run)
        self.t2.start()
Example 15
def jmx_command(args, agent_config, redirect_std_streams=False):
    """
    Run JMXFetch with the given command if it is valid (and print user-friendly info if it's not)
    """
    from jmxfetch import JMX_LIST_COMMANDS, JMXFetch
    if len(args) < 1 or args[0] not in JMX_LIST_COMMANDS.keys():
        print "#" * 80
        print "JMX tool to be used to help configuring your JMX checks."
        print "See http://docs.datadoghq.com/integrations/java/ for more information"
        print "#" * 80
        print "\n"
        print "You have to specify one of the following commands:"
        for command, desc in JMX_LIST_COMMANDS.iteritems():
            print "      - %s [OPTIONAL: LIST OF CHECKS]: %s" % (command, desc)
        print "Example: sudo /etc/init.d/datadog-agent jmx list_matching_attributes tomcat jmx solr"
        print "\n"

    else:
        jmx_command = args[0]
        checks_list = args[1:]
        confd_directory = get_confd_path()

        jmx_process = JMXFetch(confd_directory, agent_config)
        jmx_process.configure()
        should_run = jmx_process.should_run()

        if should_run:
            jmx_process.run(jmx_command, checks_list, reporter="console", redirect_std_streams=redirect_std_streams)
        else:
            print "Couldn't find any valid JMX configuration in your conf.d directory: %s" % confd_directory
            print "Have you enabled any JMX check ?"
            print "If you think it's not normal please get in touch with Datadog Support"
Example 16
    def setUp(self):
        aggregator = MetricsAggregator("test_host")
        self.server = Server(aggregator, "localhost", STATSD_PORT)
        pid_file = PidFile('dogstatsd')
        self.reporter = DummyReporter(aggregator)

        self.t1 = threading.Thread(target=self.server.start)
        self.t1.start()

        confd_path = Fixtures.directory()
        self.jmx_daemon = JMXFetch(confd_path, {'dogstatsd_port': STATSD_PORT})
        self.t2 = threading.Thread(target=self.jmx_daemon.run)
        self.t2.start()
Example 17
class JMXFetchProcess(multiprocessing.Process):
    def __init__(self, agentConfig, hostname):
        multiprocessing.Process.__init__(self, name='jmxfetch')
        self.config = agentConfig
        self.hostname = hostname

        try:
            osname = get_os()
            confd_path = get_confd_path(osname)
            self.jmx_daemon = JMXFetch(confd_path, agentConfig)
            self.jmx_daemon.configure()
            self.is_enabled = self.jmx_daemon.should_run()

        except PathNotFound:
            self.is_enabled = False

    def run(self):
        if self.is_enabled:
            self.jmx_daemon.run()

    def stop(self):
        pass
Example 18
    def __init__(self, agentConfig, hostname):
        multiprocessing.Process.__init__(self, name='jmxfetch')
        self.config = agentConfig
        self.hostname = hostname

        try:
            confd_path = get_confd_path()
            self.jmx_daemon = JMXFetch(confd_path, agentConfig)
            self.jmx_daemon.configure()
            self.is_enabled = self.jmx_daemon.should_run()

        except PathNotFound:
            self.is_enabled = False
Example 19
def main():
    options, args = get_parsed_args()
    agentConfig = get_config(options=options)
    # TODO: autorestart isn't used; remove it
    autorestart = agentConfig.get('autorestart', False)

    COMMANDS = [
        'start',
        'stop',
        'restart',
        'foreground',
        'status',
        'info',
        'check',
        'configcheck',
        'jmx',
    ]

    if len(args) < 1:
        sys.stderr.write("Usage: %s %s\n" % (sys.argv[0], "|".join(COMMANDS)))
        return 2

    command = args[0]
    if command not in COMMANDS:
        sys.stderr.write("Unknown command: %s\n" % command)
        return 3

    pid_file = PidFile('mon-agent')

    if options.clean:
        pid_file.clean()

    agent = CollectorDaemon(pid_file.get_path(), autorestart)

    if command in START_COMMANDS:
        log.info('Agent version %s' % get_version())

    if 'start' == command:
        log.info('Start daemon')
        agent.start()

    elif 'stop' == command:
        log.info('Stop daemon')
        agent.stop()

    elif 'restart' == command:
        log.info('Restart daemon')
        agent.restart()

    elif 'status' == command:
        agent.status()

    elif 'info' == command:
        return agent.info(verbose=options.verbose)

    elif 'foreground' == command:
        logging.info('Running in foreground')
        if autorestart:
            # Set-up the supervisor callbacks and fork it.
            logging.info('Running Agent with auto-restart ON')

            def child_func():
                agent.run()

            def parent_func():
                agent.start_event = False

            AgentSupervisor.start(parent_func, child_func)
        else:
            # Run in the standard foreground.
            agent.run(config=agentConfig)

    elif 'check' == command:
        check_name = args[1]
        try:
            # Try the old-style check first
            print getattr(collector.checks.collector, check_name)(log).check(agentConfig)
        except Exception:
            # If not an old-style check, try checks_d
            checks = load_check_directory(agentConfig)
            for check in checks['initialized_checks']:
                if check.name == check_name:
                    check.run()
                    print check.get_metrics()
                    print check.get_events()
                    if len(args) == 3 and args[2] == 'check_rate':
                        print "Running 2nd iteration to capture rate metrics"
                        time.sleep(1)
                        check.run()
                        print check.get_metrics()
                        print check.get_events()

    elif 'configcheck' == command or 'configtest' == command:
        osname = get_os()
        all_valid = True
        for conf_path in glob.glob(os.path.join(get_confd_path(osname), "*.yaml")):
            basename = os.path.basename(conf_path)
            try:
                check_yaml(conf_path)
            except Exception as e:
                all_valid = False
                print "%s contains errors:\n    %s" % (basename, e)
            else:
                print "%s is valid" % basename
        if all_valid:
            print "All yaml files passed. You can now run the Monitoring agent."
            return 0
        else:
            print("Fix the invalid yaml files above in order to start the Monitoring agent. "
                  "A useful external tool for yaml parsing can be found at "
                  "http://yaml-online-parser.appspot.com/")
            return 1

    elif 'jmx' == command:
        from collector.jmxfetch import JMX_LIST_COMMANDS, JMXFetch

        if len(args) < 2 or args[1] not in JMX_LIST_COMMANDS.keys():
            print "#" * 80
            print "JMX tool to be used to help configuring your JMX checks."
            print "See http://docs.datadoghq.com/integrations/java/ for more information"
            print "#" * 80
            print "\n"
            print "You have to specify one of the following command:"
            for command, desc in JMX_LIST_COMMANDS.iteritems():
                print "      - %s [OPTIONAL: LIST OF CHECKS]: %s" % (command, desc)
            print "Example: sudo /etc/init.d/mon-agent jmx list_matching_attributes tomcat jmx solr"
            print "\n"

        else:
            jmx_command = args[1]
            checks_list = args[2:]
            confd_directory = get_confd_path(get_os())
            should_run = JMXFetch.init(
                confd_directory, agentConfig, get_logging_config(), 15, jmx_command, checks_list, reporter="console")
            if not should_run:
                print "Couldn't find any valid JMX configuration in your conf.d directory: %s" % confd_directory
                print "Have you enabled any JMX check ?"

    return 0
Example 20
 def _is_jmxfetch_enabled(self, config):
     confd_path = get_confd_path()
     jmxfetch = JMXFetch(confd_path, config)
     jmxfetch.configure()
     return jmxfetch.should_run()
Example 21
 def tearDown(self):
     self.server.stop()
     self.reporter.finished = True
     JMXFetch.stop()
Example 22
        checksd_path = get_checksd_path(osname)
        checks_paths.append(glob.glob(os.path.join(checksd_path, '*.py')))
    except PathNotFound, e:
        log.error(e.args[0])
        sys.exit(3)
        
    try:
        confd_path = get_confd_path(osname)
    except PathNotFound, e:
        log.error("No conf.d folder found at '%s' or in the directory where the Agent is currently deployed.\n" % e.args[0])
        sys.exit(3)

    from migration import migrate_old_style_configuration
    migrate_old_style_configuration(agentConfig, confd_path)

    JMXFetch.init(confd_path, agentConfig, get_logging_config(), DEFAULT_CHECK_FREQUENCY)

    # For backwards-compatibility with old-style checks, we have to load every
    # checks.d module and check for a corresponding config OR check if the old
    # config will "activate" the check.
    #
    # Once old-style checks aren't supported, we'll just read the configs and
    # import the corresponding check module
    for check in itertools.chain(*checks_paths):
        check_name = os.path.basename(check).split('.')[0]
        if check_name in initialized_checks or check_name in init_failed_checks:
            log.debug('Skipping check %s because it has already been loaded from another location', check)
            continue
        try:
            check_module = imp.load_source('checksd_%s' % check_name, check)
        except Exception, e:
Example 23
 def tearDown(self):
     self.server.stop()
     self.reporter.finished = True
     JMXFetch.stop()
Example 24
 def stop(self):
     log.debug("Windows Service - Stopping collector")
     self.collector.stop()
     if JMXFetch.is_running():
         JMXFetch.stop()
     self.running = False
Example 25
    def run(self, config=None):
        """Main loop of the collector"""

        # Gracefully exit on sigterm.
        signal.signal(signal.SIGTERM, self._handle_sigterm)

        if not Platform.is_windows():
            # A SIGUSR1 signals an exit with an autorestart
            signal.signal(signal.SIGUSR1, self._handle_sigusr1)

            # Handle Keyboard Interrupt
            signal.signal(signal.SIGINT, self._handle_sigterm)

            # A SIGHUP signals a configuration reload
            signal.signal(signal.SIGHUP, self._handle_sighup)
        else:
            sdk_integrations = get_sdk_integration_paths()
            for name, path in sdk_integrations.iteritems():
                lib_path = os.path.join(path, 'lib')
                if os.path.exists(lib_path):
                    sys.path.append(lib_path)

        # Save the agent start-up stats.
        CollectorStatus().persist()

        # Initialize the collector.
        if not config:
            config = get_config(parse_args=True)

        self._agentConfig = self._set_agent_config_hostname(config)
        hostname = get_hostname(self._agentConfig)
        systemStats = get_system_stats(
            proc_path=self._agentConfig.get('procfs_path', '/proc').rstrip('/')
        )
        emitters = self._get_emitters()

        # Initialize service discovery
        if self._agentConfig.get('service_discovery'):
            self.sd_backend = get_sd_backend(self._agentConfig)

        if self.sd_backend and _is_affirmative(self._agentConfig.get('sd_jmx_enable', False)):
            pipe_path = get_jmx_pipe_path()
            if Platform.is_windows():
                pipe_name = pipe_path.format(pipename=SD_PIPE_NAME)
            else:
                pipe_name = os.path.join(pipe_path, SD_PIPE_NAME)

            if os.access(pipe_path, os.W_OK):
                if not os.path.exists(pipe_name):
                    os.mkfifo(pipe_name)
                self.sd_pipe = os.open(pipe_name, os.O_RDWR) # RW to avoid blocking (will only W)

                # Initialize Supervisor proxy
                self.supervisor_proxy = self._get_supervisor_socket(self._agentConfig)
            else:
                log.debug('Unable to create pipe in temporary directory. JMX service discovery disabled.')

        # Load the checks.d checks
        self._checksd = load_check_directory(self._agentConfig, hostname)

        # Load JMX configs if available
        if self._jmx_service_discovery_enabled:
            self.sd_pipe_jmx_configs(hostname)

        # Initialize the Collector
        self.collector = Collector(self._agentConfig, emitters, systemStats, hostname)

        # In developer mode, the number of runs to be included in a single collector profile
        try:
            self.collector_profile_interval = int(
                self._agentConfig.get('collector_profile_interval', DEFAULT_COLLECTOR_PROFILE_INTERVAL))
        except ValueError:
            log.warn('collector_profile_interval is invalid. '
                     'Using default value instead (%s).' % DEFAULT_COLLECTOR_PROFILE_INTERVAL)
            self.collector_profile_interval = DEFAULT_COLLECTOR_PROFILE_INTERVAL

        # Configure the watchdog.
        self.check_frequency = int(self._agentConfig['check_freq'])
        watchdog = self._get_watchdog(self.check_frequency)

        # Initialize the auto-restarter
        self.restart_interval = int(self._agentConfig.get('restart_interval', RESTART_INTERVAL))
        self.agent_start = time.time()

        self.allow_profiling = self._agentConfig.get('allow_profiling', True)

        profiled = False
        collector_profiled_runs = 0

        # Run the main loop.
        while self.run_forever:
            # Setup profiling if necessary
            if self.allow_profiling and self.in_developer_mode and not profiled:
                try:
                    profiler = AgentProfiler()
                    profiler.enable_profiling()
                    profiled = True
                except Exception as e:
                    log.warn("Cannot enable profiler: %s" % str(e))

            if self.reload_configs_flag:
                if isinstance(self.reload_configs_flag, set):
                    self.reload_configs(checks_to_reload=self.reload_configs_flag)
                else:
                    self.reload_configs()

            # JMXFetch restarts should prompt re-piping *all* JMX configs
            if self._jmx_service_discovery_enabled and \
                    (not self.reload_configs_flag or isinstance(self.reload_configs_flag, set)):
                try:
                    jmx_launch = JMXFetch._get_jmx_launchtime()
                    if self.last_jmx_piped and self.last_jmx_piped < jmx_launch:
                        self.sd_pipe_jmx_configs(hostname)
                except Exception as e:
                    log.debug("could not stat JMX lunch file: %s", e)

            # Do the work. Pass `configs_reloaded` to let the collector know if it needs to
            # look for the AgentMetrics check and pop it out.
            self.collector.run(checksd=self._checksd,
                               start_event=self.start_event,
                               configs_reloaded=True if self.reload_configs_flag else False)

            self.reload_configs_flag = False

            # Look for change in the config template store.
            # The self.sd_backend.reload_check_configs flag is set
            # to True if a config reload is needed.
            if self._agentConfig.get('service_discovery') and self.sd_backend and \
               not self.sd_backend.reload_check_configs:
                try:
                    self.sd_backend.reload_check_configs = get_config_store(
                        self._agentConfig).crawl_config_template()
                except Exception as e:
                    log.warn('Something went wrong while looking for config template changes: %s' % str(e))

            # Check if we should run service discovery
            # The `reload_check_configs` flag can be set through the docker_daemon check or
            # using ConfigStore.crawl_config_template
            if self._agentConfig.get('service_discovery') and self.sd_backend and \
               self.sd_backend.reload_check_configs:
                self.reload_configs_flag = self.sd_backend.reload_check_configs
                self.sd_backend.reload_check_configs = False

            if profiled:
                if collector_profiled_runs >= self.collector_profile_interval:
                    try:
                        profiler.disable_profiling()
                        profiled = False
                        collector_profiled_runs = 0
                    except Exception as e:
                        log.warn("Cannot disable profiler: %s" % str(e))

            # Check if we should restart.
            if self.autorestart and self._should_restart():
                self._do_restart()

            # Only plan for next loop if we will continue, otherwise exit quickly.
            if self.run_forever:
                if watchdog:
                    watchdog.reset()
                if profiled:
                    collector_profiled_runs += 1
                log.debug("Sleeping for {0} seconds".format(self.check_frequency))
                time.sleep(self.check_frequency)

        # Now clean-up.
        try:
            CollectorStatus.remove_latest_status()
        except Exception:
            pass

        # Explicitly kill the process, because it might be running as a daemon.
        log.info("Exiting. Bye bye.")
        sys.exit(0)
Example 26
            print "All yaml files passed. You can now run the Datadog agent."
        else:
            print("Fix the invalid yaml files above in order to start the Datadog agent. "
                    "A useful external tool for yaml parsing can be found at "
                    "http://yaml-online-parser.appspot.com/")

    elif 'jmx' == command:
        from jmxfetch import JMX_LIST_COMMANDS, JMXFetch
       
        if len(args) < 2 or args[1] not in JMX_LIST_COMMANDS:
            print "You have to specify one of the following command %s" % JMX_LIST_COMMANDS
        else:
            jmx_command = args[1]
            from config import get_confd_path, get_logging_config
            from util import get_os
            JMXFetch.init(get_confd_path(get_os()), agentConfig, get_logging_config(), 15, jmx_command)


    return 0


if __name__ == '__main__':
    try:
        sys.exit(main())
    except StandardError:
        # Try our best to log the error.
        try:
            log.exception("Uncaught error running the Agent")
        except Exception:
            pass
        raise
Example 27
        checks_paths.append(glob.glob(os.path.join(checksd_path, '*.py')))
    except PathNotFound, e:
        log.error(e.args[0])
        sys.exit(3)

    try:
        confd_path = get_confd_path(osname)
    except PathNotFound, e:
        log.error("No conf.d folder found at '%s' or in the directory where the Agent is currently deployed.\n" % e.args[0])
        sys.exit(3)

    # Migrate datadog.conf integration configurations that are not supported anymore
    migrate_old_style_configuration(agentConfig, confd_path, get_config_path(None, os_name=get_os()))

    # Start JMXFetch if needed
    JMXFetch.init(confd_path, agentConfig, get_logging_config(), DEFAULT_CHECK_FREQUENCY, JMX_COLLECT_COMMAND)

    # For backwards-compatibility with old-style checks, we have to load every
    # checks.d module and check for a corresponding config OR check if the old
    # config will "activate" the check.
    #
    # Once old-style checks aren't supported, we'll just read the configs and
    # import the corresponding check module
    for check in itertools.chain(*checks_paths):
        check_name = os.path.basename(check).split('.')[0]
        if check_name in initialized_checks or check_name in init_failed_checks:
            log.debug('Skipping check %s because it has already been loaded from another location', check)
            continue
        try:
            check_module = imp.load_source('checksd_%s' % check_name, check)
        except Exception, e:
Example 28
def main():
    options, args = get_parsed_args()
    agentConfig = get_config(options=options)
    autorestart = agentConfig.get('autorestart', False)
    hostname = get_hostname(agentConfig)

    COMMANDS_AGENT = [
        'start',
        'stop',
        'restart',
        'status',
        'foreground',
    ]

    COMMANDS_NO_AGENT = [
        'info',
        'check',
        'configcheck',
        'jmx',
        'flare',
    ]

    COMMANDS = COMMANDS_AGENT + COMMANDS_NO_AGENT

    if len(args) < 1:
        sys.stderr.write("Usage: %s %s\n" % (sys.argv[0], "|".join(COMMANDS)))
        return 2

    command = args[0]
    if command not in COMMANDS:
        sys.stderr.write("Unknown command: %s\n" % command)
        return 3

    # Deprecation notice
    if command not in DD_AGENT_COMMANDS:
        # Will become an error message and exit after deprecation period
        from utils.deprecations import deprecate_old_command_line_tools
        deprecate_old_command_line_tools()

    if command in COMMANDS_AGENT:
        agent = Agent(PidFile('dd-agent').get_path(), autorestart)

    if command in START_COMMANDS:
        log.info('Agent version %s' % get_version())

    if 'start' == command:
        log.info('Start daemon')
        agent.start()

    elif 'stop' == command:
        log.info('Stop daemon')
        agent.stop()

    elif 'restart' == command:
        log.info('Restart daemon')
        agent.restart()

    elif 'status' == command:
        agent.status()

    elif 'info' == command:
        return Agent.info(verbose=options.verbose)

    elif 'foreground' == command:
        logging.info('Running in foreground')
        if autorestart:
            # Set-up the supervisor callbacks and fork it.
            logging.info('Running Agent with auto-restart ON')

            def child_func():
                agent.start(foreground=True)

            def parent_func():
                agent.start_event = False

            AgentSupervisor.start(parent_func, child_func)
        else:
            # Run in the standard foreground.
            agent.start(foreground=True)

    elif 'check' == command:
        if len(args) < 2:
            sys.stderr.write(
                "Usage: %s check <check_name> [check_rate]\n"
                "Add check_rate as last argument to compute rates\n" %
                sys.argv[0])
            return 1

        check_name = args[1]
        try:
            import checks.collector
            # Try the old-style check first
            print getattr(checks.collector, check_name)(log).check(agentConfig)
        except Exception:
            # If not an old-style check, try checks.d
            checks = load_check_directory(agentConfig, hostname)
            for check in checks['initialized_checks']:
                if check.name == check_name:
                    check.run()
                    print check.get_metrics()
                    print check.get_events()
                    print check.get_service_checks()
                    if len(args) == 3 and args[2] == 'check_rate':
                        print "Running 2nd iteration to capture rate metrics"
                        time.sleep(1)
                        check.run()
                        print check.get_metrics()
                        print check.get_events()
                        print check.get_service_checks()
                    check.stop()

    elif 'configcheck' == command or 'configtest' == command:
        configcheck()

    elif 'jmx' == command:
        from jmxfetch import JMX_LIST_COMMANDS, JMXFetch

        if len(args) < 2 or args[1] not in JMX_LIST_COMMANDS.keys():
            print "#" * 80
            print "JMX tool to be used to help configuring your JMX checks."
            print "See http://docs.datadoghq.com/integrations/java/ for more information"
            print "#" * 80
            print "\n"
            print "You have to specify one of the following commands:"
            for command, desc in JMX_LIST_COMMANDS.iteritems():
                print "      - %s [OPTIONAL: LIST OF CHECKS]: %s" % (command,
                                                                     desc)
            print "Example: sudo /etc/init.d/datadog-agent jmx list_matching_attributes tomcat jmx solr"
            print "\n"

        else:
            jmx_command = args[1]
            checks_list = args[2:]
            confd_directory = get_confd_path(get_os())

            jmx_process = JMXFetch(confd_directory, agentConfig)
            jmx_process.configure()
            should_run = jmx_process.should_run()

            if should_run:
                jmx_process.run(jmx_command, checks_list, reporter="console")
            else:
                print "Couldn't find any valid JMX configuration in your conf.d directory: %s" % confd_directory
                print "Have you enabled any JMX check ?"
                print "If you think it's not normal please get in touch with Datadog Support"

    elif 'flare' == command:
        Flare.check_user_rights()
        case_id = int(args[1]) if len(args) > 1 else None
        f = Flare(True, case_id)
        f.collect()
        try:
            f.upload()
        except Exception, e:
            print 'The upload failed:\n{0}'.format(str(e))
Example 29
        sys.exit(3)

    try:
        confd_path = get_confd_path(osname)
    except PathNotFound, e:
        log.error(
            "No conf.d folder found at '%s' or in the directory where the Agent is currently deployed.\n"
            % e.args[0])
        sys.exit(3)

    # Migrate datadog.conf integration configurations that are not supported anymore
    migrate_old_style_configuration(agentConfig, confd_path,
                                    get_config_path(None, os_name=get_os()))

    # Start JMXFetch if needed
    JMXFetch.init(confd_path, agentConfig, get_logging_config(),
                  DEFAULT_CHECK_FREQUENCY, JMX_COLLECT_COMMAND)

    # We don't support old style configs anymore
    # So we iterate over the files in the checks.d directory
    # If there is a matching configuration file in the conf.d directory
    # then we import the check
    for check in itertools.chain(*checks_paths):
        check_name = os.path.basename(check).split('.')[0]
        check_config = None
        if check_name in initialized_checks or check_name in init_failed_checks:
            log.debug(
                'Skipping check %s because it has already been loaded from another location',
                check)
            continue

        # Let's see if there is a conf.d for this check
Example 30
 def _should_run_jmx(self):
     jmx_process = JMXFetch(get_confd_path(), self._config)
     jmx_process.configure(clean_status_file=False)
     return jmx_process.should_run()
Example 31
class TestHbase_regionserver(unittest.TestCase):
    """Basic Test for hbase_regionserver integration."""
    def setUp(self):
        aggregator = MetricsAggregator("test_host")
        self.server = Server(aggregator, "localhost", STATSD_PORT)
        self.reporter = DummyReporter(aggregator)

        self.t1 = threading.Thread(target=self.server.start)
        self.t1.start()

        confd_path = os.path.join(os.path.dirname(__file__), 'ci/resources/')

        self.jmx_daemon = JMXFetch(confd_path, {'dogstatsd_port': STATSD_PORT})
        self.t2 = threading.Thread(target=self.jmx_daemon.run)
        self.t2.start()

    def tearDown(self):
        self.server.stop()
        self.reporter.finished = True
        self.jmx_daemon.terminate()

    def testCustomJMXMetric(self):
        count = 0
        while self.reporter.metrics is None:
            time.sleep(1)
            count += 1
            if count > 60:
                raise Exception("No metrics were received in 60 seconds")

        metrics = self.reporter.metrics
        self.assertTrue(isinstance(metrics, list))
        self.assertTrue(len(metrics) > 0)

        self.assertTrue(
            len([
                t for t in metrics if "jvm." in t['metric']
                and "instance:hbase_regionserver-localhost-10102" in t['tags']
            ]) >= 13, metrics)

        # Wait for metrics that only appear after a while.
        count = 0
        while True:
            metrics = self.reporter.metrics
            mutations_metrics = [
                t for t in metrics
                if "hbase.regionserver.server.mutations" in t['metric']
                and "instance:hbase_regionserver-localhost-10102" in t['tags']
            ]
            slow_append_metrics = [
                t for t in metrics
                if "hbase.regionserver.server.slow_append" in t['metric']
                and "instance:hbase_regionserver-localhost-10102" in t['tags']
            ]
            # hedged_metrics = [t for t in metrics if "hbase.regionserver.server.hedged_read" in t['metric'] and "instance:hbase_regionserver-localhost-10102" in t['tags']]
            # pause_time_metrics = [t for t in metrics if "hbase.regionserver.server.pause_time" in t['metric'] and "instance:hbase_regionserver-localhost-10102" in t['tags']]
            time.sleep(1)
            count += 1
            if len(mutations_metrics) >= 2 and len(slow_append_metrics) >= 1:
                break
            elif count <= 60:
                continue
            else:
                log.info(metrics)
                raise Exception("enough metrics were received in 60 seconds")

        # hbase.regionserver.server.hedged_read* and hbase.regionserver.server.pause* are ignored here
        # because these metrics aren't emitted until those operations actually happen.
        metrics = [
            t for t in self.reporter.metrics if "hbase." in t['metric']
            and "instance:hbase_regionserver-localhost-10102" in t['tags']
        ]
        num_total = 159
        num_hedged_read = 2
        num_pause = 14
        num = len(metrics)
        log.info(num)
        self.assertTrue(num >= (num_total - num_hedged_read - num_pause),
                        metrics)
Example 32
 def _should_run_jmx(self):
     jmx_process = JMXFetch(get_confd_path(), self._config)
     jmx_process.configure(clean_status_file=False)
     return jmx_process.should_run()
Example 33
def main():
    options, args = get_parsed_args()
    agentConfig = get_config(options=options)
    autorestart = agentConfig.get('autorestart', False)
    hostname = get_hostname(agentConfig)
    in_developer_mode = agentConfig.get('developer_mode')
    COMMANDS_AGENT = [
        'start',
        'stop',
        'restart',
        'status',
        'foreground',
    ]

    COMMANDS_NO_AGENT = [
        'info',
        'check',
        'configcheck',
        'jmx',
        'flare',
    ]

    COMMANDS = COMMANDS_AGENT + COMMANDS_NO_AGENT

    if len(args) < 1:
        sys.stderr.write("Usage: %s %s\n" % (sys.argv[0], "|".join(COMMANDS)))
        return 2

    command = args[0]
    if command not in COMMANDS:
        sys.stderr.write("Unknown command: %s\n" % command)
        return 3

    # Deprecation notice
    if command not in DD_AGENT_COMMANDS:
        # Will become an error message and exit after deprecation period
        from utils.deprecations import deprecate_old_command_line_tools
        deprecate_old_command_line_tools()

    if command in COMMANDS_AGENT:
        agent = Agent(PidFile('dd-agent').get_path(), autorestart, in_developer_mode=in_developer_mode)

    if command in START_COMMANDS:
        log.info('Agent version %s' % get_version())

    if 'start' == command:
        log.info('Start daemon')
        agent.start()

    elif 'stop' == command:
        log.info('Stop daemon')
        agent.stop()

    elif 'restart' == command:
        log.info('Restart daemon')
        agent.restart()

    elif 'status' == command:
        agent.status()

    elif 'info' == command:
        return Agent.info(verbose=options.verbose)

    elif 'foreground' == command:
        logging.info('Running in foreground')
        if autorestart:
            # Set-up the supervisor callbacks and fork it.
            logging.info('Running Agent with auto-restart ON')

            def child_func():
                agent.start(foreground=True)

            def parent_func():
                agent.start_event = False

            AgentSupervisor.start(parent_func, child_func)
        else:
            # Run in the standard foreground.
            agent.start(foreground=True)

    elif 'check' == command:
        if len(args) < 2:
            sys.stderr.write(
                "Usage: %s check <check_name> [check_rate]\n"
                "Add check_rate as last argument to compute rates\n"
                % sys.argv[0]
            )
            return 1

        check_name = args[1]
        try:
            import checks.collector
            # Try the old-style check first
            print getattr(checks.collector, check_name)(log).check(agentConfig)
        except Exception:
            # If not an old-style check, try checks.d
            checks = load_check_directory(agentConfig, hostname)
            for check in checks['initialized_checks']:
                if check.name == check_name:
                    if in_developer_mode:
                        check.run = AgentProfiler.wrap_profiling(check.run)

                    cs = Collector.run_single_check(check, verbose=True)
                    print CollectorStatus.render_check_status(cs)

                    if len(args) == 3 and args[2] == 'check_rate':
                        print "Running 2nd iteration to capture rate metrics"
                        time.sleep(1)
                        cs = Collector.run_single_check(check, verbose=True)
                        print CollectorStatus.render_check_status(cs)

                    check.stop()

    elif 'configcheck' == command or 'configtest' == command:
        configcheck()

    elif 'jmx' == command:
        if len(args) < 2 or args[1] not in JMX_LIST_COMMANDS.keys():
            print "#" * 80
            print "JMX tool to be used to help configuring your JMX checks."
            print "See http://docs.datadoghq.com/integrations/java/ for more information"
            print "#" * 80
            print "\n"
            print "You have to specify one of the following commands:"
            for command, desc in JMX_LIST_COMMANDS.iteritems():
                print "      - %s [OPTIONAL: LIST OF CHECKS]: %s" % (command, desc)
            print "Example: sudo /etc/init.d/datadog-agent jmx list_matching_attributes tomcat jmx solr"
            print "\n"

        else:
            jmx_command = args[1]
            checks_list = args[2:]
            confd_directory = get_confd_path(get_os())

            jmx_process = JMXFetch(confd_directory, agentConfig)
            jmx_process.configure()
            should_run = jmx_process.should_run()

            if should_run:
                jmx_process.run(jmx_command, checks_list, reporter="console")
            else:
                print "Couldn't find any valid JMX configuration in your conf.d directory: %s" % confd_directory
                print "Have you enabled any JMX check ?"
                print "If you think it's not normal please get in touch with Datadog Support"

    elif 'flare' == command:
        Flare.check_user_rights()
        case_id = int(args[1]) if len(args) > 1 else None
        f = Flare(True, case_id)
        f.collect()
        try:
            f.upload()
        except Exception, e:
            print 'The upload failed:\n{0}'.format(str(e))
Example 34
            print "\n"
            print "You have to specify one of the following commands:"
            for command, desc in JMX_LIST_COMMANDS.iteritems():
                print "      - %s [OPTIONAL: LIST OF CHECKS]: %s" % (command,
                                                                     desc)
            print "Example: sudo /etc/init.d/datadog-agent jmx list_matching_attributes tomcat jmx solr"
            print "\n"

        else:
            jmx_command = args[1]
            checks_list = args[2:]
            confd_directory = get_confd_path(get_os())
            should_run = JMXFetch.init(confd_directory,
                                       agentConfig,
                                       get_logging_config(),
                                       15,
                                       jmx_command,
                                       checks_list,
                                       reporter="console")
            if not should_run:
                print "Couldn't find any valid JMX configuration in your conf.d directory: %s" % confd_directory
                print "Have you enabled any JMX check ?"
                print "If you think it's not normal please get in touch with Datadog Support"

    return 0


if __name__ == '__main__':
    try:
        sys.exit(main())
    except StandardError:
Example 35
            print "#" * 80
            print "JMX tool to be used to help configuring your JMX checks."
            print "See http://docs.datadoghq.com/integrations/java/ for more information"
            print "#" * 80
            print "\n"
            print "You have to specify one of the following commands:"
            for command, desc in JMX_LIST_COMMANDS.iteritems():
                print "      - %s [OPTIONAL: LIST OF CHECKS]: %s" % (command, desc)
            print "Example: sudo /etc/init.d/datadog-agent jmx list_matching_attributes tomcat jmx solr"
            print "\n"

        else:
            jmx_command = args[1]
            checks_list = args[2:]
            confd_directory = get_confd_path(get_os())
            should_run  = JMXFetch.init(confd_directory, agentConfig, get_logging_config(), 15, jmx_command, checks_list, reporter="console")
            if not should_run:
                print "Couldn't find any valid JMX configuration in your conf.d directory: %s" % confd_directory
                print "Have you enabled any JMX check ?"
                print "If you think it's not normal please get in touch with Datadog Support"


    return 0


if __name__ == '__main__':
    try:
        sys.exit(main())
    except StandardError:
        # Try our best to log the error.
        try:
Example 36
class TestKafka(unittest.TestCase):
    """Basic Test for kafka integration."""
    def setUp(self):
        aggregator = MetricsAggregator("test_host")
        self.server = Server(aggregator, "localhost", STATSD_PORT)
        self.reporter = DummyReporter(aggregator)

        self.t1 = threading.Thread(target=self.server.start)
        self.t1.start()

        confd_path = os.path.join(os.path.dirname(__file__), 'ci/resources/')

        self.jmx_daemon = JMXFetch(confd_path, {'dogstatsd_port': STATSD_PORT})
        self.t2 = threading.Thread(target=self.jmx_daemon.run)
        self.t2.start()

    def tearDown(self):
        self.server.stop()
        self.reporter.finished = True
        self.jmx_daemon.terminate()

    def testCustomJMXMetric(self):
        count = 0
        while self.reporter.metrics is None:
            time.sleep(1)
            count += 1
            if count > 25:
                raise Exception("No metrics were received in 25 seconds")

        metrics = self.reporter.metrics
        # expected_tags = ['env:test', 'instance:kafka-172.17.0.1-9999', 'kafka:broker']

        self.assertTrue(isinstance(metrics, ListType))
        self.assertTrue(len(metrics) > 0)
        log.info(metrics)
        log.info(len(metrics))
        self.assertTrue(
            len([
                t for t in metrics if "jvm." in t['metric']
                and "instance:kafka-172.17.0.1-9999" in t['tags']
            ]) >= 13, metrics)
        self.assertTrue(
            len([
                t for t in metrics if "kafka.request." in t['metric']
                and "instance:kafka-172.17.0.1-9999" in t['tags']
            ]) == 12, metrics)
        self.assertTrue(
            len([
                t for t in metrics if "kafka.replication." in t['metric']
                and "instance:kafka-172.17.0.1-9999" in t['tags']
            ]) == 6, metrics)

        # CLIENT metrics.
        # kafka.producer.request_latency_avg
        self.assertTrue(
            len([
                t for t in metrics if "kafka.producer." in t['metric']
                and "instance:kafka-172.17.0.1-7777" in t['tags']
            ]) == 1, metrics)
        # kafka.consumer.fetch_rate, kafka.consumer.max_lag
        self.assertTrue(
            len([
                t for t in metrics if "kafka.consumer." in t['metric']
                and "instance:kafka-172.17.0.1-7777" in t['tags']
            ]) == 2, metrics)
Example 37
 def stop(self):
     log.debug("Windows Service - Stopping collector")
     self.collector.stop()
     if JMXFetch.is_running():
         JMXFetch.stop()
     self.running = False
Example 38
 def _is_jmxfetch_enabled(self, config):
     confd_path = get_confd_path()
     jmxfetch = JMXFetch(confd_path, config)
     jmxfetch.configure()
     return jmxfetch.should_run()
Example 39
        checks_paths.append(glob.glob(os.path.join(checksd_path, '*.py')))
    except PathNotFound, e:
        log.error(e.args[0])
        sys.exit(3)

    try:
        confd_path = get_confd_path(osname)
    except PathNotFound, e:
        log.error("No conf.d folder found at '%s' or in the directory where the Agent is currently deployed.\n" % e.args[0])
        sys.exit(3)

    # Migrate datadog.conf integration configurations that are not supported anymore
    migrate_old_style_configuration(agentConfig, confd_path, get_config_path(None, os_name=get_os()))

    # Start JMXFetch if needed
    JMXFetch.init(confd_path, agentConfig, get_logging_config(), DEFAULT_CHECK_FREQUENCY)

    # For backwards-compatibility with old-style checks, we have to load every
    # checks.d module and check for a corresponding config OR check if the old
    # config will "activate" the check.
    #
    # Once old-style checks aren't supported, we'll just read the configs and
    # import the corresponding check module
    for check in itertools.chain(*checks_paths):
        check_name = os.path.basename(check).split('.')[0]
        if check_name in initialized_checks or check_name in init_failed_checks:
            log.debug('Skipping check %s because it has already been loaded from another location', check)
            continue
        try:
            check_module = imp.load_source('checksd_%s' % check_name, check)
        except Exception, e:
Example 40
def main():
    options, args = get_parsed_args()
    agentConfig = get_config(options=options)
    autorestart = agentConfig.get("autorestart", False)
    hostname = get_hostname(agentConfig)

    COMMANDS = ["start", "stop", "restart", "foreground", "status", "info", "check", "configcheck", "jmx", "flare"]

    if len(args) < 1:
        sys.stderr.write("Usage: %s %s\n" % (sys.argv[0], "|".join(COMMANDS)))
        return 2

    command = args[0]
    if command not in COMMANDS:
        sys.stderr.write("Unknown command: %s\n" % command)
        return 3

    pid_file = PidFile("dd-agent")

    if options.clean:
        pid_file.clean()

    agent = Agent(pid_file.get_path(), autorestart)

    if command in START_COMMANDS:
        log.info("Agent version %s" % get_version())

    if "start" == command:
        log.info("Start daemon")
        agent.start()

    elif "stop" == command:
        log.info("Stop daemon")
        agent.stop()

    elif "restart" == command:
        log.info("Restart daemon")
        agent.restart()

    elif "status" == command:
        agent.status()

    elif "info" == command:
        return agent.info(verbose=options.verbose)

    elif "foreground" == command:
        logging.info("Running in foreground")
        if autorestart:
            # Set-up the supervisor callbacks and fork it.
            logging.info("Running Agent with auto-restart ON")

            def child_func():
                agent.start(foreground=True)

            def parent_func():
                agent.start_event = False

            AgentSupervisor.start(parent_func, child_func)
        else:
            # Run in the standard foreground.
            agent.start(foreground=True)

    elif "check" == command:
        if len(args) < 2:
            sys.stderr.write(
                "Usage: %s check <check_name> [check_rate]\n"
                "Add check_rate as last argument to compute rates\n" % sys.argv[0]
            )
            return 1

        check_name = args[1]
        try:
            import checks.collector

            # Try the old-style check first
            print getattr(checks.collector, check_name)(log).check(agentConfig)
        except Exception:
            # If not an old-style check, try checks.d
            checks = load_check_directory(agentConfig, hostname)
            for check in checks["initialized_checks"]:
                if check.name == check_name:
                    check.run()
                    print check.get_metrics()
                    print check.get_events()
                    print check.get_service_checks()
                    if len(args) == 3 and args[2] == "check_rate":
                        print "Running 2nd iteration to capture rate metrics"
                        time.sleep(1)
                        check.run()
                        print check.get_metrics()
                        print check.get_events()
                        print check.get_service_checks()
                    check.stop()

    elif "configcheck" == command or "configtest" == command:
        configcheck()

    elif "jmx" == command:
        from jmxfetch import JMX_LIST_COMMANDS, JMXFetch

        if len(args) < 2 or args[1] not in JMX_LIST_COMMANDS.keys():
            print "#" * 80
            print "JMX tool to be used to help configuring your JMX checks."
            print "See http://docs.datadoghq.com/integrations/java/ for more information"
            print "#" * 80
            print "\n"
            print "You have to specify one of the following commands:"
            for command, desc in JMX_LIST_COMMANDS.iteritems():
                print "      - %s [OPTIONAL: LIST OF CHECKS]: %s" % (command, desc)
            print "Example: sudo /etc/init.d/datadog-agent jmx list_matching_attributes tomcat jmx solr"
            print "\n"

        else:
            jmx_command = args[1]
            checks_list = args[2:]
            confd_directory = get_confd_path(get_os())

            jmx_process = JMXFetch(confd_directory, agentConfig)
            should_run = jmx_process.run(jmx_command, checks_list, reporter="console")
            if not should_run:
                print "Couldn't find any valid JMX configuration in your conf.d directory: %s" % confd_directory
                print "Have you enabled any JMX check ?"
                print "If you think it's not normal please get in touch with Datadog Support"

    elif "flare" == command:
        Flare.check_user_rights()
        case_id = int(args[1]) if len(args) > 1 else None
        f = Flare(True, case_id)
        f.collect()
        try:
            f.upload()
        except Exception, e:
            print "The upload failed:\n{0}".format(str(e))