Example #1
0
def get_hostname(config=None):
    """
    Get the canonical host name this agent should identify as. This is
    the authoritative source of the host name for the agent.

    Tries, in order:

      * agent config (agent.conf, "hostname:")
      * 'hostname -f' (on unix)
      * socket.gethostname()
    """
    hostname = None

    # first, try the config
    if config is None:
        from monagent.common.config import get_config
        config = get_config(parse_args=True)
    config_hostname = config.get('hostname')
    if config_hostname and is_valid_hostname(config_hostname):
        return config_hostname

    # then move on to os-specific detection
    if hostname is None:

        def _get_hostname_unix():
            try:
                # try fqdn
                p = subprocess.Popen(['/bin/hostname', '-f'],
                                     stdout=subprocess.PIPE)
                out, err = p.communicate()
                if p.returncode == 0:
                    return out.strip()
            except Exception:
                return None

        os_name = get_os()
        if os_name in ['mac', 'freebsd', 'linux', 'solaris']:
            unix_hostname = _get_hostname_unix()
            if unix_hostname and is_valid_hostname(unix_hostname):
                hostname = unix_hostname

    # fall back on socket.gethostname(), socket.getfqdn() is too unreliable
    if hostname is None:
        try:
            socket_hostname = socket.gethostname()
        except socket.error:
            socket_hostname = None
        if socket_hostname and is_valid_hostname(socket_hostname):
            hostname = socket_hostname

    if hostname is None:
        log.critical(
            'Unable to reliably determine host name. You can define one in agent.conf or in your hosts file'
        )
        raise Exception(
            'Unable to reliably determine host name. You can define one in agent.conf or in your hosts file'
        )
    else:
        return hostname
Example #2
0
def get_hostname(config=None):
    """
    Get the canonical host name this agent should identify as. This is
    the authoritative source of the host name for the agent.

    Tries, in order:

      * agent config (agent.conf, "hostname:")
      * 'hostname -f' (on unix)
      * socket.gethostname()
    """
    hostname = None

    # first, try the config
    if config is None:
        from monagent.common.config import get_config
        config = get_config(parse_args=True)
    config_hostname = config.get('hostname')
    if config_hostname and is_valid_hostname(config_hostname):
        return config_hostname

    # then move on to os-specific detection
    if hostname is None:
        def _get_hostname_unix():
            try:
                # try fqdn
                p = subprocess.Popen(['/bin/hostname', '-f'], stdout=subprocess.PIPE)
                out, err = p.communicate()
                if p.returncode == 0:
                    return out.strip()
            except Exception:
                return None

        os_name = get_os()
        if os_name in ['mac', 'freebsd', 'linux', 'solaris']:
            unix_hostname = _get_hostname_unix()
            if unix_hostname and is_valid_hostname(unix_hostname):
                hostname = unix_hostname

    # fall back on socket.gethostname(), socket.getfqdn() is too unreliable
    if hostname is None:
        try:
            socket_hostname = socket.gethostname()
        except socket.error:
            socket_hostname = None
        if socket_hostname and is_valid_hostname(socket_hostname):
            hostname = socket_hostname

    if hostname is None:
        log.critical(
            'Unable to reliably determine host name. You can define one in agent.conf or in your hosts file')
        raise Exception(
            'Unable to reliably determine host name. You can define one in agent.conf or in your hosts file')
    else:
        return hostname
Example #3
0
 def run(self):
     log.debug("Windows Service - Starting forwarder")
     set_win32_cert_path()
     port = self.config.get('listen_port', 17123)
     if port is None:
         port = 17123
     else:
         port = int(port)
     app_config = get_config(parse_args=False)
     self.forwarder = Application(port, app_config, watchdog=False)
     self.forwarder.run()
Example #4
0
    def __init__(self, args):
        win32serviceutil.ServiceFramework.__init__(self, args)
        self.hWaitStop = win32event.CreateEvent(None, 0, 0, None)
        config = get_config(parse_args=False)

        # Setup the correct options so the agent will use the forwarder
        opts, args = Values({
            'clean': False,
            'disabled_dd': False
        }), []
        agentConfig = get_config(parse_args=False, options=opts)
        self.restart_interval = \
            int(agentConfig.get('autorestart_interval', RESTART_INTERVAL))
        log.info("Autorestarting the collector ever %s seconds" % self.restart_interval)

        # Keep a list of running processes so we can start/end as needed.
        # Processes will start started in order and stopped in reverse order.
        self.procs = {
            'forwarder': DDForwarder(config),
            'collector': DDAgent(agentConfig),
            'dogstatsd': DogstatsdProcess(config),
            'pup': PupProcess(config),
        }
Example #5
0
def init_monstatsd(config_path=None, use_watchdog=False):
    """Configure the server and the reporting thread.
    """
    c = get_config(parse_args=False, cfg_path=config_path)
    log.debug("Configuration monstatsd")

    port = c['monstatsd_port']
    interval = int(c['monstatsd_interval'])
    aggregator_interval = int(c['monstatsd_agregator_bucket_size'])
    non_local_traffic = c['non_local_traffic']
    forward_to_host = c.get('statsd_forward_host')
    forward_to_port = c.get('statsd_forward_port')
    event_chunk_size = c.get('event_chunk_size')

    target = c['forwarder_url']

    hostname = get_hostname(c)

    # Create the aggregator (which is the point of communication between the
    # server and reporting threads.
    assert 0 < interval

    aggregator = MetricsBucketAggregator(hostname,
                                         aggregator_interval,
                                         recent_point_threshold=c.get(
                                             'recent_point_threshold', None))

    # Start the reporting thread.
    reporter = Reporter(interval, aggregator, target, use_watchdog,
                        event_chunk_size)

    # Start the server on an IPv4 stack
    # Default to loopback
    server_host = 'localhost'
    # If specified, bind to all addressses
    if non_local_traffic:
        server_host = ''

    server = Server(aggregator,
                    server_host,
                    port,
                    forward_to_host=forward_to_host,
                    forward_to_port=forward_to_port)

    return reporter, server, c
Example #6
0
def init_monstatsd(config_path=None, use_watchdog=False):
    """Configure the server and the reporting thread.
    """
    c = get_config(parse_args=False, cfg_path=config_path)
    log.debug("Configuration monstatsd")

    port = c['monstatsd_port']
    interval = int(c['monstatsd_interval'])
    aggregator_interval = int(c['monstatsd_agregator_bucket_size'])
    non_local_traffic = c['non_local_traffic']
    forward_to_host = c.get('statsd_forward_host')
    forward_to_port = c.get('statsd_forward_port')
    event_chunk_size = c.get('event_chunk_size')

    target = c['forwarder_url']

    hostname = get_hostname(c)

    # Create the aggregator (which is the point of communication between the
    # server and reporting threads.
    assert 0 < interval

    aggregator = MetricsBucketAggregator(
        hostname,
        aggregator_interval,
        recent_point_threshold=c.get(
            'recent_point_threshold',
            None))

    # Start the reporting thread.
    reporter = Reporter(interval, aggregator, target, use_watchdog, event_chunk_size)

    # Start the server on an IPv4 stack
    # Default to loopback
    server_host = 'localhost'
    # If specified, bind to all addressses
    if non_local_traffic:
        server_host = ''

    server = Server(aggregator, server_host, port, forward_to_host=forward_to_host,
                    forward_to_port=forward_to_port)

    return reporter, server, c
Example #7
0
def init_forwarder(skip_ssl_validation=False, use_simple_http_client=False):
    agent_config = get_config(parse_args=False)

    port = agent_config.get('listen_port', 17123)
    if port is None:
        port = 17123
    else:
        port = int(port)

    app = Forwarder(port, agent_config, skip_ssl_validation=skip_ssl_validation,
                    use_simple_http_client=use_simple_http_client)

    def sigterm_handler(signum, frame):
        log.info("caught sigterm. stopping")
        app.stop()

    signal.signal(signal.SIGTERM, sigterm_handler)
    signal.signal(signal.SIGINT, sigterm_handler)

    return app
Example #8
0
def init_forwarder(skip_ssl_validation=False, use_simple_http_client=False):
    agent_config = get_config(parse_args=False)

    port = agent_config.get('listen_port', 17123)
    if port is None:
        port = 17123
    else:
        port = int(port)

    app = Forwarder(port,
                    agent_config,
                    skip_ssl_validation=skip_ssl_validation,
                    use_simple_http_client=use_simple_http_client)

    def sigterm_handler(signum, frame):
        log.info("caught sigterm. stopping")
        app.stop()

    signal.signal(signal.SIGTERM, sigterm_handler)
    signal.signal(signal.SIGINT, sigterm_handler)

    return app
Example #9
0
    def run(self, config=None):
        """Main loop of the collector"""

        # Gracefully exit on sigterm.
        signal.signal(signal.SIGTERM, self._handle_sigterm)

        # A SIGUSR1 signals an exit with an autorestart
        signal.signal(signal.SIGUSR1, self._handle_sigusr1)

        # Handle Keyboard Interrupt
        signal.signal(signal.SIGINT, self._handle_sigterm)

        # Save the agent start-up stats.
        CollectorStatus().persist()

        # Intialize the collector.
        if config is None:
            config = get_config(parse_args=True)

        # Load the checks_d checks
        checksd = load_check_directory(config)
        self.collector = Collector(config, http_emitter, checksd)

        # Configure the watchdog.
        check_frequency = int(config['check_freq'])
        watchdog = self._get_watchdog(check_frequency, config)

        # Initialize the auto-restarter
        self.restart_interval = int(config.get('restart_interval', RESTART_INTERVAL))
        self.agent_start = time.time()

        # Run the main loop.
        while self.run_forever:

            # enable profiler if needed
            profiled = False
            if config.get('profile', False) and config.get('profile').lower() == 'yes':
                try:
                    import cProfile
                    profiler = cProfile.Profile()
                    profiled = True
                    profiler.enable()
                    log.debug("Agent profiling is enabled")
                except Exception:
                    log.warn("Cannot enable profiler")

            # Do the work.
            self.collector.run()

            # disable profiler and printout stats to stdout
            if config.get('profile', False) and config.get('profile').lower() == 'yes' and profiled:
                try:
                    profiler.disable()
                    import pstats
                    from cStringIO import StringIO
                    s = StringIO()
                    ps = pstats.Stats(profiler, stream=s).sort_stats("cumulative")
                    ps.print_stats()
                    log.debug(s.getvalue())
                except Exception:
                    log.warn("Cannot disable profiler")

            # Check if we should restart.
            if self.autorestart and self._should_restart():
                self._do_restart()

            # Only plan for the next loop if we will continue,
            # otherwise just exit quickly.
            if self.run_forever:
                if watchdog:
                    watchdog.reset()
                time.sleep(check_frequency)

        # Now clean-up.
        try:
            CollectorStatus.remove_latest_status()
        except Exception:
            pass

        # Explicitly kill the process, because it might be running
        # as a daemon.
        log.info("Exiting. Bye bye.")
        sys.exit(0)
Example #10
0
def main():
    options, args = get_parsed_args()
    agentConfig = get_config(options=options)
    # todo autorestart isn't used remove
    autorestart = agentConfig.get('autorestart', False)

    COMMANDS = [
        'start',
        'stop',
        'restart',
        'foreground',
        'status',
        'info',
        'check',
        'configcheck',
        'jmx',
    ]

    if len(args) < 1:
        sys.stderr.write("Usage: %s %s\n" % (sys.argv[0], "|".join(COMMANDS)))
        return 2

    command = args[0]
    if command not in COMMANDS:
        sys.stderr.write("Unknown command: %s\n" % command)
        return 3

    pid_file = PidFile('mon-agent')

    if options.clean:
        pid_file.clean()

    agent = CollectorDaemon(pid_file.get_path(), autorestart)

    if command in START_COMMANDS:
        log.info('Agent version %s' % get_version())

    if 'start' == command:
        log.info('Start daemon')
        agent.start()

    elif 'stop' == command:
        log.info('Stop daemon')
        agent.stop()

    elif 'restart' == command:
        log.info('Restart daemon')
        agent.restart()

    elif 'status' == command:
        agent.status()

    elif 'info' == command:
        return agent.info(verbose=options.verbose)

    elif 'foreground' == command:
        logging.info('Running in foreground')
        if autorestart:
            # Set-up the supervisor callbacks and fork it.
            logging.info('Running Agent with auto-restart ON')

            def child_func():
                agent.run()

            def parent_func():
                agent.start_event = False

            AgentSupervisor.start(parent_func, child_func)
        else:
            # Run in the standard foreground.
            agent.run(config=agentConfig)

    elif 'check' == command:
        check_name = args[1]
        try:
            # Try the old-style check first
            print getattr(collector.checks.collector, check_name)(log).check(agentConfig)
        except Exception:
            # If not an old-style check, try checks_d
            checks = load_check_directory(agentConfig)
            for check in checks['initialized_checks']:
                if check.name == check_name:
                    check.run()
                    print check.get_metrics()
                    print check.get_events()
                    if len(args) == 3 and args[2] == 'check_rate':
                        print "Running 2nd iteration to capture rate metrics"
                        time.sleep(1)
                        check.run()
                        print check.get_metrics()
                        print check.get_events()

    elif 'configcheck' == command or 'configtest' == command:
        osname = get_os()
        all_valid = True
        for conf_path in glob.glob(os.path.join(get_confd_path(osname), "*.yaml")):
            basename = os.path.basename(conf_path)
            try:
                check_yaml(conf_path)
            except Exception as e:
                all_valid = False
                print "%s contains errors:\n    %s" % (basename, e)
            else:
                print "%s is valid" % basename
        if all_valid:
            print "All yaml files passed. You can now run the Monitoring agent."
            return 0
        else:
            print("Fix the invalid yaml files above in order to start the Monitoring agent. "
                  "A useful external tool for yaml parsing can be found at "
                  "http://yaml-online-parser.appspot.com/")
            return 1

    elif 'jmx' == command:
        from collector.jmxfetch import JMX_LIST_COMMANDS, JMXFetch

        if len(args) < 2 or args[1] not in JMX_LIST_COMMANDS.keys():
            print "#" * 80
            print "JMX tool to be used to help configuring your JMX checks."
            print "See http://docs.datadoghq.com/integrations/java/ for more information"
            print "#" * 80
            print "\n"
            print "You have to specify one of the following command:"
            for command, desc in JMX_LIST_COMMANDS.iteritems():
                print "      - %s [OPTIONAL: LIST OF CHECKS]: %s" % (command, desc)
            print "Example: sudo /etc/init.d/mon-agent jmx list_matching_attributes tomcat jmx solr"
            print "\n"

        else:
            jmx_command = args[1]
            checks_list = args[2:]
            confd_directory = get_confd_path(get_os())
            should_run = JMXFetch.init(
                confd_directory, agentConfig, get_logging_config(), 15, jmx_command, checks_list, reporter="console")
            if not should_run:
                print "Couldn't find any valid JMX configuration in your conf.d directory: %s" % confd_directory
                print "Have you enabled any JMX check ?"

    return 0
Example #11
0
 def testWhiteSpaceConfig(self):
     """Leading whitespace confuse ConfigParser
     """
     agent_config = get_config(cfg_path=os.path.join(
         os.path.dirname(os.path.realpath(__file__)), "badconfig.conf"))
     self.assertEqual(agent_config["api_key"], "1234")
Example #12
0
 def testWhiteSpaceConfig(self):
     """Leading whitespace confuse ConfigParser
     """
     agent_config = get_config(
         cfg_path=os.path.join(os.path.dirname(os.path.realpath(__file__)), "badconfig.conf"))
     self.assertEquals(agent_config["api_key"], "1234")