Esempio n. 1
0
    def test_apptags(self):
        '''
        Tests that the app tags are sent if specified so
        '''
        agentConfig = {
            'api_key': 'test_apikey',
            'collect_ec2_tags': False,
            'collect_instance_metadata': False,
            'create_dd_check_tags': True,
            'version': 'test',
            'tags': '',
        }

        # Run a single checks.d check as part of the collector.
        redis_config = {
            "init_config": {},
            "instances": [{
                "host": "localhost",
                "port": 6379
            }]
        }
        checks = [load_check('redisdb', redis_config, agentConfig)]

        c = Collector(agentConfig, [], {}, get_hostname(agentConfig))
        payload = c.run({
            'initialized_checks': checks,
            'init_failed_checks': {}
        })

        # We check that the redis DD_CHECK_TAG is sent in the payload
        self.assertTrue('dd_check:redisdb' in payload['host-tags']['system'])
Esempio n. 2
0
    def test_collector(self):
        agentConfig = {
            'api_key': 'test_apikey',
            'check_timings': True,
            'collect_ec2_tags': True,
            'collect_instance_metadata': False,
            'create_dd_check_tags': False,
            'version': 'test',
            'tags': '',
        }

        # Run a single checks.d check as part of the collector.
        redis_config = {
            "init_config": {},
            "instances": [{"host": "localhost", "port": 6379}]
        }
        checks = [load_check('redisdb', redis_config, agentConfig)]

        c = Collector(agentConfig, [], {}, get_hostname(agentConfig))
        payload, continue_immediately = c.run({
            'initialized_checks': checks,
            'init_failed_checks': {}
        })
        assert not continue_immediately
        metrics = payload['metrics']

        # Check that we got a timing metric for all checks.
        timing_metrics = [m for m in metrics
            if m[0] == 'stackstate.agent.check_run_time']
        all_tags = []
        for metric in timing_metrics:
            all_tags.extend(metric[3]['tags'])
        for check in checks:
            tag = "check:%s" % check.name
            assert tag in all_tags, all_tags
Esempio n. 3
0
    def test_collector(self):
        agentConfig = {
            'api_key': 'test_apikey',
            'check_timings': True,
            'collect_ec2_tags': True,
            'collect_orchestrator_tags': False,
            'collect_instance_metadata': False,
            'create_dd_check_tags': False,
            'version': 'test',
            'tags': '',
        }

        # Run a single checks.d check as part of the collector.
        disk_config = {"init_config": {}, "instances": [{}]}

        checks = [load_check('disk', disk_config, agentConfig)]

        c = Collector(agentConfig, [], {}, get_hostname(agentConfig))
        payload = c.run({
            'initialized_checks': checks,
            'init_failed_checks': {}
        })
        metrics = payload['metrics']

        # Check that we got a timing metric for all checks.
        timing_metrics = [
            m for m in metrics if m[0] == 'datadog.agent.check_run_time'
        ]
        all_tags = []
        for metric in timing_metrics:
            all_tags.extend(metric[3]['tags'])
        for check in checks:
            tag = "check:%s" % check.name
            assert tag in all_tags, all_tags
Esempio n. 4
0
    def test_apptags(self):
        '''
        Tests that the app tags are sent if specified so
        '''
        agentConfig = {
            'agent_key': 'test_agentkey',
            'collect_ec2_tags': False,
            'collect_orchestrator_tags': False,
            'collect_instance_metadata': False,
            'create_sd_check_tags': True,
            'version': 'test',
            'tags': '',
        }

        # Run a single checks.d check as part of the collector.
        disk_config = {"init_config": {}, "instances": [{}]}

        checks = [load_check('disk', disk_config, agentConfig)]

        c = Collector(agentConfig, [], {}, get_hostname(agentConfig))
        payload = c.run({
            'initialized_checks': checks,
            'init_failed_checks': {}
        })

        # We check that the redis SD_CHECK_TAG is sent in the payload
        self.assertTrue('sd_check:disk' in payload['host-tags']['system'])
 def test_hostname_metadata(self):
     """
     Collect hostname metadata
     """
     c = Collector({"collect_instance_metadata": True}, None, {}, "foo")
     metadata = c._get_hostname_metadata()
     assert "hostname" in metadata
     assert "socket-fqdn" in metadata
     assert "socket-hostname" in metadata
Esempio n. 6
0
    def test_topology_collection(self):
        agentConfig = {
            'api_key': 'test_apikey',
            'check_timings': True,
            'collect_ec2_tags': True,
            'collect_instance_metadata': False,
            'create_dd_check_tags': False,
            'version': 'test',
            'tags': '',
        }

        # Run a single checks.d check as part of the collector.
        dummy_topology_check_config = {
            "init_config": {},
            "instances": [{"dummy_instance": "dummy_instance"}]
        }

# create dummy checks, creating two component and 1 relation
        check1 = DummyTopologyCheck(1, 'dummy_topology_check', dummy_topology_check_config.get('init_config'), agentConfig, instances=[{"instance_id": 1, "pass":True}, {"instance_id": 2, "pass":True}])
        check2 = DummyTopologyCheck(2, 'dummy_topology_check', dummy_topology_check_config.get('init_config'), agentConfig, instances=[{"instance_id": 3, "pass":True}, {"instance_id": 4, "pass":True}], snapshot=True)

        emitted_topologies = []

# mock emitter to pick up data emitted by the collector
        def mock_emitter(message, log, agentConfig, endpoint):
            emitted_topologies.extend(message['topologies'])

        c = Collector(agentConfig, [mock_emitter], {}, get_hostname(agentConfig))
        payload, _ = c.run({
            'initialized_checks': [check1, check2],
            'init_failed_checks': {}
        })
        topologies = payload['topologies']

        def assertTopology(topology, check, instance_id):
            self.assertEquals(topology['instance'], check.instance_key(instance_id))
            self.assertEquals(len(topology['components']), 2)
            self.assertEquals(len(topology['relations']), 1)
            self.assertEquals(check.expected_components(instance_id), topology['components'])
            self.assertEquals(check.expected_relations(), topology['relations'])
            if check.snapshot:
                self.assertTrue(topology["start_snapshot"])
                self.assertTrue(topology["stop_snapshot"])
            else:
                self.assertTrue("start_snapshot" not in topology)
                self.assertTrue("stop_snapshot" not in topology)

# Make sure the emissions of the collector are observed
        assertTopology(topologies[0], check1, 1)
        assertTopology(topologies[1], check1, 2)
        assertTopology(topologies[2], check2, 4)
        assertTopology(topologies[3], check2, 3)

        assertTopology(emitted_topologies[0], check1, 1)
        assertTopology(emitted_topologies[1], check1, 2)
        assertTopology(emitted_topologies[2], check2, 4)
        assertTopology(emitted_topologies[3], check2, 3)
Esempio n. 7
0
    def run(self):
        emitters = self.get_emitters()
        systemStats = get_system_stats()
        collector = Collector(self.config, emitters, systemStats)

        # Load the checks.d checks
        checksd = load_check_directory(self.config)

        # Main agent loop will run until interrupted
        while self.running:
            collector.run(checksd=checksd)
            time.sleep(self.config['check_freq'])
Esempio n. 8
0
    def run(self):
        log.debug("Windows Service - Starting collector")
        emitters = self.get_emitters()
        systemStats = get_system_stats()
        self.collector = Collector(self.config, emitters, systemStats)

        # Load the checks.d checks
        checksd = load_check_directory(self.config)

        # Main agent loop will run until interrupted
        while self.running:
            self.collector.run(checksd=checksd, start_event=self.start_event)
            time.sleep(self.config['check_freq'])
Esempio n. 9
0
    def run(self, agentConfig=None, run_forever=True):
        """Main loop of the collector"""
        agentLogger = logging.getLogger('agent')
        systemStats = get_system_stats()

        if agentConfig is None:
            agentConfig = get_config()

        # Load the checks.d checks
        checksd = load_check_directory(agentConfig)

        # Try to fetch instance Id from EC2 if not hostname has been set
        # in the config file.
        # DEPRECATED
        if agentConfig.get('hostname') is None and agentConfig.get(
                'use_ec2_instance_id'):
            instanceId = EC2.get_instance_id()
            if instanceId is not None:
                agentLogger.info("Running on EC2, instanceId: %s" % instanceId)
                agentConfig['hostname'] = instanceId
            else:
                agentLogger.info(
                    'Not running on EC2, using hostname to identify this server'
                )

        emitters = [http_emitter]
        for emitter_spec in [
                s.strip()
                for s in agentConfig.get('custom_emitters', '').split(',')
        ]:
            if len(emitter_spec) == 0: continue
            emitters.append(modules.load(emitter_spec, 'emitter'))

        check_freq = int(agentConfig['check_freq'])

        # Checks instance
        collector = Collector(agentConfig, emitters, systemStats)

        # Watchdog
        watchdog = None
        if agentConfig.get("watchdog", True):
            watchdog = Watchdog(check_freq * WATCHDOG_MULTIPLIER)
            watchdog.reset()

        # Main loop
        while run_forever:
            collector.run(checksd=checksd)
            if watchdog is not None:
                watchdog.reset()
            time.sleep(check_freq)
Esempio n. 10
0
    def run(self):
        from config import initialize_logging
        initialize_logging('windows_collector')
        log.debug("Windows Service - Starting collector")
        set_win32_requests_ca_bundle_path()
        emitters = self.get_emitters()
        systemStats = get_system_stats()
        self.collector = Collector(self.config, emitters, systemStats,
                                   self.hostname)

        in_developer_mode = self.config.get('developer_mode')

        # In developer mode, the number of runs to be included in a single collector profile
        collector_profile_interval = self.config.get(
            'collector_profile_interval', DEFAULT_COLLECTOR_PROFILE_INTERVAL)
        profiled = False
        collector_profiled_runs = 0

        # Load the checks.d checks
        checksd = load_check_directory(self.config, self.hostname)

        # Main agent loop will run until interrupted
        while self.running:
            if self._heartbeat:
                self._heartbeat.send(0)

            if in_developer_mode and not profiled:
                try:
                    profiler = AgentProfiler()
                    profiler.enable_profiling()
                    profiled = True
                except Exception as e:
                    log.warn("Cannot enable profiler: %s" % str(e))

            self.collector.run(checksd=checksd)

            if profiled:
                if collector_profiled_runs >= collector_profile_interval:
                    try:
                        profiler.disable_profiling()
                        profiled = False
                        collector_profiled_runs = 0
                    except Exception as e:
                        log.warn("Cannot disable profiler: %s" % str(e))
                else:
                    collector_profiled_runs += 1

            time.sleep(self.config['check_freq'])
Esempio n. 11
0
    def run(self):
        from config import initialize_logging; initialize_logging('windows_collector')
        log.debug("Windows Service - Starting collector")
        emitters = self.get_emitters()
        systemStats = get_system_stats()
        self.collector = Collector(self.config, emitters, systemStats, self.hostname)

        # Load the checks.d checks
        checksd = load_check_directory(self.config, self.hostname)

        # Main agent loop will run until interrupted
        while self.running:
            if self._heartbeat:
                self._heartbeat.send(0)
            self.collector.run(checksd=checksd)
            time.sleep(self.config['check_freq'])
Esempio n. 12
0
    def run(self):
        """Main loop of the collector"""

        # Gracefully exit on sigterm.
        signal.signal(signal.SIGTERM, self._handle_sigterm)

        # Save the agent start-up stats.
        CollectorStatus().persist()

        # Intialize the collector.
        agentConfig = self._set_agent_config_hostname(get_config())
        systemStats = get_system_stats()
        emitters = self._get_emitters(agentConfig)
        self.collector = Collector(agentConfig, emitters, systemStats)

        # Load the checks.d checks
        checksd = load_check_directory(agentConfig)

        # Configure the watchdog.
        check_frequency = int(agentConfig['check_freq'])
        watchdog = self._get_watchdog(check_frequency, agentConfig)

        # Run the main loop.
        while self.run_forever:
            # Do the work.
            self.collector.run(checksd=checksd)

            # Only plan for the next loop if we will continue,
            # otherwise just exit quickly.
            if self.run_forever:
                if watchdog:
                    watchdog.reset()
                time.sleep(check_frequency)

        # Now clean-up.
        try:
            CollectorStatus.remove_latest_status()
        except:
            pass

        # Explicitly kill the process, because it might be running
        # as a daemon.
        agent_logger.info("Exiting. Bye bye.")
        sys.exit(0)
Esempio n. 13
0
    def run(self, config=None):
        """Main loop of the collector"""

        # Gracefully exit on sigterm.
        signal.signal(signal.SIGTERM, self._handle_sigterm)

        # A SIGUSR1 signals an exit with an autorestart
        signal.signal(signal.SIGUSR1, self._handle_sigusr1)

        # Handle Keyboard Interrupt
        signal.signal(signal.SIGINT, self._handle_sigterm)

        # A SIGHUP signals a configuration reload
        signal.signal(signal.SIGHUP, self._handle_sighup)

        # Save the agent start-up stats.
        CollectorStatus().persist()

        # Intialize the collector.
        if not config:
            config = get_config(parse_args=True)

        self._agentConfig = self._set_agent_config_hostname(config)
        hostname = get_hostname(self._agentConfig)
        systemStats = get_system_stats(proc_path=self._agentConfig.get(
            'procfs_path', '/proc').rstrip('/'))
        emitters = self._get_emitters()

        # Initialize service discovery
        if self._agentConfig.get('service_discovery'):
            self.sd_backend = get_sd_backend(self._agentConfig)

        # Load the checks.d checks
        self._checksd = load_check_directory(self._agentConfig, hostname)

        # Initialize the Collector
        self.collector = Collector(self._agentConfig, emitters, systemStats,
                                   hostname)

        # In developer mode, the number of runs to be included in a single collector profile
        self.collector_profile_interval = self._agentConfig.get(
            'collector_profile_interval', DEFAULT_COLLECTOR_PROFILE_INTERVAL)

        # Configure the watchdog.
        self.check_frequency = int(self._agentConfig['check_freq'])
        watchdog = self._get_watchdog(self.check_frequency)

        # Initialize the auto-restarter
        self.restart_interval = int(
            self._agentConfig.get('restart_interval', RESTART_INTERVAL))
        self.agent_start = time.time()

        profiled = False
        collector_profiled_runs = 0

        # Run the main loop.
        while self.run_forever:
            log.debug("Found {num_checks} checks".format(
                num_checks=len(self._checksd['initialized_checks'])))

            # Setup profiling if necessary
            if self.in_developer_mode and not profiled:
                try:
                    profiler = AgentProfiler()
                    profiler.enable_profiling()
                    profiled = True
                except Exception as e:
                    log.warn("Cannot enable profiler: %s" % str(e))

            # Do the work.
            self.collector.run(checksd=self._checksd,
                               start_event=self.start_event,
                               configs_reloaded=self.configs_reloaded)

            # This flag is used to know if the check configs have been reloaded at the current
            # run of the agent yet or not. It's used by the collector to know if it needs to
            # look for the AgentMetrics check and pop it out.
            # See: https://github.com/DataDog/dd-agent/blob/5.6.x/checks/collector.py#L265-L272
            self.configs_reloaded = False

            # Look for change in the config template store.
            # The self.sd_backend.reload_check_configs flag is set
            # to True if a config reload is needed.
            if self._agentConfig.get('service_discovery') and self.sd_backend and \
               not self.sd_backend.reload_check_configs:
                try:
                    self.sd_backend.reload_check_configs = get_config_store(
                        self._agentConfig).crawl_config_template()
                except Exception as e:
                    log.warn(
                        'Something went wrong while looking for config template changes: %s'
                        % str(e))

            # Check if we should run service discovery
            # The `reload_check_configs` flag can be set through the docker_daemon check or
            # using ConfigStore.crawl_config_template
            if self._agentConfig.get('service_discovery') and self.sd_backend and \
               self.sd_backend.reload_check_configs:
                self.reload_configs()
                self.configs_reloaded = True
                self.sd_backend.reload_check_configs = False

            if profiled:
                if collector_profiled_runs >= self.collector_profile_interval:
                    try:
                        profiler.disable_profiling()
                        profiled = False
                        collector_profiled_runs = 0
                    except Exception as e:
                        log.warn("Cannot disable profiler: %s" % str(e))

            # Check if we should restart.
            if self.autorestart and self._should_restart():
                self._do_restart()

            # Only plan for next loop if we will continue, otherwise exit quickly.
            if self.run_forever:
                if watchdog:
                    watchdog.reset()
                if profiled:
                    collector_profiled_runs += 1
                log.debug("Sleeping for {0} seconds".format(
                    self.check_frequency))
                time.sleep(self.check_frequency)

        # Now clean-up.
        try:
            CollectorStatus.remove_latest_status()
        except Exception:
            pass

        # Explicitly kill the process, because it might be running as a daemon.
        log.info("Exiting. Bye bye.")
        sys.exit(0)
Esempio n. 14
0
    def run(self, config=None):
        """Main loop of the collector"""

        # Gracefully exit on sigterm.
        signal.signal(signal.SIGTERM, self._handle_sigterm)

        # A SIGUSR1 signals an exit with an autorestart
        signal.signal(signal.SIGUSR1, self._handle_sigusr1)

        # Handle Keyboard Interrupt
        signal.signal(signal.SIGINT, self._handle_sigterm)

        # Save the agent start-up stats.
        CollectorStatus().persist()

        # Intialize the collector.
        if not config:
            config = get_config(parse_args=True)

        agentConfig = self._set_agent_config_hostname(config)
        systemStats = get_system_stats()
        emitters = self._get_emitters(agentConfig)
        # Load the checks.d checks
        checksd = load_check_directory(agentConfig)

        self.collector = Collector(agentConfig, emitters, systemStats)

        # Configure the watchdog.
        check_frequency = int(agentConfig['check_freq'])
        watchdog = self._get_watchdog(check_frequency, agentConfig)

        # Initialize the auto-restarter
        self.restart_interval = int(
            agentConfig.get('restart_interval', RESTART_INTERVAL))
        self.agent_start = time.time()

        # Run the main loop.
        while self.run_forever:

            # enable profiler if needed
            profiled = False
            if agentConfig.get(
                    'profile',
                    False) and agentConfig.get('profile').lower() == 'yes':
                try:
                    import cProfile
                    profiler = cProfile.Profile()
                    profiled = True
                    profiler.enable()
                    log.debug("Agent profiling is enabled")
                except Exception:
                    log.warn("Cannot enable profiler")

            # Do the work.
            self.collector.run(checksd=checksd, start_event=self.start_event)

            # disable profiler and printout stats to stdout
            if agentConfig.get('profile', False) and agentConfig.get(
                    'profile').lower() == 'yes' and profiled:
                try:
                    profiler.disable()
                    import pstats
                    from cStringIO import StringIO
                    s = StringIO()
                    ps = pstats.Stats(profiler,
                                      stream=s).sort_stats("cumulative")
                    ps.print_stats()
                    log.debug(s.getvalue())
                except Exception:
                    log.warn("Cannot disable profiler")

            # Check if we should restart.
            if self.autorestart and self._should_restart():
                self._do_restart()

            # Only plan for the next loop if we will continue,
            # otherwise just exit quickly.
            if self.run_forever:
                if watchdog:
                    watchdog.reset()
                time.sleep(check_frequency)

        # Now clean-up.
        try:
            CollectorStatus.remove_latest_status()
        except Exception:
            pass

        # Explicitly kill the process, because it might be running
        # as a daemon.
        log.info("Exiting. Bye bye.")
        sys.exit(0)
Esempio n. 15
0
 def test_metadata(self):
     c = Collector({}, None, {})
     assert "hostname" in c._get_metadata()
     assert "socket-fqdn" in c._get_metadata()
     assert "socket-hostname" in c._get_metadata()
Esempio n. 16
0
    def run(self, config=None):
        """Main loop of the collector"""

        # Gracefully exit on sigterm.
        signal.signal(signal.SIGTERM, self._handle_sigterm)

        # A SIGUSR1 signals an exit with an autorestart
        signal.signal(signal.SIGUSR1, self._handle_sigusr1)

        # Handle Keyboard Interrupt
        signal.signal(signal.SIGINT, self._handle_sigterm)

        # Save the agent start-up stats.
        CollectorStatus().persist()

        # Intialize the collector.
        if not config:
            config = get_config(parse_args=True)

        agentConfig = self._set_agent_config_hostname(config)
        hostname = get_hostname(agentConfig)
        systemStats = get_system_stats()
        emitters = self._get_emitters(agentConfig)
        # Load the checks.d checks
        checksd = load_check_directory(agentConfig, hostname)

        self.collector = Collector(agentConfig, emitters, systemStats,
                                   hostname)

        # In developer mode, the number of runs to be included in a single collector profile
        collector_profile_interval = agentConfig.get(
            'collector_profile_interval', DEFAULT_COLLECTOR_PROFILE_INTERVAL)

        # Configure the watchdog.
        check_frequency = int(agentConfig['check_freq'])
        watchdog = self._get_watchdog(check_frequency, agentConfig)

        # Initialize the auto-restarter
        self.restart_interval = int(
            agentConfig.get('restart_interval', RESTART_INTERVAL))
        self.agent_start = time.time()

        profiled = False
        collector_profiled_runs = 0

        # Run the main loop.
        while self.run_forever:
            # Setup profiling if necessary
            if self.in_developer_mode and not profiled:
                try:
                    profiler = AgentProfiler()
                    profiler.enable_profiling()
                    profiled = True
                except Exception as e:
                    log.warn("Cannot enable profiler: %s" % str(e))

            # Do the work.
            self.collector.run(checksd=checksd, start_event=self.start_event)
            if profiled:
                if collector_profiled_runs >= collector_profile_interval:
                    try:
                        profiler.disable_profiling()
                        profiled = False
                        collector_profiled_runs = 0
                    except Exception as e:
                        log.warn("Cannot disable profiler: %s" % str(e))

            # Check if we should restart.
            if self.autorestart and self._should_restart():
                self._do_restart()

            # Only plan for the next loop if we will continue,
            # otherwise just exit quickly.
            if self.run_forever:
                if watchdog:
                    watchdog.reset()
                if profiled:
                    collector_profiled_runs += 1
                time.sleep(check_frequency)
        # Now clean-up.
        try:
            CollectorStatus.remove_latest_status()
        except Exception:
            pass

        # Explicitly kill the process, because it might be running
        # as a daemon.
        log.info("Exiting. Bye bye.")
        sys.exit(0)
Esempio n. 17
0
 def test_metadata(self):
     c = Collector({"collect_instance_metadata": True}, None, {}, "foo")
     assert "hostname" in c._get_metadata()
     assert "socket-fqdn" in c._get_metadata()
     assert "socket-hostname" in c._get_metadata()
Esempio n. 18
0
    def run(self, config=None):
        """Main loop of the collector"""

        # Gracefully exit on sigterm.
        signal.signal(signal.SIGTERM, self._handle_sigterm)

        # A SIGUSR1 signals an exit with an autorestart
        signal.signal(signal.SIGUSR1, self._handle_sigusr1)

        # Handle Keyboard Interrupt
        signal.signal(signal.SIGINT, self._handle_sigterm)

        # Save the agent start-up stats.
        CollectorStatus().persist()

        # Intialize the collector.
        if not config:
            config = get_config(parse_args=True)

        agentConfig = self._set_agent_config_hostname(config)
        systemStats = get_system_stats()
        emitters = self._get_emitters(agentConfig)
        # Load the checks.d checks
        checksd = load_check_directory(agentConfig)

        self.collector = Collector(agentConfig, emitters, systemStats)

        # Configure the watchdog.
        check_frequency = int(agentConfig['check_freq'])
        watchdog = self._get_watchdog(check_frequency, agentConfig)

        # Initialize the auto-restarter
        self.restart_interval = int(
            agentConfig.get('restart_interval', RESTART_INTERVAL))
        self.agent_start = time.time()

        # Run the main loop.
        while self.run_forever:
            # Do the work.
            self.collector.run(checksd=checksd, start_event=self.start_event)

            # Check if we should restart.
            if self.autorestart and self._should_restart():
                self._do_restart()

            # Only plan for the next loop if we will continue,
            # otherwise just exit quickly.
            if self.run_forever:
                if watchdog:
                    watchdog.reset()
                time.sleep(check_frequency)

        # Now clean-up.
        try:
            CollectorStatus.remove_latest_status()
        except:
            pass

        # Explicitly kill the process, because it might be running
        # as a daemon.
        log.info("Exiting. Bye bye.")
        sys.exit(0)
Esempio n. 19
0
    def run(self, config=None):
        """Main loop of the collector"""

        # Gracefully exit on sigterm.
        signal.signal(signal.SIGTERM, self._handle_sigterm)

        if not Platform.is_windows():
            # A SIGUSR1 signals an exit with an autorestart
            signal.signal(signal.SIGUSR1, self._handle_sigusr1)

            # Handle Keyboard Interrupt
            signal.signal(signal.SIGINT, self._handle_sigterm)

            # A SIGHUP signals a configuration reload
            signal.signal(signal.SIGHUP, self._handle_sighup)

        # Save the agent start-up stats.
        CollectorStatus().persist()

        # Intialize the collector.
        if not config:
            config = get_config(parse_args=True)

        self._agentConfig = self._set_agent_config_hostname(config)
        hostname = get_hostname(self._agentConfig)
        systemStats = get_system_stats(proc_path=self._agentConfig.get(
            'procfs_path', '/proc').rstrip('/'))
        emitters = self._get_emitters()

        # Initialize service discovery
        if self._agentConfig.get('service_discovery'):
            self.sd_backend = get_sd_backend(self._agentConfig)

        if _is_affirmative(self._agentConfig.get('sd_jmx_enable', False)):
            pipe_path = get_jmx_pipe_path()
            if Platform.is_windows():
                pipe_name = pipe_path.format(pipename=SD_PIPE_NAME)
            else:
                pipe_name = os.path.join(pipe_path, SD_PIPE_NAME)

            if os.access(pipe_path, os.W_OK):
                if not os.path.exists(pipe_name):
                    os.mkfifo(pipe_name)
                self.sd_pipe = os.open(
                    pipe_name, os.O_RDWR)  # RW to avoid blocking (will only W)

                # Initialize Supervisor proxy
                self.supervisor_proxy = self._get_supervisor_socket(
                    self._agentConfig)
            else:
                log.debug(
                    'Unable to create pipe in temporary directory. JMX service discovery disabled.'
                )

        # Load the checks.d checks
        self._checksd = load_check_directory(self._agentConfig, hostname)

        # Load JMX configs if available
        if self._jmx_service_discovery_enabled:
            jmx_sd_configs = generate_jmx_configs(self._agentConfig, hostname)
            if jmx_sd_configs:
                self._submit_jmx_service_discovery(jmx_sd_configs)

        # Initialize the Collector
        self.collector = Collector(self._agentConfig, emitters, systemStats,
                                   hostname)

        # In developer mode, the number of runs to be included in a single collector profile
        try:
            self.collector_profile_interval = int(
                self._agentConfig.get('collector_profile_interval',
                                      DEFAULT_COLLECTOR_PROFILE_INTERVAL))
        except ValueError:
            log.warn('collector_profile_interval is invalid. '
                     'Using default value instead (%s).' %
                     DEFAULT_COLLECTOR_PROFILE_INTERVAL)
            self.collector_profile_interval = DEFAULT_COLLECTOR_PROFILE_INTERVAL

        # Configure the watchdog.
        self.check_frequency = int(self._agentConfig['check_freq'])
        watchdog = self._get_watchdog(self.check_frequency)

        # Initialize the auto-restarter
        self.restart_interval = int(
            self._agentConfig.get('restart_interval', RESTART_INTERVAL))
        self.agent_start = time.time()

        self.allow_profiling = self._agentConfig.get('allow_profiling', True)

        profiled = False
        collector_profiled_runs = 0

        # Run the main loop.
        while self.run_forever:
            # Setup profiling if necessary
            if self.allow_profiling and self.in_developer_mode and not profiled:
                try:
                    profiler = AgentProfiler()
                    profiler.enable_profiling()
                    profiled = True
                except Exception as e:
                    log.warn("Cannot enable profiler: %s" % str(e))

            if self.reload_configs_flag:
                if isinstance(self.reload_configs_flag, set):
                    self.reload_configs(
                        checks_to_reload=self.reload_configs_flag)
                else:
                    self.reload_configs()

            # Do the work. Pass `configs_reloaded` to let the collector know if it needs to
            # look for the AgentMetrics check and pop it out.
            self.collector.run(
                checksd=self._checksd,
                start_event=self.start_event,
                configs_reloaded=True if self.reload_configs_flag else False)

            self.reload_configs_flag = False

            # Look for change in the config template store.
            # The self.sd_backend.reload_check_configs flag is set
            # to True if a config reload is needed.
            if self._agentConfig.get('service_discovery') and self.sd_backend and \
               not self.sd_backend.reload_check_configs:
                try:
                    self.sd_backend.reload_check_configs = get_config_store(
                        self._agentConfig).crawl_config_template()
                except Exception as e:
                    log.warn(
                        'Something went wrong while looking for config template changes: %s'
                        % str(e))

            # Check if we should run service discovery
            # The `reload_check_configs` flag can be set through the docker_daemon check or
            # using ConfigStore.crawl_config_template
            if self._agentConfig.get('service_discovery') and self.sd_backend and \
               self.sd_backend.reload_check_configs:
                self.reload_configs_flag = self.sd_backend.reload_check_configs
                self.sd_backend.reload_check_configs = False

            if profiled:
                if collector_profiled_runs >= self.collector_profile_interval:
                    try:
                        profiler.disable_profiling()
                        profiled = False
                        collector_profiled_runs = 0
                    except Exception as e:
                        log.warn("Cannot disable profiler: %s" % str(e))

            # Check if we should restart.
            if self.autorestart and self._should_restart():
                self._do_restart()

            # Only plan for next loop if we will continue, otherwise exit quickly.
            if self.run_forever:
                if watchdog:
                    watchdog.reset()
                if profiled:
                    collector_profiled_runs += 1
                log.debug("Sleeping for {0} seconds".format(
                    self.check_frequency))
                time.sleep(self.check_frequency)

        # Now clean-up.
        try:
            CollectorStatus.remove_latest_status()
        except Exception:
            pass

        # Explicitly kill the process, because it might be running as a daemon.
        log.info("Exiting. Bye bye.")
        sys.exit(0)
Esempio n. 20
0
    def run(self, config=None):

        signal.signal(signal.SIGTERM, self._handle_sigterm)

        signal.signal(signal.SIGUSR1, self._handle_sigusr1)

        signal.signal(signal.SIGINT, self._handle_sigterm)

        signal.signal(signal.SIGHUP, self._handle_sighup)

        CollectorStatus().persist()

        if not config:
            config = get_config(parse_args=True)

        self._agentConfig = self._set_agent_config_hostname(config)
        hostname = get_hostname(self._agentConfig)
        systemStats = get_system_stats()
        emitters = self._get_emitters()

        self._checksd = load_check_directory(self._agentConfig, hostname)

        self.collector = Collector(self._agentConfig, emitters, systemStats, hostname)

        self.collector_profile_interval = self._agentConfig.get('collector_profile_interval',
                                                                DEFAULT_COLLECTOR_PROFILE_INTERVAL)

        self.check_frequency = int(self._agentConfig['check_freq'])
        watchmonitor = self._get_watchmonitor(self.check_frequency)

        self.restart_interval = int(self._agentConfig.get('restart_interval', RESTART_INTERVAL))
        self.agent_start = time.time()

        profiled = False
        collector_profiled_runs = 0

        while self.run_forever:
            log.debug("Found {num_checks} checks".format(num_checks=len(self._checksd['initialized_checks'])))

            if self.in_developer_mode and not profiled:
                try:
                    profiler = AgentProfiler()
                    profiler.enable_profiling()
                    profiled = True
                except Exception as e:
                    log.warn("Cannot enable profiler: %s" % str(e))

            self.collector.run(checksd=self._checksd,
                               start_event=self.start_event,
                               configs_reloaded=self.configs_reloaded)
            if self.configs_reloaded:
                self.configs_reloaded = False
            if profiled:
                if collector_profiled_runs >= self.collector_profile_interval:
                    try:
                        profiler.disable_profiling()
                        profiled = False
                        collector_profiled_runs = 0
                    except Exception as e:
                        log.warn("Cannot disable profiler: %s" % str(e))

            if self.autorestart and self._should_restart():
                self._do_restart()

            if self.run_forever:
                if watchmonitor:
                    watchmonitor.reset()
                if profiled:
                    collector_profiled_runs += 1
                log.debug("Sleeping for {0} seconds".format(self.check_frequency))
                time.sleep(self.check_frequency)

        try:
            CollectorStatus.remove_latest_status()
        except Exception:
            pass

        log.info("Exiting. Bye bye.")
        sys.exit(0)