def _get_watchdog(self, check_freq, agentConfig):
    """Build and arm the watchdog for the check loop, if enabled.

    The watchdog is enabled unless ``agentConfig["watchdog"]`` is falsy
    (a missing key counts as enabled).

    :param check_freq: check frequency; multiplied by WATCHDOG_MULTIPLIER
        to obtain the watchdog's first argument.
    :param agentConfig: agent configuration mapping. The optional
        ``limit_memory_consumption`` entry is forwarded as ``max_mem_mb``.
    :returns: a freshly reset Watchdog, or None when disabled.
    """
    if not agentConfig.get("watchdog", True):
        return None

    timeout = check_freq * WATCHDOG_MULTIPLIER
    memory_cap = agentConfig.get('limit_memory_consumption', None)
    guard = Watchdog(timeout, max_mem_mb=memory_cap)
    # Arm it right away so the countdown starts from now.
    guard.reset()
    return guard
def __init__(self, interval, metrics_aggregator, api_host, api_key=None, use_watchdog=False):
    """Set up the reporter as a daemon thread.

    :param interval: reporting interval; coerced with ``int()``.
    :param metrics_aggregator: aggregator object stored on the instance.
    :param api_host: API host, optionally prefixed with ``http://`` or
        ``https://``. The scheme, when present, is stripped from the
        stored host and selects the connection class.
    :param api_key: API key stored on the instance.
    :param use_watchdog: when truthy, create a Watchdog with
        WATCHDOG_TIMEOUT.
    """
    threading.Thread.__init__(self)
    self.daemon = True

    self.interval = int(interval)
    self.finished = threading.Event()
    self.metrics_aggregator = metrics_aggregator
    self.flush_count = 0

    self.watchdog = None
    if use_watchdog:
        # Imported lazily, only when a watchdog is actually requested.
        from util import Watchdog
        self.watchdog = Watchdog(WATCHDOG_TIMEOUT)

    self.api_key = api_key

    parsed = re.match('^(https?)://(.*)', api_host)
    if parsed is None:
        # No explicit scheme: keep the host untouched and default to HTTPS.
        self.api_host = api_host
        self.http_conn_cls = http_client.HTTPSConnection
    else:
        scheme, bare_host = parsed.groups()
        self.api_host = bare_host
        if scheme == 'http':
            self.http_conn_cls = http_client.HTTPConnection
        else:
            self.http_conn_cls = http_client.HTTPSConnection
def __init__(self, port, agentConfig, watchdog=True, skip_ssl_validation=False, use_simple_http_client=False):
    """Initialise the forwarder application state.

    :param port: listening port; coerced with ``int()``.
    :param agentConfig: agent configuration mapping. Its
        ``skip_ssl_validation`` and ``limit_memory_consumption`` entries
        are read here.
    :param watchdog: when truthy, create a Watchdog for this application.
    :param skip_ssl_validation: skip SSL hostname validation; the config
        entry of the same name is used as a fallback when this is falsy.
    :param use_simple_http_client: flag stored on the instance.
    """
    self._port = int(port)
    self._agentConfig = agentConfig
    self._metrics = {}

    # Wire the transaction class to this application and its manager.
    MetricTransaction.set_application(self)
    MetricTransaction.set_endpoints()
    self._tr_manager = TransactionManager(
        MAX_WAIT_FOR_REPLAY, MAX_QUEUE_SIZE, THROTTLING_DELAY)
    MetricTransaction.set_tr_manager(self._tr_manager)

    self._watchdog = None

    config_skip_ssl = skip_ssl_validation or agentConfig.get(
        'skip_ssl_validation', False)
    self.skip_ssl_validation = config_skip_ssl
    self.use_simple_http_client = use_simple_http_client
    if config_skip_ssl:
        log.info(
            "Skipping SSL hostname validation, useful when using a transparent proxy"
        )

    if watchdog:
        # NOTE(review): confirm the unit of TRANSACTION_FLUSH_INTERVAL
        # matches what Watchdog expects as its timeout.
        memory_cap = agentConfig.get('limit_memory_consumption', None)
        self._watchdog = Watchdog(
            TRANSACTION_FLUSH_INTERVAL * WATCHDOG_INTERVAL_MULTIPLIER,
            max_mem_mb=memory_cap)
def use_lots_of_memory(self):
    """Run an Application backed by MemoryHogTxManager under a watchdog.

    Does nothing when the ``TRAVIS`` environment variable is set to a
    non-empty value. The watchdog is built as ``Watchdog(30, 50)``; the
    second positional argument is presumably a memory cap (verify against
    the Watchdog signature).
    """
    # Skip this step on travis
    if not os.environ.get('TRAVIS', False):
        app = Application(12345, {})
        app._watchdog = Watchdog(30, 50)
        app._tr_manager = MemoryHogTxManager()
        app.run()
def run(self, agentConfig=None, run_forever=True):
    """Main loop of the collector.

    :param agentConfig: agent configuration mapping; loaded with
        ``get_config()`` when None. May be mutated: ``hostname`` is set
        to the EC2 instance id when no hostname is configured and
        ``use_ec2_instance_id`` is truthy.
    :param run_forever: loop condition. When falsy the loop body never
        executes, so no collection run happens at all.
    """
    agent_log = logging.getLogger('agent')
    stats = get_system_stats()

    if agentConfig is None:
        agentConfig = get_config()

    # Load the checks.d checks
    checksd = load_check_directory(agentConfig)

    # DEPRECATED: fall back to the EC2 instance id when no hostname has
    # been set in the config file.
    no_hostname = agentConfig.get('hostname') is None
    if no_hostname and agentConfig.get('use_ec2_instance_id'):
        ec2_id = EC2.get_instance_id()
        if ec2_id is None:
            agent_log.info(
                'Not running on EC2, using hostname to identify this server'
            )
        else:
            agent_log.info("Running on EC2, instanceId: %s" % ec2_id)
            agentConfig['hostname'] = ec2_id

    # The HTTP emitter always comes first; custom ones are appended in
    # the order they are listed, skipping blank entries.
    emitters = [http_emitter]
    for raw_spec in agentConfig.get('custom_emitters', '').split(','):
        spec = raw_spec.strip()
        if spec:
            emitters.append(modules.load(spec, 'emitter'))

    check_freq = int(agentConfig['check_freq'])

    # Checks instance
    collector = Collector(agentConfig, emitters, stats)

    # Watchdog (enabled unless the config explicitly turns it off)
    if agentConfig.get("watchdog", True):
        guard = Watchdog(check_freq * WATCHDOG_MULTIPLIER)
        guard.reset()
    else:
        guard = None

    # Main loop
    while run_forever:
        collector.run(checksd=checksd)
        if guard is not None:
            guard.reset()
        time.sleep(check_freq)
def __init__(self, port, agentConfig, watchdog=True):
    """Initialise the forwarder application state.

    :param port: listening port; coerced with ``int()``.
    :param agentConfig: agent configuration mapping, stored as-is.
    :param watchdog: when truthy, create a Watchdog for this application.
    """
    self._port = int(port)
    self._agentConfig = agentConfig
    self._metrics = {}

    # Wire the transaction class to this application and its manager.
    MetricTransaction.set_application(self)
    MetricTransaction.set_endpoints()
    manager = TransactionManager(
        MAX_WAIT_FOR_REPLAY, MAX_QUEUE_SIZE, THROTTLING_DELAY)
    self._tr_manager = manager
    MetricTransaction.set_tr_manager(self._tr_manager)

    if watchdog:
        # NOTE(review): confirm the unit of TRANSACTION_FLUSH_INTERVAL
        # matches what Watchdog expects as its timeout.
        self._watchdog = Watchdog(
            TRANSACTION_FLUSH_INTERVAL * WATCHDOG_INTERVAL_MULTIPLIER)
    else:
        self._watchdog = None
def __init__(self, interval, metrics_aggregator, api_host, api_key=None, use_watchdog=False, event_chunk_size=None):
    """Set up the reporter thread.

    :param interval: reporting interval; coerced with ``int()``.
    :param metrics_aggregator: aggregator object stored on the instance.
    :param api_host: API host, stored unchanged.
    :param api_key: API key stored on the instance.
    :param use_watchdog: when truthy, create a Watchdog with
        WATCHDOG_TIMEOUT.
    :param event_chunk_size: event chunk size; any falsy value (None, 0)
        falls back to EVENT_CHUNK_SIZE.
    """
    threading.Thread.__init__(self)

    self.interval = int(interval)
    self.finished = threading.Event()
    self.metrics_aggregator = metrics_aggregator

    # Counters start at zero.
    self.flush_count = 0
    self.log_count = 0

    self.watchdog = None
    if use_watchdog:
        # Imported lazily, only when a watchdog is actually requested.
        from util import Watchdog
        self.watchdog = Watchdog(WATCHDOG_TIMEOUT)

    self.api_key = api_key
    self.api_host = api_host

    if event_chunk_size:
        self.event_chunk_size = event_chunk_size
    else:
        self.event_chunk_size = EVENT_CHUNK_SIZE
def __init__(self, port, agentConfig, watchdog=True, skip_ssl_validation=False, use_simple_http_client=False):
    """Initialise the forwarder application state.

    :param port: listening port; coerced with ``int()``.
    :param agentConfig: agent configuration mapping. ``endpoints`` and
        ``forwarder_timeout`` are required keys; ``skip_ssl_validation``
        and ``limit_memory_consumption`` are optional.
    :param watchdog: when truthy, create a Watchdog for this application.
    :param skip_ssl_validation: skip SSL hostname validation; the config
        entry of the same name is used as a fallback when this is falsy.
    :param use_simple_http_client: flag stored on the instance.
    :raises KeyError: if a required config key is missing.
    """
    self._port = int(port)
    self._agentConfig = agentConfig
    self._metrics = {}

    AgentTransaction.set_application(self)
    AgentTransaction.set_endpoints(agentConfig['endpoints'])
    if agentConfig['endpoints'] == {}:
        log.warning(
            u"No valid endpoint found. Forwarder will drop all incoming payloads."
        )
    AgentTransaction.set_request_timeout(agentConfig['forwarder_timeout'])

    # Multiple endpoints => enable parallelism
    parallelism = self.NO_PARALLELISM
    endpoint_count = len(agentConfig['endpoints'])
    if endpoint_count > 1:
        parallelism = self.DEFAULT_PARALLELISM

    self._tr_manager = TransactionManager(
        MAX_WAIT_FOR_REPLAY, MAX_QUEUE_SIZE, THROTTLING_DELAY,
        max_parallelism=parallelism)
    AgentTransaction.set_tr_manager(self._tr_manager)

    self._watchdog = None

    config_skip_ssl = skip_ssl_validation or agentConfig.get(
        'skip_ssl_validation', False)
    self.skip_ssl_validation = config_skip_ssl
    self.use_simple_http_client = use_simple_http_client
    if config_skip_ssl:
        log.info(
            "Skipping SSL hostname validation, useful when using a transparent proxy"
        )

    # Monitor activity
    if watchdog:
        # NOTE(review): the division by 1000 presumably converts
        # milliseconds to seconds -- confirm against the constants.
        timeout = TRANSACTION_FLUSH_INTERVAL * WATCHDOG_INTERVAL_MULTIPLIER / 1000
        memory_cap = agentConfig.get('limit_memory_consumption', None)
        self._watchdog = Watchdog(
            timeout,
            max_mem_mb=memory_cap,
            max_resets=WATCHDOG_HIGH_ACTIVITY_THRESHOLD)
def test_watchdog_frenesy_detection(self, mock_restarted):
    """
    Watchdog restarts the process on suspicious high activity.
    """
    # Shrink the restart timeframe so the test does not have to wait
    Watchdog._RESTART_TIMEFRAME = 1

    # Watchdog with a low activity tolerance: 3 resets per timeframe
    process_watchdog = Watchdog(10, max_resets=3)
    ping = process_watchdog.reset

    with self.set_time(1):
        # The first three resets inside the timeframe are accepted
        for _ in range(3):
            ping()

        # The fourth one is flagged as suspiciously high activity
        self.assertRaises(WatchdogKill, ping)

    with self.set_time(3):
        # Once the activity timeframe has expired, resets work again
        ping()
def busy_run(self):
    """Arm a Watchdog(5) once, then spin forever without resetting it.

    The loop only draws random numbers and never returns.
    """
    guard = Watchdog(5)
    guard.reset()

    value = 0
    while True:
        value = random()
def fast_tornado(self):
    """Run an Application on port 12345 with an empty config.

    The application's watchdog is replaced with ``Watchdog(6)`` and its
    transaction manager with a MockTxManager before ``run()`` is called.
    """
    app = Application(12345, {})
    app._watchdog = Watchdog(6)
    app._tr_manager = MockTxManager()
    app.run()
def normal_run(self):
    """Arm a Watchdog(2) and reset it after each of five 1-second sleeps."""
    guard = Watchdog(2)
    guard.reset()

    remaining = 5
    while remaining > 0:
        time.sleep(1)
        guard.reset()
        remaining -= 1
def hanging_net(self):
    """Arm a Watchdog(5), then make a network call to a local port.

    The call to ``http://localhost:31834`` is made after the watchdog is
    armed and is never followed by another reset. If the call does
    return, an error line is printed and True is returned.

    :returns: True, only if the network call returns.
    """
    w = Watchdog(5)
    w.reset()
    response = url.urlopen("http://localhost:31834")
    # Single-argument parenthesised form: prints the same text as the old
    # Python 2 print statement while also being valid Python 3 syntax.
    print("ERROR Net call returned %s" % (response,))
    return True
def slow_tornado(self):
    """Run an Application on port 12345 configured with AGENT_CONFIG.

    The application's watchdog is replaced with ``Watchdog(4)`` and its
    transaction manager with a MockTxManager before ``run()`` is called.
    """
    app = Application(12345, self.AGENT_CONFIG)
    app._watchdog = Watchdog(4)
    app._tr_manager = MockTxManager()
    app.run()
def fast_tornado(self):
    """Run an Application on port 12345 bound to localhost.

    The application's watchdog is replaced with ``Watchdog(6)`` and its
    transaction manager with a MockTxManager before ``run()`` is called.
    """
    config = {"bind_host": "localhost"}
    app = Application(12345, config)
    app._watchdog = Watchdog(6)
    app._tr_manager = MockTxManager()
    app.run()
def _get_watchdog(self, check_freq, agentConfig):
    """Build and arm the watchdog for the check loop, if enabled.

    The watchdog is enabled unless ``agentConfig["watchdog"]`` is falsy
    (a missing key counts as enabled).

    :param check_freq: check frequency; multiplied by WATCHDOG_MULTIPLIER
        to obtain the watchdog's argument.
    :param agentConfig: agent configuration mapping.
    :returns: a freshly reset Watchdog, or None when disabled.
    """
    if not agentConfig.get("watchdog", True):
        return None

    guard = Watchdog(check_freq * WATCHDOG_MULTIPLIER)
    # Arm it right away so the countdown starts from now.
    guard.reset()
    return guard