def __init__(self, config_path):
    """Wire together the statsd aggregator, reporter and UDP server.

    All settings come from ``cfg.Config().get_config(['Main', 'Statsd'])``.

    :param config_path: Not read by this constructor; kept so existing
        callers can keep passing it.
    :raises AssertionError: If ``monasca_statsd_interval`` is not a positive
        integer (only while assertions are enabled).
    """
    statsd_config = cfg.Config().get_config(['Main', 'Statsd'])

    # The aggregator is the point of communication between the server and
    # the reporting threads.
    aggregator = agg.MetricsAggregator(
        util.get_hostname(),
        recent_point_threshold=statsd_config['recent_point_threshold'])

    # Build the reporter that works off the aggregator.
    interval = int(statsd_config['monasca_statsd_interval'])
    assert 0 < interval
    self.reporter = reporter.Reporter(
        interval,
        aggregator,
        statsd_config['forwarder_url'],
        statsd_config.get('event_chunk_size'))

    # Server on an IPv4 stack: an empty host is used when non-local traffic
    # is allowed (presumably binds every interface -- confirm in udp.Server),
    # otherwise only localhost.
    server_host = '' if statsd_config['non_local_traffic'] else 'localhost'

    # Forwarding is optional. Convert the port only when one is configured;
    # int(None) would raise TypeError and abort start-up.
    forward_port = statsd_config.get('monasca_statsd_forward_port')
    if forward_port is not None:
        forward_port = int(forward_port)

    self.server = udp.Server(
        aggregator,
        server_host,
        statsd_config['monasca_statsd_port'],
        forward_to_host=statsd_config.get('monasca_statsd_forward_host'),
        forward_to_port=forward_port)
def check(self, instance):
    """Walk every Jenkins job directory and stamp build results with a host.

    :param instance: Instance configuration; ``name`` and ``jenkins_home``
        are read from it.
    :raises Exception: If ``jenkins_home`` is not set, or no job directory
        exists below ``<jenkins_home>/jobs``.
    """
    instance_name = instance.get('name')

    if self.high_watermarks.get(instance_name, None) is None:
        # First run for this instance: install the watermark table and run
        # the check once to prime it. Installing the table *before* the
        # recursive call is what keeps this from looping forever (as long as
        # nothing resets the entry to None).
        self.high_watermarks[instance_name] = defaultdict(lambda: 0)
        self.check(instance)

    jenkins_home = instance.get('jenkins_home', None)
    if not jenkins_home:
        raise Exception("No jenkins_home directory set in the config file")

    jobs_pattern = os.path.join(jenkins_home, 'jobs', '*')
    job_dirs = glob(jobs_pattern)
    if not job_dirs:
        raise Exception('No jobs found in `%s`! '
                        'Check `jenkins_home` in your config' % (jobs_pattern))

    for job_dir in job_dirs:
        for build in self._get_build_results(instance_name, job_dir):
            build['host'] = get_hostname()
def __init__(self, config_path):
    """Create the statsd aggregator, its reporter and the UDP server.

    Settings are taken from ``cfg.Config().get_config(['Main', 'Statsd'])``.

    :param config_path: Unused here; retained for caller compatibility.
    :raises AssertionError: If ``monasca_statsd_interval`` is not a positive
        integer (only while assertions are enabled).
    """
    config = cfg.Config()
    statsd_config = config.get_config(['Main', 'Statsd'])

    # Create the aggregator (the point of communication between the server
    # and the reporting threads).
    aggregator = agg.MetricsAggregator(
        util.get_hostname(),
        recent_point_threshold=statsd_config['recent_point_threshold'])

    # Create the reporter.
    interval = int(statsd_config['monasca_statsd_interval'])
    assert 0 < interval
    self.reporter = reporter.Reporter(
        interval,
        aggregator,
        statsd_config['forwarder_url'],
        statsd_config.get('event_chunk_size'))

    # Server on an IPv4 stack; '' is used when non-local traffic is enabled
    # (presumably "all interfaces" -- confirm against udp.Server).
    if statsd_config['non_local_traffic']:
        server_host = ''
    else:
        server_host = 'localhost'

    # The forward port is optional, so it may be absent (None). Calling
    # int(None) raises TypeError, so only convert a configured value.
    raw_forward_port = statsd_config.get('monasca_statsd_forward_port')
    forward_port = None if raw_forward_port is None else int(raw_forward_port)

    self.server = udp.Server(
        aggregator,
        server_host,
        statsd_config['monasca_statsd_port'],
        forward_to_host=statsd_config.get('monasca_statsd_forward_host'),
        forward_to_port=forward_port)
def _check(self, instance):
    """Run the configured host-alive test and report the result as a gauge.

    :param instance: Instance configuration; reads ``host_name``,
        ``alive_test`` and the optional ``target_hostname``.
    :returns: ``(services_checks.Status.UP, "UP")`` when the test succeeds,
        otherwise ``(services_checks.Status.DOWN, "DOWN")``.
    :raises ValueError: If ``host_name`` is missing or empty.
    """
    host_name = instance.get('host_name', None)
    if not host_name:
        raise ValueError('host_name not specified!')

    # Multi-homed systems may be probed through a different network name.
    target_hostname = instance.get('target_hostname', None) or host_name

    host_dimensions = {
        'hostname': host_name,
        'observer_host': util.get_hostname(),
    }
    # Only record the probed name when it differs from host_name.
    if target_hostname != host_name:
        host_dimensions['target_hostname'] = target_hostname
    dimensions = self._set_dimensions(host_dimensions, instance)

    success = False
    test_type = instance['alive_test']
    if test_type == 'ssh':
        success, error_message = self._test_ssh(
            target_hostname,
            self.init_config.get('ssh_port'),
            self.init_config.get('ssh_timeout'))
    elif test_type == 'ping':
        success, error_message = self._test_ping(
            target_hostname,
            self.init_config.get('ping_timeout'))
    else:
        error_message = 'Unrecognized alive_test: {0}'.format(test_type)

    dimensions.update({'test_type': test_type})

    # Gauge value 0 means alive, 1 means not alive.
    if success is True:
        self.gauge('host_alive_status', 0, dimensions=dimensions)
        return services_checks.Status.UP, "UP"

    self.gauge('host_alive_status',
               1,
               dimensions=dimensions,
               value_meta={'error': error_message})
    return services_checks.Status.DOWN, "DOWN"
def _check(self, instance):
    """Probe a host with the configured alive test and emit the status gauge.

    :param instance: Instance configuration; reads ``host_name``,
        ``alive_test`` and the optional ``target_hostname``.
    :returns: ``(services_checks.Status.UP, "UP")`` on success, else
        ``(services_checks.Status.DOWN, "DOWN")``.
    :raises ValueError: If ``host_name`` is missing or empty.
    """
    host_name = instance.get('host_name', None)
    if not host_name:
        raise ValueError('host_name not specified!')

    # A separate network name can be checked to handle multi-homed systems.
    override = instance.get('target_hostname', None)
    target_hostname = override if override else host_name

    host_dimensions = {'hostname': host_name,
                       'observer_host': util.get_hostname()}
    if target_hostname != host_name:
        # The check goes to a different name than host_name; record it.
        host_dimensions['target_hostname'] = target_hostname
    dimensions = self._set_dimensions(host_dimensions, instance)

    success = False
    test_type = instance['alive_test']
    if test_type == 'ssh':
        ssh_port = self.init_config.get('ssh_port')
        ssh_timeout = self.init_config.get('ssh_timeout')
        success, error_message = self._test_ssh(target_hostname, ssh_port, ssh_timeout)
    elif test_type == 'ping':
        ping_timeout = self.init_config.get('ping_timeout')
        success, error_message = self._test_ping(target_hostname, ping_timeout)
    else:
        error_message = 'Unrecognized alive_test: {0}'.format(test_type)

    dimensions.update({'test_type': test_type})

    if success is not True:
        # 1 marks the host as not alive; the error is attached as metadata.
        self.gauge('host_alive_status', 1, dimensions=dimensions,
                   value_meta={'error': error_message})
        return services_checks.Status.DOWN, "DOWN"

    self.gauge('host_alive_status', 0, dimensions=dimensions)
    return services_checks.Status.UP, "UP"
def setUp(self):
    """Create a HostAlive check with a mocked gauge and a ping instance."""
    unittest.TestCase.setUp(self)

    # Empty init and agent configs are enough for these tests.
    self._host_alive = HostAlive('TestHostAlive', {}, {})

    # Swap in a mock so tests can inspect the gauge calls.
    gauge_mock = mock.Mock()
    self._gauge = gauge_mock
    self._host_alive.gauge = gauge_mock

    host_name = 'monasca'
    self._host_name = host_name
    self._instance = {'host_name': host_name, 'alive_test': 'ping'}
    self._base_dimensions = {
        'test_type': 'ping',
        'hostname': host_name,
        'observer_host': util.get_hostname(),
    }
def check(self, instance, create_event=True):
    """Report events and metrics for the build results of every Jenkins job.

    For each result returned by ``_get_build_results`` this emits the
    ``jenkins.job.duration`` gauge and increments either
    ``jenkins.job.success`` or ``jenkins.job.failure``.

    :param instance: Instance configuration; ``name`` and ``jenkins_home``
        are read from it.
    :param create_event: When true, also send each build result as an event.
        The priming pass on the first run calls this method with ``False``.
    :raises Exception: If ``jenkins_home`` is not set, or no job directory
        exists below ``<jenkins_home>/jobs``.
    """
    instance_name = instance.get('name')
    base_dimensions = self._set_dimensions(None, instance)

    if self.high_watermarks.get(instance_name, None) is None:
        # On the first run, prime the high_watermarks dict so that only
        # events occurring after the agent started are sent. Installing the
        # table before recursing prevents an infinite loop (assuming nothing
        # ever sets the entry back to None).
        self.high_watermarks[instance_name] = defaultdict(lambda: 0)
        self.check(instance, create_event=False)

    jenkins_home = instance.get('jenkins_home', None)
    if not jenkins_home:
        raise Exception("No jenkins_home directory set in the config file")

    jenkins_jobs_dir = os.path.join(jenkins_home, 'jobs', '*')
    job_dirs = glob(jenkins_jobs_dir)
    if not job_dirs:
        raise Exception('No jobs found in `%s`! '
                        'Check `jenkins_home` in your config' % (jenkins_jobs_dir))

    for job_dir in job_dirs:
        for output in self._get_build_results(instance_name, job_dir):
            output['host'] = get_hostname(self.agent_config)
            if create_event:
                self.log.debug("Creating event for job: %s", output['job_name'])
                self.event(output)

            # Use a fresh dimensions dict per build. Reusing one dict let a
            # 'branch' from an earlier build leak into later builds without
            # one, and mutated dimensions already handed to earlier metrics.
            dimensions = dict(base_dimensions)
            dimensions['job_name'] = output['job_name']
            if 'branch' in output:
                dimensions['branch'] = output['branch']

            # duration is divided by 1000 (presumably ms -> s; confirm
            # against _get_build_results).
            self.gauge("jenkins.job.duration",
                       float(output['duration']) / 1000.0,
                       dimensions=dimensions)

            if output['result'] == 'SUCCESS':
                self.increment('jenkins.job.success', dimensions=dimensions)
            else:
                self.increment('jenkins.job.failure', dimensions=dimensions)
def __init__(self, name, init_config, agent_config, instances=None):
    """Set up a new check.

    :param name: The name of the check
    :param init_config: The config for initializing the check
    :param agent_config: The global configuration for the agent
    :param instances: A list of configuration objects for each instance;
        a falsy value is stored as an empty list.
    """
    super(AgentCheck, self).__init__(agent_config)

    self.name = name
    self.init_config = init_config
    self.hostname = util.get_hostname()

    # Logger name is "<this module>.<check name>".
    logger_name = '%s.%s' % (__name__, name)
    self.log = logging.getLogger(logger_name)

    # The threshold is optional; None is passed through when it is not set.
    self.aggregator = aggregator.MetricsAggregator(
        self.hostname,
        recent_point_threshold=agent_config.get('recent_point_threshold', None))

    self.instances = instances if instances else []
    self.library_versions = None
def create_event(self, state, server, agentConfig):
    """Create an event describing the replication state of a mongo node.

    :param state: Numeric replica-set member state reported for the node.
    :param server: Server identifier used in the event title and text.
    :param agentConfig: Agent configuration; ``api_key`` is read from it and
        the whole mapping is passed to ``get_hostname``.
    :raises KeyError: If ``agentConfig`` has no ``api_key`` entry.
    """
    # Replica-set member state codes. 10 (REMOVED) was missing before, and
    # any unmapped code used to render as "<server> is None".
    state_descriptions = {
        0: 'Starting Up',
        1: 'Primary',
        2: 'Secondary',
        3: 'Recovering',
        4: 'Fatal',
        5: 'Starting up (forking threads)',
        6: 'Unknown',
        7: 'Arbiter',
        8: 'Down',
        9: 'Rollback',
        10: 'Removed',
    }
    status = state_descriptions.get(state, 'Unknown')

    hostname = get_hostname(agentConfig)
    msg_title = "%s is %s" % (server, status)
    msg = "MongoDB %s just reported as %s" % (server, status)

    self.event({
        'timestamp': int(time.time()),
        'event_type': 'Mongo',
        'api_key': agentConfig['api_key'],
        'msg_title': msg_title,
        'msg_text': msg,
        'host': hostname,
    })
def check(self, instance, create_event=True):
    """Emit events and metrics for the build results of every Jenkins job.

    Every result returned by ``_get_build_results`` produces the
    ``jenkins.job.duration`` gauge plus an increment of either
    ``jenkins.job.success`` or ``jenkins.job.failure``.

    :param instance: Instance configuration; ``name`` and ``jenkins_home``
        are read from it.
    :param create_event: When true, each build result is also sent as an
        event. The first-run priming pass recurses with ``False``.
    :raises Exception: If ``jenkins_home`` is not set, or no job directory
        exists below ``<jenkins_home>/jobs``.
    """
    base_dimensions = self._set_dimensions(None, instance)
    name = instance.get('name')

    if self.high_watermarks.get(name, None) is None:
        # First run of check(): prime the high_watermarks dict so that only
        # events that occurred after the agent started are sent. Setting the
        # entry before recursing prevents an infinite loop (assuming nothing
        # ever sets it to None again).
        self.high_watermarks[name] = defaultdict(lambda: 0)
        self.check(instance, create_event=False)

    jenkins_home = instance.get('jenkins_home', None)
    if not jenkins_home:
        raise Exception("No jenkins_home directory set in the config file")

    jenkins_jobs_dir = os.path.join(jenkins_home, 'jobs', '*')
    job_dirs = glob(jenkins_jobs_dir)
    if not job_dirs:
        raise Exception('No jobs found in `%s`! '
                        'Check `jenkins_home` in your config' % (jenkins_jobs_dir))

    for job_dir in job_dirs:
        for output in self._get_build_results(name, job_dir):
            output['host'] = get_hostname(self.agent_config)
            if create_event:
                self.log.debug("Creating event for job: %s", output['job_name'])
                self.event(output)

            # Copy the base dimensions for each build. The shared dict used
            # before kept a stale 'branch' from a previous build whenever the
            # current build had none, and mutated dimensions already handed
            # to earlier metric calls.
            dimensions = dict(base_dimensions)
            dimensions.update({'job_name': output['job_name']})
            if 'branch' in output:
                dimensions.update({'branch': output['branch']})

            # duration / 1000 (presumably milliseconds to seconds -- confirm).
            duration = float(output['duration']) / 1000.0
            self.gauge("jenkins.job.duration", duration, dimensions=dimensions)

            if output['result'] == 'SUCCESS':
                self.increment('jenkins.job.success', dimensions=dimensions)
            else:
                self.increment('jenkins.job.failure', dimensions=dimensions)
def __init__(self, name, init_config, agent_config, instances=None):
    """Initialize a new check.

    :param name: The name of the check
    :param init_config: The config for initializing the check
    :param agent_config: The global configuration for the agent
    :param instances: A list of configuration objects for each instance
        (an empty list is used when none are given).
    """
    super(AgentCheck, self).__init__(agent_config)

    hostname = util.get_hostname()
    check_log = logging.getLogger('%s.%s' % (__name__, name))
    # Optional setting; None is forwarded when it is absent.
    recent_point_threshold = agent_config.get('recent_point_threshold', None)

    self.name = name
    self.init_config = init_config
    self.hostname = hostname
    self.log = check_log
    self.aggregator = aggregator.MetricsAggregator(
        hostname, recent_point_threshold=recent_point_threshold)
    self.instances = instances or []
    self.library_versions = None
def _check(self, instance):
    """Run the desired host-alive check against this host.

    :param instance: Instance configuration; reads ``host_name`` and
        ``alive_test``.
    :returns: ``(services_checks.Status.UP, "UP")`` when the test succeeds,
        otherwise ``(services_checks.Status.DOWN, "DOWN")``.
    :raises ValueError: If ``host_name`` is missing or empty.
    """
    # Use .get() so a missing key reaches the ValueError below instead of
    # surfacing as a bare KeyError.
    host_name = instance.get('host_name')
    if not host_name:
        raise ValueError('Target hostname not specified!')

    dimensions = self._set_dimensions(
        {
            'hostname': host_name,
            'observer_host': util.get_hostname()
        },
        instance)

    success = False
    test_type = instance['alive_test']
    if test_type == 'ssh':
        success, error_message = self._test_ssh(
            host_name,
            self.init_config.get('ssh_port'),
            self.init_config.get('ssh_timeout'))
    elif test_type == 'ping':
        success, error_message = self._test_ping(
            host_name,
            self.init_config.get('ping_timeout'))
    else:
        error_message = 'Unrecognized alive_test: {0}'.format(test_type)

    dimensions.update({'test_type': test_type})

    # Gauge value 0 means alive, 1 means not alive.
    if success is True:
        self.gauge('host_alive_status', 0, dimensions=dimensions)
        return services_checks.Status.UP, "UP"

    self.gauge('host_alive_status',
               1,
               dimensions=dimensions,
               value_meta={'error': error_message})
    # Lazy logging arguments: the message is only built if it is emitted.
    self.log.error('Host alive check for %s failed. Error was %s',
                   host_name, error_message)
    return services_checks.Status.DOWN, "DOWN"
def _check(self, instance):
    """Run the desired host-alive check against this host.

    :param instance: Instance configuration; reads ``host_name`` and
        ``alive_test``.
    :returns: ``(services_checks.Status.UP, "UP")`` on success, else
        ``(services_checks.Status.DOWN, "DOWN")``.
    :raises ValueError: If ``host_name`` is missing or empty.
    """
    # A missing 'host_name' key should raise the ValueError below, not a
    # KeyError from direct indexing, so read it with .get().
    host_name = instance.get('host_name')
    if not host_name:
        raise ValueError('Target hostname not specified!')

    dimensions = self._set_dimensions({'hostname': host_name,
                                       'observer_host': util.get_hostname()},
                                      instance)

    success = False
    test_type = instance['alive_test']
    if test_type == 'ssh':
        success, error_message = self._test_ssh(host_name,
                                                self.init_config.get('ssh_port'),
                                                self.init_config.get('ssh_timeout'))
    elif test_type == 'ping':
        success, error_message = self._test_ping(host_name,
                                                 self.init_config.get('ping_timeout'))
    else:
        error_message = 'Unrecognized alive_test: {0}'.format(test_type)

    dimensions.update({'test_type': test_type})

    if success is True:
        # 0 marks the host as alive.
        self.gauge('host_alive_status', 0, dimensions=dimensions)
        return services_checks.Status.UP, "UP"
    else:
        # 1 marks the host as not alive; the error is attached as metadata.
        self.gauge('host_alive_status', 1, dimensions=dimensions,
                   value_meta={'error': error_message})
        # Pass arguments to the logger instead of pre-formatting the string.
        self.log.error('Host alive check for %s failed. Error was %s',
                       host_name, error_message)
        return services_checks.Status.DOWN, "DOWN"
def setUp(self):
    """Prepare a HostAlive check whose gauge is a mock, plus ping fixtures."""
    unittest.TestCase.setUp(self)

    # Neither the init config nor the agent config needs any entries here.
    init_config, agent_config = {}, {}
    self._host_alive = HostAlive("TestHostAlive", init_config, agent_config)

    # Replace the gauge with a mock so the tests can assert on its calls.
    self._gauge = mock.Mock()
    self._host_alive.gauge = self._gauge

    self._host_name = "monasca"
    self._instance = {
        "host_name": self._host_name,
        "alive_test": "ping",
    }
    self._base_dimensions = {
        "test_type": "ping",
        "hostname": self._host_name,
        "observer_host": util.get_hostname(),
    }