Exemple #1
0
    def __init__(self, config_path):
        """Build the aggregator, the reporter and the UDP server.

        All settings come from the ``Main``/``Statsd`` configuration
        returned by ``cfg.Config().get_config``.

        :param config_path: Not read by this constructor; kept so the
            signature stays stable for callers.
        """
        settings = cfg.Config().get_config(['Main', 'Statsd'])

        # One aggregator instance is handed to both the reporter and the
        # UDP server; it is their shared point of communication.
        shared_aggregator = agg.MetricsAggregator(
            util.get_hostname(),
            recent_point_threshold=settings['recent_point_threshold'])

        # The reporting interval must be a strictly positive integer.
        report_interval = int(settings['monasca_statsd_interval'])
        assert 0 < report_interval

        self.reporter = reporter.Reporter(
            report_interval,
            shared_aggregator,
            settings['forwarder_url'],
            settings.get('event_chunk_size'))

        # An empty host is used when non-local traffic is allowed,
        # otherwise the server is given 'localhost'.
        bind_host = '' if settings['non_local_traffic'] else 'localhost'

        self.server = udp.Server(
            shared_aggregator,
            bind_host,
            settings['monasca_statsd_port'],
            forward_to_host=settings.get('monasca_statsd_forward_host'),
            forward_to_port=int(settings.get('monasca_statsd_forward_port')))
Exemple #2
0
    def check(self, instance):
        """Walk the Jenkins job directories of one instance.

        Every result yielded by ``_get_build_results`` is stamped with
        the local hostname under the ``host`` key.

        :param instance: Instance configuration; ``name`` and
            ``jenkins_home`` are read from it.
        :raises Exception: If ``jenkins_home`` is not configured or no
            job directory matches ``<jenkins_home>/jobs/*``.
        """
        instance_name = instance.get('name')
        if self.high_watermarks.get(instance_name, None) is None:
            # First run for this instance: install the watermark table,
            # then run once more to prime it. Installing the table before
            # recursing is what stops the recursion (as long as nothing
            # ever resets the entry to None).
            self.high_watermarks[instance_name] = defaultdict(lambda: 0)
            self.check(instance)

        jenkins_home = instance.get('jenkins_home', None)
        if not jenkins_home:
            raise Exception("No jenkins_home directory set in the config file")

        jobs_pattern = os.path.join(jenkins_home, 'jobs', '*')
        matched_dirs = glob(jobs_pattern)
        if not matched_dirs:
            raise Exception('No jobs found in `%s`! '
                            'Check `jenkins_home` in your config' %
                            (jobs_pattern))

        for job_path in matched_dirs:
            results = self._get_build_results(instance_name, job_path)
            for build in results:
                build['host'] = get_hostname()
Exemple #3
0
    def __init__(self, config_path):
        """Create the statsd aggregator, reporter and UDP server objects.

        Configuration is taken from the ``Main``/``Statsd`` settings
        exposed by ``cfg.Config()``.

        :param config_path: Unused here; retained for interface
            compatibility.
        """
        conf = cfg.Config().get_config(['Main', 'Statsd'])

        # The aggregator is the object both the server and the reporter
        # are constructed around.
        hostname = util.get_hostname()
        threshold = conf['recent_point_threshold']
        metrics = agg.MetricsAggregator(hostname,
                                        recent_point_threshold=threshold)

        # Reject a zero or negative reporting interval up front.
        period = int(conf['monasca_statsd_interval'])
        assert 0 < period

        self.reporter = reporter.Reporter(period,
                                          metrics,
                                          conf['forwarder_url'],
                                          conf.get('event_chunk_size'))

        # '' when non-local traffic is enabled, 'localhost' otherwise.
        if conf['non_local_traffic']:
            listen_host = ''
        else:
            listen_host = 'localhost'

        self.server = udp.Server(
            metrics, listen_host, conf['monasca_statsd_port'],
            forward_to_host=conf.get('monasca_statsd_forward_host'),
            forward_to_port=int(conf.get('monasca_statsd_forward_port')))
    def _check(self, instance):
        """Probe one host with its configured alive test and report it.

        Emits the ``host_alive_status`` gauge (0 when the test succeeds,
        1 with the error attached as ``value_meta`` otherwise).

        :param instance: Instance configuration. Reads ``host_name`` and
            ``alive_test`` plus the optional ``target_hostname``.
        :returns: ``(services_checks.Status.UP, "UP")`` on success,
            ``(services_checks.Status.DOWN, "DOWN")`` otherwise.
        :raises ValueError: If ``host_name`` is missing or empty.
        """
        host_name = instance.get('host_name', None)
        if not host_name:
            raise ValueError('host_name not specified!')

        # Multi-homed systems may be probed under a different network
        # name; fall back to host_name when none is configured.
        target_hostname = instance.get('target_hostname', None) or host_name

        host_dimensions = {
            'hostname': host_name,
            'observer_host': util.get_hostname()
        }
        # Only record the probe target when it differs from host_name.
        if target_hostname != host_name:
            host_dimensions['target_hostname'] = target_hostname

        dimensions = self._set_dimensions(host_dimensions, instance)

        success = False
        test_type = instance['alive_test']
        if test_type == 'ssh':
            ssh_port = self.init_config.get('ssh_port')
            ssh_timeout = self.init_config.get('ssh_timeout')
            success, error_message = self._test_ssh(
                target_hostname, ssh_port, ssh_timeout)
        elif test_type == 'ping':
            ping_timeout = self.init_config.get('ping_timeout')
            success, error_message = self._test_ping(
                target_hostname, ping_timeout)
        else:
            error_message = 'Unrecognized alive_test: {0}'.format(test_type)

        dimensions.update({'test_type': test_type})

        # Anything other than a literal True counts as a failed probe.
        if success is not True:
            self.gauge('host_alive_status',
                       1,
                       dimensions=dimensions,
                       value_meta={'error': error_message})
            return services_checks.Status.DOWN, "DOWN"

        self.gauge('host_alive_status', 0, dimensions=dimensions)
        return services_checks.Status.UP, "UP"
Exemple #5
0
    def _check(self, instance):
        """Run the configured alive test against a host and publish the result.

        The outcome is published through the ``host_alive_status`` gauge:
        0 for a successful test, 1 (with the error message in
        ``value_meta``) for anything else.

        :param instance: Instance configuration; ``host_name`` and
            ``alive_test`` are read, ``target_hostname`` is optional.
        :returns: A ``(status, text)`` tuple, either
            ``(services_checks.Status.UP, "UP")`` or
            ``(services_checks.Status.DOWN, "DOWN")``.
        :raises ValueError: If no ``host_name`` is configured.
        """
        host_name = instance.get('host_name', None)
        if not host_name:
            raise ValueError('host_name not specified!')

        # A separate network name can be configured for the probe itself,
        # which helps on multi-homed systems.
        configured_target = instance.get('target_hostname', None)
        target_hostname = configured_target if configured_target else host_name

        host_dimensions = {'hostname': host_name,
                           'observer_host': util.get_hostname()}
        # The probe target becomes a dimension only when it is not the
        # host name itself.
        if target_hostname != host_name:
            host_dimensions['target_hostname'] = target_hostname

        dimensions = self._set_dimensions(host_dimensions, instance)

        test_type = instance['alive_test']
        success = False
        if test_type == 'ssh':
            outcome = self._test_ssh(target_hostname,
                                     self.init_config.get('ssh_port'),
                                     self.init_config.get('ssh_timeout'))
            success, error_message = outcome
        elif test_type == 'ping':
            outcome = self._test_ping(target_hostname,
                                      self.init_config.get('ping_timeout'))
            success, error_message = outcome
        else:
            error_message = 'Unrecognized alive_test: {0}'.format(test_type)

        dimensions.update({'test_type': test_type})

        if success is True:
            self.gauge('host_alive_status', 0, dimensions=dimensions)
            result = (services_checks.Status.UP, "UP")
        else:
            self.gauge('host_alive_status', 1, dimensions=dimensions,
                       value_meta={'error': error_message})
            result = (services_checks.Status.DOWN, "DOWN")
        return result
 def setUp(self):
     """Create a HostAlive check with a mocked gauge and shared fixtures."""
     unittest.TestCase.setUp(self)
     self._host_name = 'monasca'

     # Empty init and agent configs are passed to the check.
     self._host_alive = HostAlive('TestHostAlive', {}, {})

     # Replace gauge with a mock so tests can inspect what was reported.
     self._gauge = mock.Mock()
     self._host_alive.gauge = self._gauge

     self._instance = {
         'host_name': self._host_name,
         'alive_test': 'ping',
     }
     self._base_dimensions = {
         'test_type': 'ping',
         'hostname': self._host_name,
         'observer_host': util.get_hostname(),
     }
 def setUp(self):
     """Prepare the check under test, its mocked gauge and common data."""
     unittest.TestCase.setUp(self)

     gauge_mock = mock.Mock()
     check = HostAlive('TestHostAlive', {}, {})
     # The real gauge is swapped out so reported values can be asserted on.
     check.gauge = gauge_mock

     self._host_alive = check
     self._gauge = gauge_mock
     self._host_name = 'monasca'
     self._instance = dict(host_name=self._host_name, alive_test='ping')
     self._base_dimensions = dict(test_type='ping',
                                  hostname=self._host_name,
                                  observer_host=util.get_hostname())
Exemple #8
0
    def check(self, instance, create_event=True):
        """Report Jenkins build results for one configured instance.

        For every result yielded by ``_get_build_results`` the local
        hostname is stored under ``host``. When ``create_event`` is true
        an event is sent, the build duration is gauged (milliseconds
        converted to seconds) and a success or failure counter is
        incremented.

        :param instance: Instance configuration; ``name`` and
            ``jenkins_home`` are read from it.
        :param create_event: When False, results are still iterated but
            no event or metric is emitted. Used by the priming run.
        :raises Exception: If ``jenkins_home`` is not configured or no
            job directory matches ``<jenkins_home>/jobs/*``.
        """
        base_dimensions = self._set_dimensions(None, instance)
        instance_name = instance.get('name')
        if self.high_watermarks.get(instance_name, None) is None:
            # On the first run of check(), prime the high_watermarks dict
            # so that we only send events that occurred after the agent
            # started. Installing the table before recursing prevents an
            # infinite loop (assuming nothing ever resets it to None).
            self.high_watermarks[instance_name] = defaultdict(lambda: 0)
            self.check(instance, create_event=False)

        jenkins_home = instance.get('jenkins_home', None)

        if not jenkins_home:
            raise Exception("No jenkins_home directory set in the config file")

        jenkins_jobs_dir = os.path.join(jenkins_home, 'jobs', '*')
        job_dirs = glob(jenkins_jobs_dir)

        if not job_dirs:
            raise Exception('No jobs found in `%s`! '
                            'Check `jenkins_home` in your config' %
                            (jenkins_jobs_dir))

        for job_dir in job_dirs:
            for output in self._get_build_results(instance_name, job_dir):
                output['host'] = get_hostname(self.agent_config)
                if not create_event:
                    continue

                self.log.debug("Creating event for job: %s",
                               output['job_name'])
                self.event(output)

                # Use a fresh copy for every build. Mutating one shared
                # dict let the 'branch' of an earlier build leak into the
                # metrics of later builds that have no branch.
                dimensions = dict(base_dimensions)
                dimensions['job_name'] = output['job_name']
                if 'branch' in output:
                    dimensions['branch'] = output['branch']

                self.gauge("jenkins.job.duration",
                           float(output['duration']) / 1000.0,
                           dimensions=dimensions)

                if output['result'] == 'SUCCESS':
                    self.increment('jenkins.job.success',
                                   dimensions=dimensions)
                else:
                    self.increment('jenkins.job.failure',
                                   dimensions=dimensions)
Exemple #9
0
    def __init__(self, name, init_config, agent_config, instances=None):
        """Set up the state shared by every agent check.

        :param name: The name of the check; also used as the suffix of
            the logger name.
        :param init_config: The config for initializing the check
        :param agent_config: The global configuration for the agent;
            ``recent_point_threshold`` is read from it.
        :param instances: A list of configuration objects for each
            instance. A falsy value becomes an empty list.
        """
        super(AgentCheck, self).__init__(agent_config)

        self.name = name
        self.init_config = init_config
        self.instances = instances or []
        self.library_versions = None
        self.log = logging.getLogger('%s.%s' % (__name__, name))

        # The aggregator is keyed on this host's name.
        self.hostname = util.get_hostname()
        self.aggregator = aggregator.MetricsAggregator(
            self.hostname,
            recent_point_threshold=agent_config.get('recent_point_threshold',
                                                    None))
Exemple #10
0
    def create_event(self, state, server, agentConfig):
        """Send an event describing the replication state of a mongo node.

        :param state: Numeric replica set member state reported for the
            node.
        :param server: Identifier of the node; interpolated into the
            event title and text.
        :param agentConfig: Agent configuration. ``api_key`` is read from
            it and it is passed to ``get_hostname``.
        """
        # Replica set member states as documented by MongoDB. State 10
        # (REMOVED) was previously missing from this table.
        state_descriptions = {
            0: 'Starting Up',
            1: 'Primary',
            2: 'Secondary',
            3: 'Recovering',
            4: 'Fatal',
            5: 'Starting up (forking threads)',
            6: 'Unknown',
            7: 'Arbiter',
            8: 'Down',
            9: 'Rollback',
            10: 'Removed',
        }

        def get_state_description(state):
            # An unlisted state used to yield None, which produced events
            # reading "<server> is None". Name the raw code instead.
            try:
                return state_descriptions[state]
            except (KeyError, TypeError):
                return 'Unrecognized state %s' % (state,)

        status = get_state_description(state)
        hostname = get_hostname(agentConfig)
        msg_title = "%s is %s" % (server, status)
        msg = "MongoDB %s just reported as %s" % (server, status)

        self.event({
            'timestamp': int(time.time()),
            'event_type': 'Mongo',
            'api_key': agentConfig['api_key'],
            'msg_title': msg_title,
            'msg_text': msg,
            'host': hostname
        })
    def check(self, instance, create_event=True):
        """Collect Jenkins build results and publish events and metrics.

        Each result yielded by ``_get_build_results`` gets the local
        hostname stored under ``host``. With ``create_event`` enabled the
        result is sent as an event, its duration is gauged in seconds
        (the raw value is divided by 1000) and either the success or the
        failure counter is incremented.

        :param instance: Instance configuration; reads ``name`` and
            ``jenkins_home``.
        :param create_event: Set to False to iterate the results without
            emitting anything, as the priming run does.
        :raises Exception: If ``jenkins_home`` is unset or the
            ``<jenkins_home>/jobs/*`` glob matches nothing.
        """
        instance_dimensions = self._set_dimensions(None, instance)
        if self.high_watermarks.get(instance.get('name'), None) is None:
            # On the first run of check(), prime the high_watermarks dict
            # so that we only send events that occurred after the agent
            # started.
            # (Setting high_watermarks in the next statement prevents
            # any kind of infinite loop (assuming nothing ever sets
            # high_watermarks to None again!))
            self.high_watermarks[instance.get('name')] = defaultdict(lambda: 0)
            self.check(instance, create_event=False)

        jenkins_home = instance.get('jenkins_home', None)

        if not jenkins_home:
            raise Exception("No jenkins_home directory set in the config file")

        jenkins_jobs_dir = os.path.join(jenkins_home, 'jobs', '*')
        job_dirs = glob(jenkins_jobs_dir)

        if not job_dirs:
            raise Exception('No jobs found in `%s`! '
                            'Check `jenkins_home` in your config' % (jenkins_jobs_dir))

        for job_dir in job_dirs:
            for output in self._get_build_results(instance.get('name'), job_dir):
                output['host'] = get_hostname(self.agent_config)
                if create_event:
                    self.log.debug("Creating event for job: %s", output['job_name'])
                    self.event(output)

                    # Start from a copy for each build: updating a single
                    # shared dict carried a previous build's 'branch' over
                    # to builds that do not report one.
                    dimensions = dict(instance_dimensions)
                    dimensions.update({'job_name': output['job_name']})
                    if 'branch' in output:
                        dimensions.update({'branch': output['branch']})
                    self.gauge("jenkins.job.duration",
                               float(output['duration']) / 1000.0,
                               dimensions=dimensions)

                    if output['result'] == 'SUCCESS':
                        self.increment('jenkins.job.success', dimensions=dimensions)
                    else:
                        self.increment('jenkins.job.failure', dimensions=dimensions)
Exemple #12
0
    def __init__(self, name, init_config, agent_config, instances=None):
        """Initialize the common attributes of a check.

        :param name: The name of the check
        :param init_config: The config for initializing the check
        :param agent_config: The global configuration for the agent; its
            ``recent_point_threshold`` entry configures the aggregator.
        :param instances: A list of configuration objects for each
            instance; defaults to an empty list.
        """
        super(AgentCheck, self).__init__(agent_config)
        self.name = name
        self.init_config = init_config

        local_hostname = util.get_hostname()
        self.hostname = local_hostname

        logger_name = '%s.%s' % (__name__, name)
        self.log = logging.getLogger(logger_name)

        # The check's aggregator is built from the hostname and the
        # optional threshold.
        point_threshold = agent_config.get('recent_point_threshold', None)
        self.aggregator = aggregator.MetricsAggregator(
            local_hostname, recent_point_threshold=point_threshold)

        self.instances = instances if instances else []
        self.library_versions = None
Exemple #13
0
    def create_event(self, state, server, agentConfig):
        """Create an event describing the replication state of a mongo node.

        :param state: Numeric replica set member state of the node.
        :param server: Node identifier used in the event title and text.
        :param agentConfig: Agent configuration; provides ``api_key`` and
            is handed to ``get_hostname``.
        """
        def get_state_description(state):
            # Replica set member states as documented by MongoDB,
            # including 10 (REMOVED), which this table used to lack.
            descriptions = {
                0: 'Starting Up',
                1: 'Primary',
                2: 'Secondary',
                3: 'Recovering',
                4: 'Fatal',
                5: 'Starting up (forking threads)',
                6: 'Unknown',
                7: 'Arbiter',
                8: 'Down',
                9: 'Rollback',
                10: 'Removed',
            }
            try:
                return descriptions[state]
            except (KeyError, TypeError):
                # Falling through used to return None and produce an
                # event titled "<server> is None". Report the raw code.
                return 'Unrecognized state %s' % (state,)

        status = get_state_description(state)
        hostname = get_hostname(agentConfig)
        msg_title = "%s is %s" % (server, status)
        msg = "MongoDB %s just reported as %s" % (server, status)

        self.event({
            'timestamp': int(time.time()),
            'event_type': 'Mongo',
            'api_key': agentConfig['api_key'],
            'msg_title': msg_title,
            'msg_text': msg,
            'host': hostname
        })
Exemple #14
0
    def _check(self, instance):
        """Run the desired host-alive check against this host.

        Publishes the ``host_alive_status`` gauge (0 on success, 1 with
        the error in ``value_meta`` otherwise) and logs an error when the
        test fails.

        :param instance: Instance configuration; ``host_name`` and
            ``alive_test`` are read with subscript access.
        :returns: ``(services_checks.Status.UP, "UP")`` or
            ``(services_checks.Status.DOWN, "DOWN")``.
        :raises ValueError: If ``host_name`` is present but empty.
        """
        if not instance['host_name']:
            raise ValueError('Target hostname not specified!')

        host_dimensions = {
            'hostname': instance['host_name'],
            'observer_host': util.get_hostname()
        }
        dimensions = self._set_dimensions(host_dimensions, instance)

        success = False
        test_type = instance['alive_test']
        if test_type == 'ssh':
            success, error_message = self._test_ssh(
                instance['host_name'],
                self.init_config.get('ssh_port'),
                self.init_config.get('ssh_timeout'))
        elif test_type == 'ping':
            success, error_message = self._test_ping(
                instance['host_name'],
                self.init_config.get('ping_timeout'))
        else:
            error_message = 'Unrecognized alive_test: {0}'.format(test_type)

        dimensions.update({'test_type': test_type})

        # Only a literal True is treated as a successful test.
        if success is not True:
            self.gauge('host_alive_status',
                       1,
                       dimensions=dimensions,
                       value_meta={'error': error_message})
            failure_text = 'Host alive check for {0} failed.  Error was {1}'.format(
                instance['host_name'], error_message)
            self.log.error(failure_text)
            return services_checks.Status.DOWN, "DOWN"

        self.gauge('host_alive_status', 0, dimensions=dimensions)
        return services_checks.Status.UP, "UP"
Exemple #15
0
    def _check(self, instance):
        """Run the desired host-alive check against this host.

        The result is reported through the ``host_alive_status`` gauge:
        0 when the test succeeds, 1 (carrying the error message as
        ``value_meta``) when it does not. Failures are also logged.

        :param instance: Instance configuration providing ``host_name``
            and ``alive_test``.
        :returns: A ``(status, text)`` tuple: UP/"UP" or DOWN/"DOWN".
        :raises ValueError: If the configured ``host_name`` is empty.
        """
        if not instance['host_name']:
            raise ValueError('Target hostname not specified!')

        dimensions = self._set_dimensions(
            {'hostname': instance['host_name'],
             'observer_host': util.get_hostname()},
            instance)

        test_type = instance['alive_test']
        success = False
        if test_type == 'ssh':
            probe_result = self._test_ssh(instance['host_name'],
                                          self.init_config.get('ssh_port'),
                                          self.init_config.get('ssh_timeout'))
            success, error_message = probe_result
        elif test_type == 'ping':
            probe_result = self._test_ping(instance['host_name'],
                                           self.init_config.get('ping_timeout'))
            success, error_message = probe_result
        else:
            error_message = 'Unrecognized alive_test: {0}'.format(test_type)

        dimensions.update({'test_type': test_type})

        if success is True:
            self.gauge('host_alive_status', 0, dimensions=dimensions)
            return services_checks.Status.UP, "UP"

        self.gauge('host_alive_status', 1, dimensions=dimensions,
                   value_meta={'error': error_message})
        self.log.error(
            'Host alive check for {0} failed.  Error was {1}'.format(
                instance['host_name'], error_message))
        return services_checks.Status.DOWN, "DOWN"
Exemple #16
0
    def check(self, instance):
        """Iterate the build results of every Jenkins job of an instance.

        The local hostname is written into each result's ``host`` key.

        :param instance: Instance configuration; ``name`` and
            ``jenkins_home`` are read.
        :raises Exception: When ``jenkins_home`` is missing or when
            ``<jenkins_home>/jobs/*`` matches no directory.
        """
        name = instance.get('name')
        needs_priming = self.high_watermarks.get(name, None) is None
        if needs_priming:
            # The watermark table is stored before the recursive priming
            # call, so the nested call does not recurse again (provided
            # the entry is never set back to None).
            self.high_watermarks[name] = defaultdict(lambda: 0)
            self.check(instance)

        jenkins_home = instance.get('jenkins_home', None)
        if not jenkins_home:
            raise Exception("No jenkins_home directory set in the config file")

        jobs_glob = os.path.join(jenkins_home, 'jobs', '*')
        found_job_dirs = glob(jobs_glob)
        if not found_job_dirs:
            message = ('No jobs found in `%s`! '
                       'Check `jenkins_home` in your config' % (jobs_glob))
            raise Exception(message)

        for directory in found_job_dirs:
            for result in self._get_build_results(name, directory):
                result['host'] = get_hostname()
 def setUp(self):
     """Build the HostAlive check, mock its gauge and set shared fixtures."""
     unittest.TestCase.setUp(self)
     self._host_alive = HostAlive("TestHostAlive", {}, {})

     # Tests observe reported values through this mock.
     self._gauge = mock.Mock()
     self._host_alive.gauge = self._gauge

     host = "monasca"
     self._host_name = host
     self._instance = {
         "host_name": host,
         "alive_test": "ping",
     }
     self._base_dimensions = {
         "test_type": "ping",
         "hostname": host,
         "observer_host": util.get_hostname(),
     }