def remove_rally_check(self):
    """Remove the rally cron file and the 'rally' NRPE check, if present.

    Deletes the file at ``self.rally_cron_file`` when it exists. If the
    NRPE definition ``/etc/nagios/nrpe.d/check_rally.cfg`` exists, the
    'rally' check is removed and the NRPE configuration is written out.
    """
    cron_path = self.rally_cron_file
    if os.path.exists(cron_path):
        os.unlink(cron_path)

    # Nothing to unregister when the check definition was never written.
    if not os.path.exists('/etc/nagios/nrpe.d/check_rally.cfg'):
        return

    nrpe_cfg = NRPE()
    nrpe_cfg.remove_check(shortname='rally')
    nrpe_cfg.write()
def create_endpoint_checks(self, creds):
    """Create an NRPE check for each Keystone catalog endpoint.

    Read the Keystone catalog, and create a ``check_http`` check for each
    endpoint listed. If there is a healthcheck endpoint for the API, use
    that URL, otherwise check the url '/'. If the endpoint uses https, add
    an extra check for certificate expiry.

    Checks for an interface are only created when the charm config option
    ``check_<interface>_urls`` is truthy; otherwise any existing check for
    that service/interface pair is removed.

    v2 endpoints lack the 'interface' attribute and expose
    ``adminurl``/``internalurl``/``publicurl`` instead, e.g.::

        <Endpoint {'id': 'XXXXX', 'region': 'RegionOne',
                   'publicurl': 'http://10.x.x.x:9696',
                   'service_id': 'YYY',
                   'internalurl': 'http://10.x.x.x:9696',
                   'enabled': True,
                   'adminurl': 'http://10.x.x.x:9696'}>

    For those, the first URL found (admin, then internal, then public) is
    used to fill in ``endpoint.interface`` and ``endpoint.url``.

    Endpoints that cannot be matched to an enabled service, and v2
    endpoints without any usable URL attribute, are skipped.

    :param creds: credentials passed to ``self.get_keystone_client``.
    """
    # Provide URLs that can be used for healthcheck for some services.
    # This also provides a nasty hack-ish way to add check_http switches
    # for some services: the value is appended verbatim after '-u'.
    health_check_params = {
        'aodh': '/healthcheck',
        'barbican': '/v1 -e Unauthorized',
        'ceilometer': '/ -e Unauthorized -d x-openstack-request-id',
        'cinderv1': '/v1 -e Unauthorized -d x-openstack-request-id',
        'cinderv2': '/v2 -e Unauthorized',
        'cinderv3': '/v3 -e Unauthorized -d x-openstack-request-id',
        'designate': '/v2 -e Unauthorized',
        'glance': '/healthcheck',
        'gnocchi': '/v1 -e Unauthorized',
        'heat': '/v1 -e Unauthorized',
        'keystone': '/healthcheck',
        'nova': '/healthcheck',
        'octavia': '/v2 -e Unauthorized',
        'placement': '/healthcheck -e Unauthorized -d x-openstack-request-id',
        's3': '/healthcheck',
        'swift': self.charm_config.get('swift_check_params', '/'),
    }

    self.get_keystone_client(creds)
    endpoints = self.keystone_endpoints
    services = [svc for svc in self.keystone_services if svc.enabled]
    nrpe = NRPE()
    for endpoint in endpoints:
        endpoint.service_names = [
            x.name for x in services if x.id == endpoint.service_id
        ]
        if not endpoint.service_names:
            # The endpoint belongs to a disabled (or unknown) service, so
            # there is no name to build a check from; indexing [0] here
            # would raise IndexError and abort all remaining endpoints.
            continue
        service_name = endpoint.service_names[0]
        endpoint.healthcheck_url = health_check_params.get(
            service_name, '/')

        # Note(aluria): glance-simplestreams-sync does not provide an API
        # to check
        if service_name == 'image-stream':
            continue

        if not hasattr(endpoint, 'interface'):
            if service_name == 'keystone':
                # Note(aluria): filter:healthcheck is not configured in v2
                # https://docs.openstack.org/keystone/pike/configuration.html#health-check-middleware
                continue
            # v2 catalog entry: derive interface/url from the first
            # '<interface>url' attribute that is present.
            for interface in ('admin', 'internal', 'public'):
                old_interface_name = '{}url'.format(interface)
                if hasattr(endpoint, old_interface_name):
                    endpoint.interface = interface
                    endpoint.url = getattr(endpoint, old_interface_name)
                    break
            if not hasattr(endpoint, 'interface'):
                # No usable URL attribute at all; nothing to check.
                continue

        check_url = urlparse(endpoint.url)
        shortname = '{}_{}'.format(service_name, endpoint.interface)
        cert_shortname = '{}_cert'.format(shortname)

        if not self.charm_config.get(
                'check_{}_urls'.format(endpoint.interface)):
            # Checks for this interface are disabled: drop stale ones.
            nrpe.remove_check(shortname=shortname)
            if check_url.scheme == 'https':
                nrpe.remove_check(shortname=cert_shortname)
            continue

        cmd_params = ['/usr/lib/nagios/plugins/check_http']
        host, port = self._split_url(check_url.netloc, check_url.scheme)
        cmd_params.append('-H {} -p {}'.format(host, port))
        cmd_params.append('-u {}'.format(endpoint.healthcheck_url))

        # If this is https, we want to add a check for cert expiry and
        # also need to tell check_http to use TLS.
        if check_url.scheme == 'https':
            cmd_params.append('-S')
            # Add an extra check for TLS cert expiry; falls back to
            # 30/14 days when the config values are falsy.
            cmd_params_cert = cmd_params.copy()
            cmd_params_cert.append('-C {},{}'.format(
                self.charm_config['tls_warn_days'] or 30,
                self.charm_config['tls_crit_days'] or 14))
            nrpe.add_check(
                shortname=cert_shortname,
                description='Certificate expiry check for {} {}'.format(
                    service_name, endpoint.interface),
                check_cmd=' '.join(cmd_params_cert))

        # Add the actual health check for the URL
        nrpe.add_check(
            shortname=shortname,
            description='Endpoint url check for {} {}'.format(
                service_name, endpoint.interface),
            check_cmd=' '.join(cmd_params))

    nrpe.write()
def render_checks(self, creds):
    """Render the nagios credentials file and register the NRPE checks.

    Renders the ``nagios.novarc`` template to ``self.novarc`` using
    ``creds`` as context, makes sure the plugins directory exists, updates
    the plugins, and then adds or removes each NRPE check according to the
    corresponding setting on ``self``:

    * ``nova_services`` is always added.
    * ``neutron_agents``, ``loadbalancers``, ``contrail_analytics_alarms``
      and ``dns_multi`` are added when enabled/configured and removed
      otherwise.

    Finally the NRPE configuration is written and the per-endpoint checks
    are created via ``self.create_endpoint_checks``.

    :param creds: credentials used as template context and passed on to
        ``self.create_endpoint_checks``.
    """
    render(source='nagios.novarc', target=self.novarc, context=creds,
           owner='nagios', group='nagios')

    nrpe = NRPE()
    if not os.path.exists(self.plugins_dir):
        os.makedirs(self.plugins_dir)

    self.update_plugins()

    nova_check_command = os.path.join(self.plugins_dir,
                                      'check_nova_services.py')
    # strip() drops the trailing space left when self.skip_disabled is
    # an empty string.
    check_command = '{} --warn {} --crit {} --skip-aggregates {} {}'.format(
        nova_check_command, self.nova_warn, self.nova_crit,
        self.nova_skip_aggregates, self.skip_disabled).strip()
    nrpe.add_check(
        shortname='nova_services',
        description='Check that enabled Nova services are up',
        check_cmd=check_command,
    )

    if self.is_neutron_agents_check_enabled:
        nrpe.add_check(
            shortname='neutron_agents',
            description='Check that enabled Neutron agents are up',
            check_cmd=os.path.join(self.plugins_dir,
                                   'check_neutron_agents.sh'),
        )
    else:
        nrpe.remove_check(shortname='neutron_agents')

    if self.is_loadbalancers_check_enabled:
        nrpe.add_check(
            shortname='loadbalancers',
            description='Check loadbalancers status',
            check_cmd=os.path.join(self.plugins_dir,
                                   'check_loadbalancers.py'),
        )
    else:
        nrpe.remove_check(shortname='loadbalancers')

    if self.contrail_analytics_vip:
        contrail_check_command = '{} --host {}'.format(
            os.path.join(self.plugins_dir,
                         'check_contrail_analytics_alarms.py'),
            self.contrail_analytics_vip)
        nrpe.add_check(
            shortname='contrail_analytics_alarms',
            description='Check Contrail Analytics alarms',
            check_cmd=contrail_check_command,
        )
    else:
        nrpe.remove_check(shortname='contrail_analytics_alarms')

    # Split once and decide on the resulting names: a whitespace-only (or
    # unset) value must not register a DNS check with no hostnames.
    dns_names = (self.check_dns or '').split()
    if dns_names:
        nrpe.add_check(
            shortname='dns_multi',
            description='Check DNS names are resolvable',
            check_cmd='{} {}'.format(
                os.path.join(self.plugins_dir, 'check_dns_multi.sh'),
                ' '.join(dns_names)),
        )
    else:
        nrpe.remove_check(shortname='dns_multi')

    nrpe.write()
    self.create_endpoint_checks(creds)