def update_nrpe_config():
    """Install the Ceph status plugin, its collector cron job and NRPE check.

    Installs ``python-dbus`` and ``lockfile-progs``, copies
    ``check_ceph_status.py`` into NAGIOS_PLUGINS when that directory exists,
    copies ``collect_ceph_status.sh`` into SCRIPTS_DIR, writes a cron entry
    that runs the collector every five minutes as root, and registers the
    ``ceph`` check using thresholds read from charm config.
    """
    # python-dbus is used by check_upstart_job
    apt_install(['python-dbus', 'lockfile-progs'])
    log('Refreshing nagios checks')

    if os.path.isdir(NAGIOS_PLUGINS):
        plugin_src = os.path.join(os.getenv('CHARM_DIR'), 'files', 'nagios',
                                  'check_ceph_status.py')
        plugin_dst = os.path.join(NAGIOS_PLUGINS, 'check_ceph_status.py')
        rsync(plugin_src, plugin_dst)

    collector = os.path.join(SCRIPTS_DIR, 'collect_ceph_status.sh')
    collector_src = os.path.join(os.getenv('CHARM_DIR'), 'files', 'nagios',
                                 'collect_ceph_status.sh')
    rsync(collector_src, collector)
    write_file(STATUS_CRONFILE,
               "{} root {}\n".format('*/5 * * * *', collector))

    # Find out if nrpe set nagios_hostname
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checks = nrpe.NRPE(hostname=nagios_host)

    command = ('check_ceph_status.py -f {} --degraded_thresh {}'
               ' --misplaced_thresh {}'
               ' --recovery_rate {}').format(
        STATUS_FILE,
        config('nagios_degraded_thresh'),
        config('nagios_misplaced_thresh'),
        config('nagios_recovery_rate'))
    if config('nagios_raise_nodeepscrub'):
        command += ' --raise_nodeepscrub'

    checks.add_check(
        shortname="ceph",
        description='Check Ceph health {{{}}}'.format(unit_name),
        check_cmd=command)
    checks.write()
def update_nrpe_config():
    """Install the Ceph status plugin, collector cron job and ``ceph`` check.

    Copies ``check_ceph_status.py`` into NAGIOS_PLUGINS when that directory
    exists, copies ``collect_ceph_status.sh`` into SCRIPTS_DIR, schedules the
    collector every five minutes as root and registers a check that reads
    STATUS_FILE.
    """
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    log('Refreshing nagios checks')

    if os.path.isdir(NAGIOS_PLUGINS):
        plugin_src = os.path.join(os.getenv('CHARM_DIR'), 'files', 'nagios',
                                  'check_ceph_status.py')
        plugin_dst = os.path.join(NAGIOS_PLUGINS, 'check_ceph_status.py')
        rsync(plugin_src, plugin_dst)

    collector = os.path.join(SCRIPTS_DIR, 'collect_ceph_status.sh')
    collector_src = os.path.join(os.getenv('CHARM_DIR'), 'files', 'nagios',
                                 'collect_ceph_status.sh')
    rsync(collector_src, collector)
    write_file(STATUS_CRONFILE,
               "{} root {}\n".format('*/5 * * * *', collector))

    # Find out if nrpe set nagios_hostname
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checks = nrpe.NRPE(hostname=nagios_host)
    checks.add_check(
        shortname="ceph",
        description='Check Ceph health {%s}' % unit_name,
        check_cmd='check_ceph_status.py -f {}'.format(STATUS_FILE))
    checks.write()
def update_nrpe_config(checks_to_remove=None):
    """Refresh the NRPE checks, optionally dropping some of them first.

    :param checks_to_remove: short names of nrpe checks to remove before the
                             service and haproxy checks are added. For
                             example, pass ['radosgw'] to remove the check
                             for the default systemd radosgw service, to make
                             way for per host services.
    :type checks_to_remove: list
    """
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checks = nrpe.NRPE(hostname=nagios_host)
    nrpe.copy_nrpe_checks()

    if checks_to_remove is not None:
        log("Removing the following nrpe checks: {}".format(checks_to_remove),
            level=DEBUG)
        for shortname in checks_to_remove:
            checks.remove_check(shortname=shortname)

    nrpe.add_init_service_checks(checks, services(), unit_name)
    nrpe.add_haproxy_checks(checks, unit_name)
    checks.write()
def update_nrpe_config():
    """Register an init-service NRPE check for ``service_name`` and write it."""
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    monitored = [service_name]
    checks = nrpe.NRPE(hostname=nagios_host)
    nrpe.add_init_service_checks(checks, monitored, unit_name)
    checks.write()
def update_nrpe_config(unused=None):
    """Register NRPE service checks for the kubelet and kube-proxy snaps.

    :param unused: ignored by this function.
    """
    monitored = ('snap.kubelet.daemon', 'snap.kube-proxy.daemon')

    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checks = nrpe.NRPE(hostname=nagios_host)
    nrpe.add_init_service_checks(checks, monitored, unit_name)
    checks.write()
def update_nrpe_config():
    """Refresh the Ceph Nagios plugin, collector cron job and ``ceph`` check.

    The plugin is only copied when NAGIOS_PLUGINS exists; the collector
    script and its five-minute root cron entry are always (re)written.
    """
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    log('Refreshing nagios checks')

    charm_nagios = ('files', 'nagios')
    if os.path.isdir(NAGIOS_PLUGINS):
        rsync(
            os.path.join(os.getenv('CHARM_DIR'), *charm_nagios,
                         'check_ceph_status.py'),
            os.path.join(NAGIOS_PLUGINS, 'check_ceph_status.py'))

    collector = os.path.join(SCRIPTS_DIR, 'collect_ceph_status.sh')
    rsync(
        os.path.join(os.getenv('CHARM_DIR'), *charm_nagios,
                     'collect_ceph_status.sh'),
        collector)
    cron_entry = "{} root {}\n".format('*/5 * * * *', collector)
    write_file(STATUS_CRONFILE, cron_entry)

    # Find out if nrpe set nagios_hostname
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checks = nrpe.NRPE(hostname=nagios_host)
    checks.add_check(
        shortname="ceph",
        description='Check Ceph health {%s}' % unit_name,
        check_cmd='check_ceph_status.py -f {}'.format(STATUS_FILE),
    )
    checks.write()
def render_nrpe_checks(self):
    """Configure Nagios NRPE checks for every entry in ``self.services``."""
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checks = nrpe.NRPE(hostname=nagios_host)
    nrpe.add_init_service_checks(checks, self.services, unit_name)
    checks.write()
def update_nrpe_config():
    """Install the swift-storage Nagios plugins and register their checks.

    Creates NAGIOS_PLUGINS if missing, copies the two check scripts and the
    sudoers file out of the charm directory, then registers the
    ``swift_storage`` check plus init-service checks for SWIFT_SVCS.
    """
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    log('Refreshing nrpe checks')

    if not os.path.exists(NAGIOS_PLUGINS):
        mkpath(NAGIOS_PLUGINS)

    # (charm sub-directory, file name, destination directory)
    charm_files = (
        ('nrpe-external-master', 'check_swift_storage.py', NAGIOS_PLUGINS),
        ('nrpe-external-master', 'check_swift_service', NAGIOS_PLUGINS),
        ('sudo', 'swift-storage', SUDOERS_D),
    )
    for subdir, filename, dest_dir in charm_files:
        rsync(os.path.join(os.getenv('CHARM_DIR'), 'files', subdir, filename),
              os.path.join(dest_dir, filename))

    # Find out if nrpe set nagios_hostname
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checks = nrpe.NRPE(hostname=nagios_host)

    # check the rings and replication
    ring_description = ('Check swift storage ring hashes and replication'
                        ' {%s}' % unit_name)
    ring_command = 'check_swift_storage.py {}'.format(
        config('nagios-check-params'))
    checks.add_check(
        shortname='swift_storage',
        description=ring_description,
        check_cmd=ring_command,
    )
    nrpe.add_init_service_checks(checks, SWIFT_SVCS, unit_name)
    checks.write()
def update_nrpe_config():
    """Install the Ceph status plugin and register all configured checks.

    Besides the main ``ceph`` health check this registers one
    ``ceph-<key>`` check per entry of the ``nagios_additional_checks``
    config option (a Python-literal mapping) and, when
    ``nagios_check_num_osds`` is set, a ``ceph_num_osds`` check.
    """
    # python-dbus is used by check_upstart_job
    apt_install(['python-dbus', 'lockfile-progs'])
    log('Refreshing nagios checks')

    if os.path.isdir(NAGIOS_PLUGINS):
        plugin_src = os.path.join(os.getenv('CHARM_DIR'), 'files', 'nagios',
                                  'check_ceph_status.py')
        plugin_dst = os.path.join(NAGIOS_PLUGINS, 'check_ceph_status.py')
        rsync(plugin_src, plugin_dst)

    collector = os.path.join(SCRIPTS_DIR, 'collect_ceph_status.sh')
    collector_src = os.path.join(os.getenv('CHARM_DIR'), 'files', 'nagios',
                                 'collect_ceph_status.sh')
    rsync(collector_src, collector)
    write_file(STATUS_CRONFILE,
               "{} root {}\n".format('*/5 * * * *', collector))

    # Find out if nrpe set nagios_hostname
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checks = nrpe.NRPE(hostname=nagios_host)

    health_cmd = ('check_ceph_status.py -f {} --degraded_thresh {}'
                  ' --misplaced_thresh {}'
                  ' --recovery_rate {}').format(
        STATUS_FILE,
        config('nagios_degraded_thresh'),
        config('nagios_misplaced_thresh'),
        config('nagios_recovery_rate'))
    if config('nagios_raise_nodeepscrub'):
        health_cmd += ' --raise_nodeepscrub'
    checks.add_check(
        shortname="ceph",
        description='Check Ceph health {{{}}}'.format(unit_name),
        check_cmd=health_cmd)

    if config('nagios_additional_checks'):
        additional_critical = config('nagios_additional_checks_critical')
        extra_checks = ast.literal_eval(config('nagios_additional_checks'))
        # Only an explicit True enables the critical flag.
        if additional_critical is True:
            critical_flag = "--additional_check_critical"
        else:
            critical_flag = ""

        for key, pattern in extra_checks.items():
            shortname = "ceph-{}".format(key.replace(" ", ""))
            log("Adding check {}".format(shortname))
            extra_cmd = ('check_ceph_status.py -f {}'
                         ' --additional_check \\\"{}\\\"'
                         ' {}').format(STATUS_FILE, pattern, critical_flag)
            checks.add_check(
                shortname=shortname,
                description='Additional Ceph checks {{{}}}'.format(
                    unit_name),
                check_cmd=extra_cmd)

    if config('nagios_check_num_osds'):
        osd_cmd = 'check_ceph_status.py -f {} --check_num_osds'.format(
            STATUS_FILE)
        checks.add_check(
            shortname='ceph_num_osds',
            description='Check whether all OSDs are up and in',
            check_cmd=osd_cmd)

    checks.write()
def update_nrpe_config(svc):
    """Register init-service NRPE checks for SVCNAME and write them out.

    :param svc: not referenced in the body.
    """
    # NOTE(review): ``svc`` is unused and the module-level SVCNAME is passed
    # instead -- confirm this is intended.
    # python-dbus is used by check_upstart_job
    fetch.apt_install('python-dbus')
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checks = nrpe.NRPE(hostname=nagios_host)
    nrpe.add_init_service_checks(checks, SVCNAME, unit_name)
    checks.write()
def update_nrpe_config(unused=None):
    """Register an NRPE service check for nginx.

    :param unused: ignored by this function.
    """
    monitored = ('nginx', )

    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checks = nrpe.NRPE(hostname=nagios_host)
    nrpe.add_init_service_checks(checks, monitored, unit_name)
    checks.write()
def update_nrpe_config(unused=None):
    """Write the NRPE configuration containing the nginx service check.

    :param unused: ignored by this function.
    """
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checks = nrpe.NRPE(hostname=nagios_host)
    nrpe.add_init_service_checks(checks, ('nginx',), unit_name)
    checks.write()
def update_nrpe_config(unused=None):
    """Register NRPE service checks for the Kubernetes control-plane services.

    :param unused: ignored by this function.
    """
    monitored = (
        'kube-apiserver',
        'kube-controller-manager',
        'kube-scheduler',
    )

    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checks = nrpe.NRPE(hostname=nagios_host)
    nrpe.add_init_service_checks(checks, monitored, unit_name)
    checks.write()
def update_nrpe_config(unused=None):
    """Register NRPE checks for SNAP_SERVICES with ``primary=True``.

    :param unused: ignored by this function.
    """
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checks = nrpe.NRPE(hostname=nagios_host, primary=True)

    # Add a check for the snap's systemd service
    nrpe.add_init_service_checks(checks, SNAP_SERVICES, unit_name)
    checks.write()
def remove_nrpe_config(nagios=None):
    """Remove the NRPE checks for SNAP_SERVICES and clear the config state.

    :param nagios: not referenced in the body.
    """
    checks = nrpe.NRPE(hostname=nrpe.get_nagios_hostname())
    for shortname in SNAP_SERVICES:
        checks.remove_check(shortname=shortname)

    remove_state(charm_state('nrpe-initial-config'))
def main():
    """Install RAID and bond Nagios plugins and register their NRPE checks.

    Runs ``lsmod`` to detect the megaraid kernel module. When it is loaded,
    the LSI RAID plugin and the nagios sudoers file are copied out of the
    charm directory, the RAID tooling packages are installed and an
    ``lsi-raid`` check is registered. The ``check_bond`` plugin is copied
    whenever NAGIOS_PLUGINS exists, and checks for bond0 and bond1 are
    registered when both bonding interfaces are present. Finally the NRPE
    configuration is written and the unit is marked ready.
    """
    lsmod = subprocess.Popen(['lsmod'], stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
    stdout, _ = lsmod.communicate()

    # Always bind the flag: it is consulted again below, and leaving it
    # unset on hosts without the module would raise at that point.
    megaraid = bool(
        re.compile('(megaraid).*').findall(stdout.decode('utf-8')))

    charm_nagios = os.path.join(os.getenv('CHARM_DIR'), 'files', 'nagios')
    if megaraid:
        if os.path.isdir(NAGIOS_PLUGINS):
            rsync(os.path.join(charm_nagios, 'check_lsi_raid'),
                  os.path.join(NAGIOS_PLUGINS, 'check_lsi_raid'))
        if os.path.isdir(SUDOERS_DIR):
            rsync(os.path.join(charm_nagios, 'nagios_sudoers'),
                  os.path.join(SUDOERS_DIR, 'nagios_sudoers'))

    if os.path.isdir(NAGIOS_PLUGINS):
        rsync(os.path.join(charm_nagios, 'check_bond'),
              os.path.join(NAGIOS_PLUGINS, 'check_bond'))

    hostname = nrpe.get_nagios_hostname()
    current_unit = nrpe.get_nagios_unit_name()
    nrpe_setup = nrpe.NRPE(hostname=hostname)

    # Install megaraid tools
    # And add megaraid nagios check
    if megaraid:
        install_packages(['storcli', 'libfile-which-perl'])
        nrpe_setup.add_check(
            shortname='lsi-raid',
            description='LSI Raid Check {%s}' % current_unit,
            check_cmd=os.path.join(NAGIOS_PLUGINS, 'check_lsi_raid'))

    # Install checks for the network bonds
    if os.path.isfile('/proc/net/bonding/bond0') and \
            os.path.isfile('/proc/net/bonding/bond1'):
        check_bond = os.path.join(NAGIOS_PLUGINS, 'check_bond')
        nrpe_setup.add_check(
            shortname='bond0',
            description='Bond0 check {%s}' % current_unit,
            check_cmd=check_bond + ' -i bond0 -p eth2')
        # Each bond needs its own shortname so the two checks do not
        # collide under the same name.
        nrpe_setup.add_check(
            shortname='bond1',
            description='Bond1 check {%s}' % current_unit,
            check_cmd=check_bond + ' -i bond1 -p eth3')

    nrpe_setup.write()
    reactive.set_state('raidcheck_installed')
    hookenv.status_set('active', 'Unit is ready')
def test_get_nagios_hostname(self):
    """get_nagios_hostname() returns the relation's ``nagios_hostname``."""
    expected_hostname = 'bob-openstack-dashboard-0'
    relation = dict([
        ('nagios_hostname', expected_hostname),
        ('private-address', '10.5.3.103'),
        ('__unit__', u'dashboard-nrpe/1'),
        ('__relid__', u'nrpe-external-master:2'),
        ('nagios_host_context', u'bob'),
    ])
    self.patched['relations_of_type'].return_value = [relation]

    self.assertEqual(nrpe.get_nagios_hostname(),
                     'bob-openstack-dashboard-0')
def update_nrpe_config():
    """Copy the NRPE check scripts and register service and haproxy checks."""
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checks = nrpe.NRPE(hostname=nagios_host)

    nrpe.copy_nrpe_checks()
    nrpe.add_init_service_checks(checks, services(), unit_name)
    nrpe.add_haproxy_checks(checks, unit_name)
    checks.write()
def remove_nrpe_config():
    """Remove the NRPE checks for MONITORED_SERVICES and clear the state."""
    message = 'Removing nrpe checks for services: {}'.format(
        MONITORED_SERVICES)
    hookenv.log(message)

    checks = nrpe.NRPE(hostname=nrpe.get_nagios_hostname(), primary=False)
    for shortname in MONITORED_SERVICES:
        checks.remove_check(shortname=shortname)
    checks.write()

    remove_state('nrpe-external-master.initial-config')
def update_nrpe_config(unused=None):
    """Register the NRPE service check for the etcd snap.

    :param unused: ignored by this function.
    """
    # List of systemd services that will be checked
    monitored = ('snap.etcd.etcd', )

    # The current nrpe-external-master interface doesn't handle a lot of
    # logic, use the charm-helpers code for now.
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checks = nrpe.NRPE(hostname=nagios_host, primary=False)
    nrpe.add_init_service_checks(checks, monitored, unit_name)
    checks.write()
def update_nrpe_config():
    """Register the mysql service check and a ``mysqld`` process check."""
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checks = nrpe.NRPE(hostname=nagios_host)

    nrpe.add_init_service_checks(checks, ['mysql'], unit_name)
    process_check = {
        'shortname': 'mysql_proc',
        'description': 'Check MySQL process {%s}' % unit_name,
        'check_cmd': 'check_procs -c 1:1 -C mysqld',
    }
    checks.add_check(**process_check)
    checks.write()
def update_nrpe_config():
    """Copy NRPE checks from the bundled charmhelpers files and register them.

    The check scripts are taken from ``charmhelpers/contrib/openstack/files``
    under the charm directory.
    """
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checks = nrpe.NRPE(hostname=nagios_host)

    helper_files = os.path.join(
        charm_dir(), 'charmhelpers', 'contrib', 'openstack', 'files')
    nrpe.copy_nrpe_checks(nrpe_files_dir=helper_files)
    nrpe.add_init_service_checks(checks, services(), unit_name)
    nrpe.add_haproxy_checks(checks, unit_name)
    checks.write()
def update_nrpe_config():
    """Register the ``ceph-osd`` process check and write the NRPE config.

    The check command feeds every ``whoami`` id found under
    ``/var/lib/ceph/osd/ceph-*`` to ``status ceph-osd`` and exits 2 on
    failure.
    """
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checks = nrpe.NRPE(hostname=nagios_host)

    osd_status_cmd = ('/bin/cat /var/lib/ceph/osd/ceph-*/whoami |'
                      'xargs -I@ status ceph-osd id=@ && exit 0 || exit 2')
    checks.add_check(
        shortname='ceph-osd',
        description='process check {%s}' % unit_name,
        check_cmd=osd_status_cmd)
    checks.write()
def remove_nrpe_config():
    """Remove the worker NRPE checks, the k8s plugin and the config state."""
    remove_state("nrpe-external-master.initial-config")
    remove_nagios_plugin("check_k8s_worker.py")

    # The current nrpe-external-master interface doesn't handle a lot of
    # logic, use the charm-helpers code for now.
    checks = nrpe.NRPE(hostname=nrpe.get_nagios_hostname())

    for shortname in worker_services:
        checks.remove_check(shortname=shortname)
    checks.remove_check(shortname="node")
def update_nrpe_config():
    """Install bundled NRPE checks and register service and haproxy checks."""
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checks = nrpe.NRPE(hostname=nagios_host)

    # Check scripts ship inside the charm's copy of charmhelpers.
    bundled_checks = os.path.join(charm_dir(), 'charmhelpers', 'contrib',
                                  'openstack', 'files')
    nrpe.copy_nrpe_checks(nrpe_files_dir=bundled_checks)

    nrpe.add_init_service_checks(checks, services(), unit_name)
    nrpe.add_haproxy_checks(checks, unit_name)
    checks.write()
def configure_nrpe(unused=None):
    """Register NRPE checks for MONITORED_SERVICES and set the config state.

    :param unused: ignored by this function.
    """
    message = 'Configuring nrpe checks for services: {}'.format(
        MONITORED_SERVICES)
    hookenv.log(message)

    # The current nrpe-external-master interface doesn't handle a lot of
    # logic, use the charm-helpers code for now.
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checks = nrpe.NRPE(hostname=nagios_host, primary=False)
    nrpe.add_init_service_checks(checks, MONITORED_SERVICES, unit_name)
    checks.write()

    set_state('nrpe-external-master.initial-config')
def update_nrpe_checks():
    """Install the RabbitMQ Nagios plugins and register their NRPE checks.

    Copies the check scripts into NAGIOS_PLUGINS when it exists, installs or
    removes the stats collection cron job depending on the
    ``stats_cron_schedule`` config option, creates a dedicated RabbitMQ user
    and vhost for this unit, and registers the connectivity check plus an
    optional queue-threshold check.

    Raises whatever ``yaml.safe_load`` or ``str.format`` raise when the
    ``queue_thresholds`` option is malformed; the hook is meant to fail then.
    """
    if os.path.isdir(NAGIOS_PLUGINS):
        rsync(os.path.join(os.getenv('CHARM_DIR'), 'scripts',
                           'check_rabbitmq.py'),
              os.path.join(NAGIOS_PLUGINS, 'check_rabbitmq.py'))
        rsync(os.path.join(os.getenv('CHARM_DIR'), 'scripts',
                           'check_rabbitmq_queues.py'),
              os.path.join(NAGIOS_PLUGINS, 'check_rabbitmq_queues.py'))

    if config('stats_cron_schedule'):
        script = os.path.join(SCRIPTS_DIR, 'collect_rabbitmq_stats.sh')
        cronjob = CRONJOB_CMD.format(schedule=config('stats_cron_schedule'),
                                     timeout=config('cron-timeout'),
                                     command=script)
        rsync(os.path.join(charm_dir(), 'scripts',
                           'collect_rabbitmq_stats.sh'), script)
        write_file(STATS_CRONFILE, cronjob)
    elif os.path.isfile(STATS_CRONFILE):
        # Schedule was unset: drop the stale cron job.
        os.remove(STATS_CRONFILE)

    # Find out if nrpe set nagios_hostname
    hostname = nrpe.get_nagios_hostname()
    myunit = nrpe.get_nagios_unit_name()

    # create unique user and vhost for each unit
    current_unit = local_unit().replace('/', '-')
    # The template needs a %s placeholder: without one the %-format raises
    # TypeError. The user follows the same per-unit naming as the vhost.
    user = 'nagios-%s' % current_unit
    vhost = 'nagios-%s' % current_unit
    password = rabbit.get_rabbit_password(user, local=True)

    rabbit.create_vhost(vhost)
    rabbit.create_user(user, password)
    rabbit.grant_permissions(user, vhost)

    nrpe_compat = nrpe.NRPE(hostname=hostname)
    nrpe_compat.add_check(
        shortname=rabbit.RABBIT_USER,
        description='Check RabbitMQ {%s}' % myunit,
        check_cmd='{}/check_rabbitmq.py --user {} --password {} --vhost {}'
                  ''.format(NAGIOS_PLUGINS, user, password, vhost)
    )

    if config('queue_thresholds'):
        # If value of queue_thresholds is incorrect we want the hook to fail
        cmd = "".join(
            ' -c "{}" "{}" {} {}'.format(*item)
            for item in yaml.safe_load(config('queue_thresholds')))
        nrpe_compat.add_check(
            shortname=rabbit.RABBIT_USER + '_queue',
            description='Check RabbitMQ Queues',
            check_cmd='{}/check_rabbitmq_queues.py{} {}'.format(
                NAGIOS_PLUGINS, cmd, STATS_DATAFILE)
        )
    nrpe_compat.write()
def update_nrpe_config():
    """Write an NRPE config holding the ``ceph-osd`` process check."""
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()

    # Pipe each OSD id from the whoami files into ``status ceph-osd``.
    command = ('/bin/cat /var/lib/ceph/osd/ceph-*/whoami |'
               'xargs -I@ status ceph-osd id=@ && exit 0 || exit 2')
    description = 'process check {%s}' % unit_name

    checks = nrpe.NRPE(hostname=nagios_host)
    checks.add_check(
        shortname='ceph-osd',
        description=description,
        check_cmd=command,
    )
    checks.write()
def update_nrpe_config():
    """Register the Elasticsearch cluster status NRPE check.

    Does nothing when ``install_nrpe_deps()`` returns a falsy value.
    """
    # python-dbus is used by check_upstart_job
    deps_ready = install_nrpe_deps()
    if not deps_ready:
        return

    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checks = nrpe.NRPE(hostname=nagios_host)
    checks.add_check(
        shortname='elasticsearch-cluster-status',
        description='Elasticsearch cluster status check {%s}' % unit_name,
        check_cmd='/usr/local/bin/check-elasticsearch')
    checks.write()
def remove_nrpe_config(nagios=None):
    """Clear the NRPE config state and remove the kubelet/kube-proxy checks.

    :param nagios: not referenced in the body.
    """
    remove_state('nrpe-external-master.initial-config')

    # List of systemd services for which the checks will be removed
    monitored = ('snap.kubelet.daemon', 'snap.kube-proxy.daemon')

    # The current nrpe-external-master interface doesn't handle a lot of
    # logic, use the charm-helpers code for now.
    checks = nrpe.NRPE(hostname=nrpe.get_nagios_hostname())
    for shortname in monitored:
        checks.remove_check(shortname=shortname)
def remove_nrpe_config(nagios=None):
    """Clear the NRPE config state and remove the etcd snap check.

    :param nagios: not referenced in the body.
    """
    remove_state(NPRE_EXTERNAL_RELATION + ".initial-config")

    # List of systemd services for which the checks will be removed
    monitored = ("snap.etcd.etcd", )

    # The current nrpe-external interface doesn't handle a lot of logic,
    # use the charm-helpers code for now.
    checks = nrpe.NRPE(hostname=nrpe.get_nagios_hostname(), primary=False)
    for shortname in monitored:
        checks.remove_check(shortname=shortname)
def update_nrpe_config():
    """Register the mysql service check and a ``mysqld`` process check.

    Installs ``python-dbus``, adds an init-service check for the ``mysql``
    service and a ``check_procs`` check for the ``mysqld`` process, then
    writes the NRPE configuration.
    """
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    hostname = nrpe.get_nagios_hostname()
    current_unit = nrpe.get_nagios_unit_name()
    nrpe_setup = nrpe.NRPE(hostname=hostname)

    # The services argument is a collection of names; a bare string would be
    # walked character by character, so wrap the single service in a list.
    nrpe.add_init_service_checks(nrpe_setup, ['mysql'], current_unit)
    nrpe_setup.add_check(
        shortname='mysql_proc',
        description='Check MySQL process {%s}' % current_unit,
        check_cmd='check_procs -c 1:1 -C mysqld'
    )
    nrpe_setup.write()
def update_nrpe_checks():
    """Install the RabbitMQ Nagios plugins and register their NRPE checks.

    Copies the check scripts into NAGIOS_PLUGINS when it exists, writes or
    removes the stats collection cron job according to the
    ``stats_cron_schedule`` config option, creates a per-unit RabbitMQ user
    and vhost, and registers the connectivity check plus an optional
    queue-threshold check.

    Raises whatever ``yaml.safe_load`` or ``str.format`` raise when the
    ``queue_thresholds`` option is malformed; the hook is meant to fail then.
    """
    if os.path.isdir(NAGIOS_PLUGINS):
        rsync(os.path.join(os.getenv('CHARM_DIR'), 'scripts',
                           'check_rabbitmq.py'),
              os.path.join(NAGIOS_PLUGINS, 'check_rabbitmq.py'))
        rsync(os.path.join(os.getenv('CHARM_DIR'), 'scripts',
                           'check_rabbitmq_queues.py'),
              os.path.join(NAGIOS_PLUGINS, 'check_rabbitmq_queues.py'))

    if config('stats_cron_schedule'):
        script = os.path.join(SCRIPTS_DIR, 'collect_rabbitmq_stats.sh')
        cronjob = "{} root {}\n".format(config('stats_cron_schedule'), script)
        rsync(os.path.join(charm_dir(), 'scripts',
                           'collect_rabbitmq_stats.sh'), script)
        write_file(STATS_CRONFILE, cronjob)
    elif os.path.isfile(STATS_CRONFILE):
        # Schedule was unset: drop the stale cron job.
        os.remove(STATS_CRONFILE)

    # Find out if nrpe set nagios_hostname
    hostname = nrpe.get_nagios_hostname()
    myunit = nrpe.get_nagios_unit_name()

    # create unique user and vhost for each unit
    current_unit = local_unit().replace('/', '-')
    # The template needs a %s placeholder: without one the %-format raises
    # TypeError. The user follows the same per-unit naming as the vhost.
    user = 'nagios-%s' % current_unit
    vhost = 'nagios-%s' % current_unit
    password = rabbit.get_rabbit_password(user, local=True)

    rabbit.create_vhost(vhost)
    rabbit.create_user(user, password)
    rabbit.grant_permissions(user, vhost)

    nrpe_compat = nrpe.NRPE(hostname=hostname)
    nrpe_compat.add_check(
        shortname=rabbit.RABBIT_USER,
        description='Check RabbitMQ {%s}' % myunit,
        check_cmd='{}/check_rabbitmq.py --user {} --password {} --vhost {}'
                  ''.format(NAGIOS_PLUGINS, user, password, vhost)
    )

    if config('queue_thresholds'):
        # If value of queue_thresholds is incorrect we want the hook to fail
        cmd = "".join(
            ' -c "{}" "{}" {} {}'.format(*item)
            for item in yaml.safe_load(config('queue_thresholds')))
        nrpe_compat.add_check(
            shortname=rabbit.RABBIT_USER + '_queue',
            description='Check RabbitMQ Queues',
            check_cmd='{}/check_rabbitmq_queues.py{} {}'.format(
                NAGIOS_PLUGINS, cmd, STATS_DATAFILE)
        )
    nrpe_compat.write()
def update_nrpe_config():
    """Register the NRPE service check for docker and write the config.

    :return: None
    """
    # List of systemd services that will be checked.
    monitored = ['docker']

    # The current nrpe-external-master interface doesn't handle a lot of
    # logic, use the charm-helpers code for now.
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checks = nrpe.NRPE(hostname=nagios_host)
    nrpe.add_init_service_checks(checks, monitored, unit_name)
    checks.write()
def update_nrpe_config():
    """Register NRPE checks for every service except ``qemu-kvm``."""
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checks = nrpe.NRPE(hostname=nagios_host)

    to_monitor = services()
    # qemu-kvm is a one-shot service
    if 'qemu-kvm' in to_monitor:
        to_monitor.remove('qemu-kvm')

    nrpe.add_init_service_checks(checks, to_monitor, unit_name)
    checks.write()
def update_nrpe_config():
    """Register service checks and add or retire the haproxy checks.

    The haproxy checks are added when the ``api`` service is enabled;
    otherwise the ``haproxy_servers`` and ``haproxy_queue`` checks are
    removed as deprecated.
    """
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checks = nrpe.NRPE(hostname=nagios_host)

    nrpe.copy_nrpe_checks()
    nrpe.add_init_service_checks(checks, services(), unit_name)

    if not service_enabled('api'):
        nrpe.remove_deprecated_check(checks,
                                     ["haproxy_servers", "haproxy_queue"])
    else:
        nrpe.add_haproxy_checks(checks, unit_name)

    checks.write()
def update_nrpe_config():
    """Install the corosync/pacemaker plugins and register their NRPE checks.

    Copies every regular file from the charm's ``files/nrpe`` directory into
    ``/usr/local/lib/nagios/plugins`` (created if missing) and from
    ``files/sudoers`` into ``/etc/sudoers.d``, then registers the ring, crm
    and process checks.
    """
    plugins_src = os.path.join(os.environ["CHARM_DIR"], "files", "nrpe")
    plugins_dst = "/usr/local/lib/nagios/plugins"
    if not os.path.exists(plugins_dst):
        os.makedirs(plugins_dst)
    for path in glob.glob(os.path.join(plugins_src, "*")):
        if not os.path.isfile(path):
            continue
        shutil.copy2(path, os.path.join(plugins_dst, os.path.basename(path)))

    sudoers_src = os.path.join(os.environ["CHARM_DIR"], "files", "sudoers")
    sudoers_dst = "/etc/sudoers.d"
    for path in glob.glob(os.path.join(sudoers_src, "*")):
        if not os.path.isfile(path):
            continue
        shutil.copy2(path, os.path.join(sudoers_dst, os.path.basename(path)))

    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checks = nrpe.NRPE(hostname=nagios_host)

    apt_install('python-dbus')

    # (shortname, description template, command); corosync/crm checks first,
    # then the process checks.
    check_table = (
        ('corosync_rings', 'Check Corosync rings {%s}',
         'check_corosync_rings'),
        ('crm_status', 'Check crm status {%s}', 'check_crm'),
        ('corosync_proc', 'Check Corosync process {%s}',
         'check_procs -c 1:1 -C corosync'),
        ('pacemakerd_proc', 'Check Pacemakerd process {%s}',
         'check_procs -c 1:1 -C pacemakerd'),
    )
    for shortname, template, command in check_table:
        checks.add_check(
            shortname=shortname,
            description=template % unit_name,
            check_cmd=command)

    checks.write()
def update_nrpe_config():
    """Install the ceph-osd collector cron job and its NRPE check.

    Installs the required packages, copies the collect/check scripts into
    the Nagios plugins directory with that directory's owner and group,
    writes a root cron job that runs the collector every minute, and
    registers the ``ceph-osd`` check.
    """
    # python-dbus is used by check_upstart_job
    # fasteners is used by collect_ceph_osd_services.py
    packages = ['python3-dbus']
    if CompareHostReleases(lsb_release()['DISTRIB_CODENAME']) >= 'bionic':
        packages.append('python3-fasteners')
    apt_install(packages)

    # copy the check and collect files over to the plugins directory
    charm_dir = os.environ.get('CHARM_DIR', '')
    nagios_plugins = '/usr/local/lib/nagios/plugins'
    # Grab nagios user/group ID's from original source
    plugins_stat = os.stat(nagios_plugins)
    owner_uid, owner_gid = plugins_stat.st_uid, plugins_stat.st_gid
    script_names = ('collect_ceph_osd_services.py',
                    'check_ceph_osd_services.py')
    for script_name in script_names:
        destination = os.path.join(nagios_plugins, script_name)
        source = os.path.join(charm_dir, 'files', 'nagios', script_name)
        shutil.copy(source, destination)
        os.chown(destination, owner_uid, owner_gid)

    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()

    # BUG#1810749 - the nagios user can't access /var/lib/ceph/.. and that's
    # a GOOD THING, as it keeps ceph secure from Nagios. However, to check
    # whether ceph is okay, the check_systemd.py or 'status ceph-osd' still
    # needs to be called with the contents of ../osd/ceph-*/whoami files.
    # To get around this conundrum, a cron.d job that runs as root performs
    # the checks every minute and writes the results to a temporary file;
    # the nrpe check greps this file and errors out (return 2) if the first
    # 3 characters of a line are not 'OK:'.
    cron_entry = ('MAILTO=""\n'
                  '* * * * * root '
                  '/usr/local/lib/nagios/plugins/collect_ceph_osd_services.py'
                  ' 2>&1 | logger -t check-osd\n')
    with open(CRON_CEPH_CHECK_FILE, "wt") as cron_file:
        cron_file.write(cron_entry)

    checks = nrpe.NRPE(hostname=nagios_host)
    checks.add_check(
        shortname='ceph-osd',
        description='process check {%s}' % unit_name,
        check_cmd='/usr/local/lib/nagios/plugins/check_ceph_osd_services.py',
    )
    checks.write()
def update_nrpe_config():
    """Register service and haproxy NRPE checks, shortening snap names.

    Service names starting with ``snap.`` are replaced by their second
    dot-separated component before the checks are added.
    """
    # python-dbus is used by check_upstart_job
    log('Updating NRPE configuration')
    status_set('maintenance', 'Updating NRPE configuration')
    apt_install('python-dbus')
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checks = nrpe.NRPE(hostname=nagios_host)
    nrpe.copy_nrpe_checks()

    monitored = [
        name.split('.')[1] if name.startswith('snap.') else name
        for name in services()
    ]
    nrpe.add_init_service_checks(checks, monitored, unit_name)
    nrpe.add_haproxy_checks(checks, unit_name)
    checks.write()
def update_nrpe_config():
    """Register service, haproxy and optional virtual-host NRPE checks.

    The ``vhost`` check is only added when the NRPE config carries a truthy
    ``nagios_check_http_params`` value.
    """
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checks = nrpe.NRPE(hostname=nagios_host)

    nrpe.copy_nrpe_checks()
    nrpe.add_init_service_checks(checks, services(), unit_name)
    nrpe.add_haproxy_checks(checks, unit_name)

    http_params = checks.config.get('nagios_check_http_params')
    if http_params:
        checks.add_check(
            shortname='vhost',
            description='Check Virtual Host {%s}' % unit_name,
            check_cmd='check_http %s' % http_params,
        )

    checks.write()
def update_nrpe_config():
    """Register service checks and the network-namespace status check.

    Adds init-service checks for ``services()``, writes a cron job that runs
    ``check_netns.sh`` every five minutes as root and stores its output in
    ``/var/lib/nagios/netns-check.txt``, and registers the ``netns`` check
    that reads that file.
    """
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    hostname = nrpe.get_nagios_hostname()
    current_unit = nrpe.get_nagios_unit_name()
    nrpe_setup = nrpe.NRPE(hostname=hostname)
    nrpe.add_init_service_checks(nrpe_setup, services(), current_unit)

    cronpath = '/etc/cron.d/nagios-netns-check'
    cron_template = ('*/5 * * * * root '
                     '/usr/local/lib/nagios/plugins/check_netns.sh '
                     '> /var/lib/nagios/netns-check.txt\n'
                     )
    # The context manager closes the file even if the write fails.
    with open(cronpath, 'w') as cron_file:
        cron_file.write(cron_template)

    nrpe_setup.add_check(
        shortname="netns",
        description='Network Namespace check {%s}' % current_unit,
        check_cmd='check_status_file.py -f /var/lib/nagios/netns-check.txt'
    )
    nrpe_setup.write()
def update_nrpe_checks():
    """Install the RabbitMQ Nagios plugins and register their NRPE checks.

    Copies the check scripts into NAGIOS_PLUGINS when it exists (including
    the cluster check when ``management_plugin`` is set), installs or
    removes the stats collection cron job according to
    ``stats_cron_schedule``, creates a per-unit RabbitMQ user (tagged
    ``monitoring``) and vhost, and registers the plain and/or SSL
    connectivity checks depending on the ``ssl`` option, plus the optional
    queue-threshold and cluster checks.

    Raises whatever ``yaml.safe_load`` or ``str.format`` raise when the
    ``queue_thresholds`` option is malformed; the hook is meant to fail then.
    """
    if os.path.isdir(NAGIOS_PLUGINS):
        rsync(os.path.join(charm_dir(), 'scripts',
                           'check_rabbitmq.py'),
              os.path.join(NAGIOS_PLUGINS, 'check_rabbitmq.py'))
        rsync(os.path.join(charm_dir(), 'scripts',
                           'check_rabbitmq_queues.py'),
              os.path.join(NAGIOS_PLUGINS, 'check_rabbitmq_queues.py'))
        if config('management_plugin'):
            rsync(os.path.join(charm_dir(), 'scripts',
                               'check_rabbitmq_cluster.py'),
                  os.path.join(NAGIOS_PLUGINS, 'check_rabbitmq_cluster.py'))

    if config('stats_cron_schedule'):
        script = os.path.join(SCRIPTS_DIR, 'collect_rabbitmq_stats.sh')
        cronjob = CRONJOB_CMD.format(schedule=config('stats_cron_schedule'),
                                     timeout=config('cron-timeout'),
                                     command=script)
        rsync(os.path.join(charm_dir(), 'scripts',
                           'collect_rabbitmq_stats.sh'), script)
        write_file(STATS_CRONFILE, cronjob)
    elif os.path.isfile(STATS_CRONFILE):
        # Schedule was unset: drop the stale cron job.
        os.remove(STATS_CRONFILE)

    # Find out if nrpe set nagios_hostname
    hostname = nrpe.get_nagios_hostname()
    myunit = nrpe.get_nagios_unit_name()

    # create unique user and vhost for each unit
    current_unit = local_unit().replace('/', '-')
    # The template needs a %s placeholder: without one the %-format raises
    # TypeError. The user follows the same per-unit naming as the vhost.
    user = 'nagios-%s' % current_unit
    vhost = 'nagios-%s' % current_unit
    password = rabbit.get_rabbit_password(user, local=True)

    rabbit.create_vhost(vhost)
    rabbit.create_user(user, password, ['monitoring'])
    rabbit.grant_permissions(user, vhost)

    nrpe_compat = nrpe.NRPE(hostname=hostname)
    if config('ssl') in ['off', 'on']:
        cmd = ('{plugins_dir}/check_rabbitmq.py --user {user} '
               '--password {password} --vhost {vhost}')
        cmd = cmd.format(plugins_dir=NAGIOS_PLUGINS, user=user,
                         password=password, vhost=vhost)
        nrpe_compat.add_check(
            shortname=rabbit.RABBIT_USER,
            description='Check RabbitMQ {%s}' % myunit,
            check_cmd=cmd
        )

    if config('ssl') in ['only', 'on']:
        log('Adding rabbitmq SSL check', level=DEBUG)
        cmd = ('{plugins_dir}/check_rabbitmq.py --user {user} '
               '--password {password} --vhost {vhost} '
               '--ssl --ssl-ca {ssl_ca} --port {port}')
        cmd = cmd.format(plugins_dir=NAGIOS_PLUGINS, user=user,
                         password=password,
                         port=int(config('ssl_port')), vhost=vhost,
                         ssl_ca=SSL_CA_FILE)
        nrpe_compat.add_check(
            shortname=rabbit.RABBIT_USER + "_ssl",
            description='Check RabbitMQ (SSL) {%s}' % myunit,
            check_cmd=cmd
        )

    if config('queue_thresholds'):
        # If value of queue_thresholds is incorrect we want the hook to fail
        cmd = "".join(
            ' -c "{}" "{}" {} {}'.format(*item)
            for item in yaml.safe_load(config('queue_thresholds')))
        nrpe_compat.add_check(
            shortname=rabbit.RABBIT_USER + '_queue',
            description='Check RabbitMQ Queues',
            check_cmd='{}/check_rabbitmq_queues.py{} {}'.format(
                NAGIOS_PLUGINS, cmd, STATS_DATAFILE)
        )

    if config('management_plugin'):
        # add NRPE check
        nrpe_compat.add_check(
            shortname=rabbit.RABBIT_USER + '_cluster',
            description='Check RabbitMQ Cluster',
            check_cmd='{}/check_rabbitmq_cluster.py --port {} --user {} '
                      '--password {}'.format(
                          NAGIOS_PLUGINS,
                          rabbit.get_managment_port(),
                          user,
                          password
                      )
        )

    nrpe_compat.write()