Ejemplo n.º 1
0
def update_nrpe_config():
    """Refresh the Ceph status Nagios plugin, cron job and NRPE check.

    Installs the helper packages, syncs ``check_ceph_status.py`` into the
    Nagios plugin directory (only when that directory exists), syncs the
    ``collect_ceph_status.sh`` collector, writes a cron entry that runs the
    collector as root every five minutes, and registers the ``ceph`` check
    with the thresholds taken from the charm config.
    """
    # python-dbus is used by check_upstart_job
    apt_install(['python-dbus', 'lockfile-progs'])
    log('Refreshing nagios checks')

    plugin_name = 'check_ceph_status.py'
    collector_name = 'collect_ceph_status.sh'

    if os.path.isdir(NAGIOS_PLUGINS):
        plugin_src = os.path.join(
            os.getenv('CHARM_DIR'), 'files', 'nagios', plugin_name)
        plugin_dst = os.path.join(NAGIOS_PLUGINS, plugin_name)
        rsync(plugin_src, plugin_dst)

    collector_dst = os.path.join(SCRIPTS_DIR, collector_name)
    collector_src = os.path.join(
        os.getenv('CHARM_DIR'), 'files', 'nagios', collector_name)
    rsync(collector_src, collector_dst)
    cron_line = "{} root {}\n".format('*/5 * * * *', collector_dst)
    write_file(STATUS_CRONFILE, cron_line)

    # Find out if nrpe set nagios_hostname
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checker = nrpe.NRPE(hostname=nagios_host)

    cmd_template = ('check_ceph_status.py -f {} --degraded_thresh {}'
                    ' --misplaced_thresh {}'
                    ' --recovery_rate {}')
    ceph_cmd = cmd_template.format(
        STATUS_FILE,
        config('nagios_degraded_thresh'),
        config('nagios_misplaced_thresh'),
        config('nagios_recovery_rate'))
    if config('nagios_raise_nodeepscrub'):
        ceph_cmd += ' --raise_nodeepscrub'

    checker.add_check(
        shortname="ceph",
        description='Check Ceph health {{{}}}'.format(unit_name),
        check_cmd=ceph_cmd)
    checker.write()
Ejemplo n.º 2
0
def update_nrpe_config():
    """Refresh the Ceph status Nagios plugin, cron job and NRPE check.

    Syncs the check plugin (only when the Nagios plugin directory exists)
    and the status collector script from the charm's ``files/nagios``
    directory, writes the five-minute cron entry for the collector, and
    registers the ``ceph`` check reading ``STATUS_FILE``.
    """
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    log('Refreshing nagios checks')

    if os.path.isdir(NAGIOS_PLUGINS):
        plugin_src = os.path.join(
            os.getenv('CHARM_DIR'), 'files', 'nagios',
            'check_ceph_status.py')
        plugin_dst = os.path.join(NAGIOS_PLUGINS, 'check_ceph_status.py')
        rsync(plugin_src, plugin_dst)

    collector = os.path.join(SCRIPTS_DIR, 'collect_ceph_status.sh')
    collector_src = os.path.join(
        os.getenv('CHARM_DIR'), 'files', 'nagios', 'collect_ceph_status.sh')
    rsync(collector_src, collector)
    write_file(STATUS_CRONFILE,
               "{} root {}\n".format('*/5 * * * *', collector))

    # Find out if nrpe set nagios_hostname
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checker = nrpe.NRPE(hostname=nagios_host)
    health_description = 'Check Ceph health {%s}' % unit_name
    health_cmd = 'check_ceph_status.py -f {}'.format(STATUS_FILE)
    checker.add_check(shortname="ceph",
                      description=health_description,
                      check_cmd=health_cmd)
    checker.write()
Ejemplo n.º 3
0
def update_nrpe_config(checks_to_remove=None):
    """
    Rebuild the NRPE checks for this unit.

    Copies the shared NRPE check scripts, optionally removes obsolete
    checks, then adds the init-service and haproxy checks and writes the
    NRPE setup.

    :param checks_to_remove: short names of nrpe checks to remove before
        the service checks are added, e.g. ['radosgw'] to drop the check
        for the default systemd radosgw service in favour of per host
        services. ``None`` skips the removal step entirely.
    :type checks_to_remove: list

    """
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checker = nrpe.NRPE(hostname=nagios_host)
    nrpe.copy_nrpe_checks()

    if checks_to_remove is not None:
        removal_msg = "Removing the following nrpe checks: {}".format(
            checks_to_remove)
        log(removal_msg, level=DEBUG)
        for obsolete in checks_to_remove:
            checker.remove_check(shortname=obsolete)

    nrpe.add_init_service_checks(checker, services(), unit_name)
    nrpe.add_haproxy_checks(checker, unit_name)
    checker.write()
Ejemplo n.º 4
0
def update_nrpe_config():
    """Register an init-service NRPE check for ``service_name``."""
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    monitored = [service_name]
    checker = nrpe.NRPE(hostname=nagios_host)
    nrpe.add_init_service_checks(checker, monitored, unit_name)
    checker.write()
Ejemplo n.º 5
0
def update_nrpe_config(unused=None):
    """Register NRPE service checks for the kubelet and kube-proxy snaps.

    ``unused`` is accepted but never read by the body.
    """
    monitored = ('snap.kubelet.daemon', 'snap.kube-proxy.daemon')
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checker = nrpe.NRPE(hostname=nagios_host)
    nrpe.add_init_service_checks(checker, monitored, unit_name)
    checker.write()
Ejemplo n.º 6
0
def update_nrpe_config():
    """Install the Ceph status check, its collector cron job and NRPE entry.

    The plugin is only synced when ``NAGIOS_PLUGINS`` is an existing
    directory; the collector script and cron file are always refreshed.
    """
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    log('Refreshing nagios checks')

    check_script = 'check_ceph_status.py'
    if os.path.isdir(NAGIOS_PLUGINS):
        check_src = os.path.join(
            os.getenv('CHARM_DIR'), 'files', 'nagios', check_script)
        rsync(check_src, os.path.join(NAGIOS_PLUGINS, check_script))

    collector_path = os.path.join(SCRIPTS_DIR, 'collect_ceph_status.sh')
    collector_origin = os.path.join(
        os.getenv('CHARM_DIR'), 'files', 'nagios', 'collect_ceph_status.sh')
    rsync(collector_origin, collector_path)

    # Run the collector as root every five minutes.
    cron_entry = "{} root {}\n".format('*/5 * * * *', collector_path)
    write_file(STATUS_CRONFILE, cron_entry)

    # Find out if nrpe set nagios_hostname
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checker = nrpe.NRPE(hostname=nagios_host)
    checker.add_check(
        shortname="ceph",
        description='Check Ceph health {%s}' % unit_name,
        check_cmd='check_ceph_status.py -f {}'.format(STATUS_FILE),
    )
    checker.write()
Ejemplo n.º 7
0
 def render_nrpe_checks(self):
     """Add init-service NRPE checks for ``self.services`` and write them."""
     nagios_host = nrpe.get_nagios_hostname()
     unit_name = nrpe.get_nagios_unit_name()
     checker = nrpe.NRPE(hostname=nagios_host)
     nrpe.add_init_service_checks(checker, self.services, unit_name)
     checker.write()
Ejemplo n.º 8
0
def update_nrpe_config(checks_to_remove=None):
    """
    Refresh the nagios plugin checks for this unit.

    The shared check scripts are copied first; any checks named in
    ``checks_to_remove`` are then removed before the init-service and
    haproxy checks are (re)added and the NRPE setup is written.

    :param checks_to_remove: list of short names of nrpe checks to
        remove. For example, pass ['radosgw'] to remove the check for
        the default systemd radosgw service, to make way for per host
        services. When ``None`` nothing is removed or logged.
    :type checks_to_remove: list

    """
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    host = nrpe.get_nagios_hostname()
    unit = nrpe.get_nagios_unit_name()
    nrpe_config = nrpe.NRPE(hostname=host)
    nrpe.copy_nrpe_checks()

    removal_requested = checks_to_remove is not None
    if removal_requested:
        log("Removing the following nrpe checks: {}".format(checks_to_remove),
            level=DEBUG)
        for check_name in checks_to_remove:
            nrpe_config.remove_check(shortname=check_name)

    nrpe.add_init_service_checks(nrpe_config, services(), unit)
    nrpe.add_haproxy_checks(nrpe_config, unit)
    nrpe_config.write()
Ejemplo n.º 9
0
def update_nrpe_config(unused=None):
    """Add NRPE checks for the kubelet and kube-proxy snap daemons.

    The ``unused`` argument is ignored by the body.
    """
    snap_daemons = ('snap.kubelet.daemon', 'snap.kube-proxy.daemon')
    host = nrpe.get_nagios_hostname()
    unit = nrpe.get_nagios_unit_name()
    nrpe_config = nrpe.NRPE(hostname=host)
    nrpe.add_init_service_checks(nrpe_config, snap_daemons, unit)
    nrpe_config.write()
def update_nrpe_config():
    """Refresh the swift-storage NRPE plugins, sudoers file and checks.

    Creates the Nagios plugin directory when it is missing, syncs the two
    swift check plugins and the ``swift-storage`` sudoers file from the
    charm, then registers the ``swift_storage`` check together with the
    init-service checks for ``SWIFT_SVCS``.
    """
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    log('Refreshing nrpe checks')
    if not os.path.exists(NAGIOS_PLUGINS):
        mkpath(NAGIOS_PLUGINS)

    storage_plugin = 'check_swift_storage.py'
    service_plugin = 'check_swift_service'
    sudoers_name = 'swift-storage'

    storage_src = os.path.join(
        os.getenv('CHARM_DIR'), 'files', 'nrpe-external-master',
        storage_plugin)
    rsync(storage_src, os.path.join(NAGIOS_PLUGINS, storage_plugin))

    service_src = os.path.join(
        os.getenv('CHARM_DIR'), 'files', 'nrpe-external-master',
        service_plugin)
    rsync(service_src, os.path.join(NAGIOS_PLUGINS, service_plugin))

    sudoers_src = os.path.join(
        os.getenv('CHARM_DIR'), 'files', 'sudo', sudoers_name)
    rsync(sudoers_src, os.path.join(SUDOERS_D, sudoers_name))

    # Find out if nrpe set nagios_hostname
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checker = nrpe.NRPE(hostname=nagios_host)

    # check the rings and replication
    ring_description = (
        'Check swift storage ring hashes and replication {%s}' % unit_name)
    ring_cmd = 'check_swift_storage.py {}'.format(
        config('nagios-check-params'))
    checker.add_check(shortname='swift_storage',
                      description=ring_description,
                      check_cmd=ring_cmd)
    nrpe.add_init_service_checks(checker, SWIFT_SVCS, unit_name)
    checker.write()
Ejemplo n.º 11
0
def update_nrpe_config():
    """Refresh the Ceph Nagios plugin, cron job and all Ceph NRPE checks.

    Besides the main ``ceph`` health check this registers:

    * one ``ceph-<key>`` check per entry of the ``nagios_additional_checks``
      config option (a Python literal mapping parsed with
      ``ast.literal_eval``), and
    * a ``ceph_num_osds`` check when ``nagios_check_num_osds`` is set.
    """
    # python-dbus is used by check_upstart_job
    apt_install(['python-dbus', 'lockfile-progs'])
    log('Refreshing nagios checks')

    plugin_name = 'check_ceph_status.py'
    if os.path.isdir(NAGIOS_PLUGINS):
        plugin_src = os.path.join(
            os.getenv('CHARM_DIR'), 'files', 'nagios', plugin_name)
        rsync(plugin_src, os.path.join(NAGIOS_PLUGINS, plugin_name))

    collector = os.path.join(SCRIPTS_DIR, 'collect_ceph_status.sh')
    collector_src = os.path.join(
        os.getenv('CHARM_DIR'), 'files', 'nagios', 'collect_ceph_status.sh')
    rsync(collector_src, collector)
    write_file(STATUS_CRONFILE,
               "{} root {}\n".format('*/5 * * * *', collector))

    # Find out if nrpe set nagios_hostname
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checker = nrpe.NRPE(hostname=nagios_host)

    health_template = ('check_ceph_status.py -f {} --degraded_thresh {}'
                       ' --misplaced_thresh {}'
                       ' --recovery_rate {}')
    health_cmd = health_template.format(
        STATUS_FILE,
        config('nagios_degraded_thresh'),
        config('nagios_misplaced_thresh'),
        config('nagios_recovery_rate'))
    if config('nagios_raise_nodeepscrub'):
        health_cmd += ' --raise_nodeepscrub'
    checker.add_check(
        shortname="ceph",
        description='Check Ceph health {{{}}}'.format(unit_name),
        check_cmd=health_cmd)

    if config('nagios_additional_checks'):
        critical_requested = config('nagios_additional_checks_critical')
        extra_checks = ast.literal_eval(config('nagios_additional_checks'))

        extra_template = ('check_ceph_status.py -f {}'
                          ' --additional_check \\\"{}\\\"'
                          ' {}')
        for label, pattern in extra_checks.items():
            extra_name = "ceph-{}".format(label.replace(" ", ""))
            log("Adding check {}".format(extra_name))
            # Only the literal value True enables the critical flag.
            if critical_requested is True:
                critical_flag = "--additional_check_critical"
            else:
                critical_flag = ""
            extra_cmd = extra_template.format(
                STATUS_FILE, pattern, critical_flag)
            checker.add_check(
                shortname=extra_name,
                description='Additional Ceph checks {{{}}}'.format(
                    unit_name),
                check_cmd=extra_cmd)

    if config('nagios_check_num_osds'):
        osd_cmd = 'check_ceph_status.py -f {} --check_num_osds'.format(
            STATUS_FILE)
        checker.add_check(
            shortname='ceph_num_osds',
            description='Check whether all OSDs are up and in',
            check_cmd=osd_cmd)

    checker.write()
Ejemplo n.º 12
0
def update_nrpe_config(svc):
    """Install python-dbus and register NRPE service checks for SVCNAME.

    The ``svc`` argument is not read by the body; the services passed to
    ``add_init_service_checks`` come from the module-level ``SVCNAME``.
    """
    # python-dbus is used by check_upstart_job
    fetch.apt_install('python-dbus')
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checker = nrpe.NRPE(hostname=nagios_host)
    nrpe.add_init_service_checks(checker, SVCNAME, unit_name)
    checker.write()
Ejemplo n.º 13
0
def update_nrpe_config(unused=None):
    """Register the NRPE service check for nginx.

    ``unused`` is accepted but never read by the body.
    """
    monitored = ('nginx', )

    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checker = nrpe.NRPE(hostname=nagios_host)
    nrpe.add_init_service_checks(checker, monitored, unit_name)
    checker.write()
Ejemplo n.º 14
0
def update_nrpe_config(unused=None):
    """Add an NRPE init-service check for the nginx service.

    The ``unused`` argument is ignored.
    """
    nginx_services = ('nginx',)

    host = nrpe.get_nagios_hostname()
    unit = nrpe.get_nagios_unit_name()
    nrpe_config = nrpe.NRPE(hostname=host)
    nrpe.add_init_service_checks(nrpe_config, nginx_services, unit)
    nrpe_config.write()
Ejemplo n.º 15
0
def update_nrpe_config(svc):
    """Register NRPE init-service checks for ``SVCNAME``.

    Note that ``svc`` is part of the signature but is not used here.
    """
    # python-dbus is used by check_upstart_job
    fetch.apt_install('python-dbus')
    host = nrpe.get_nagios_hostname()
    unit = nrpe.get_nagios_unit_name()
    nrpe_config = nrpe.NRPE(hostname=host)
    nrpe.add_init_service_checks(nrpe_config, SVCNAME, unit)
    nrpe_config.write()
Ejemplo n.º 16
0
def update_nrpe_config(unused=None):
    """Register NRPE checks for the Kubernetes control-plane services.

    ``unused`` is accepted but never read by the body.
    """
    control_plane = (
        'kube-apiserver',
        'kube-controller-manager',
        'kube-scheduler',
    )

    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checker = nrpe.NRPE(hostname=nagios_host)
    nrpe.add_init_service_checks(checker, control_plane, unit_name)
    checker.write()
Ejemplo n.º 17
0
def update_nrpe_config(unused=None):
    """Register NRPE checks for ``SNAP_SERVICES`` using a primary NRPE.

    ``unused`` is accepted but never read by the body.
    """
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checker = nrpe.NRPE(hostname=nagios_host, primary=True)

    # One check per systemd service provided by the snap.
    nrpe.add_init_service_checks(checker, SNAP_SERVICES, unit_name)
    checker.write()
Ejemplo n.º 18
0
def update_nrpe_config(unused=None):
    """Add NRPE checks for kube-apiserver, controller-manager and scheduler.

    The ``unused`` argument is ignored.
    """
    master_services = ('kube-apiserver', 'kube-controller-manager',
                       'kube-scheduler')

    host = nrpe.get_nagios_hostname()
    unit = nrpe.get_nagios_unit_name()
    nrpe_config = nrpe.NRPE(hostname=host)
    nrpe.add_init_service_checks(nrpe_config, master_services, unit)
    nrpe_config.write()
Ejemplo n.º 19
0
def remove_nrpe_config(nagios=None):
    """Remove the NRPE checks for ``SNAP_SERVICES`` and clear the state flag.

    ``nagios`` is accepted but never read by the body.
    """
    nagios_host = nrpe.get_nagios_hostname()
    checker = nrpe.NRPE(hostname=nagios_host)

    for snap_service in SNAP_SERVICES:
        checker.remove_check(shortname=snap_service)

    initial_config_flag = charm_state('nrpe-initial-config')
    remove_state(initial_config_flag)
Ejemplo n.º 20
0
def main():
    """Install the LSI RAID and network bond Nagios checks for this unit.

    Steps:

    1. Run ``lsmod`` and look for the ``megaraid`` kernel module.
    2. Sync the ``check_lsi_raid`` and ``check_bond`` plugins and the
       ``nagios_sudoers`` file from the charm's ``files/nagios`` directory,
       each only when its destination directory exists.
    3. When megaraid is loaded, install the RAID tooling packages and
       register the ``lsi-raid`` check.
    4. When both ``bond0`` and ``bond1`` are present under
       ``/proc/net/bonding``, register one check per bond.
    5. Write the NRPE setup, set the ``raidcheck_installed`` state and mark
       the unit active.
    """
    def _sync_nagios_file(name, dest_dir):
        # Copy one charm-shipped file into dest_dir if that directory exists.
        if os.path.isdir(dest_dir):
            rsync(
                os.path.join(os.getenv('CHARM_DIR'), 'files', 'nagios', name),
                os.path.join(dest_dir, name))

    lsmod = subprocess.Popen(['lsmod'],
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE)
    stdout, _ = lsmod.communicate()
    # Always bind the flag: it is read below even when the module is absent,
    # so leaving it unassigned would raise UnboundLocalError.
    megaraid = bool(re.search('megaraid', stdout.decode('utf-8')))

    _sync_nagios_file('check_lsi_raid', NAGIOS_PLUGINS)
    _sync_nagios_file('nagios_sudoers', SUDOERS_DIR)
    _sync_nagios_file('check_bond', NAGIOS_PLUGINS)

    hostname = nrpe.get_nagios_hostname()
    current_unit = nrpe.get_nagios_unit_name()
    nrpe_setup = nrpe.NRPE(hostname=hostname)

    # Install megaraid tools
    # And add megaraid nagios check
    if megaraid:
        install_packages(['storcli', 'libfile-which-perl'])

        nrpe_setup.add_check(
            shortname='lsi-raid',
            description='LSI Raid Check {%s}' % current_unit,
            check_cmd=os.path.join(NAGIOS_PLUGINS, 'check_lsi_raid'))

    # Install checks for the network bonds
    if os.path.isfile('/proc/net/bonding/bond0') and \
       os.path.isfile('/proc/net/bonding/bond1'):
        bond_plugin = os.path.join(NAGIOS_PLUGINS, 'check_bond')
        nrpe_setup.add_check(
            shortname='bond0',
            description='Bond0 check {%s}' % current_unit,
            check_cmd=bond_plugin + ' -i bond0 -p eth2')
        # Each bond needs its own shortname; reusing 'bond0' here would
        # make the second check collide with the first.
        nrpe_setup.add_check(
            shortname='bond1',
            description='Bond1 check {%s}' % current_unit,
            check_cmd=bond_plugin + ' -i bond1 -p eth3')

    nrpe_setup.write()
    reactive.set_state('raidcheck_installed')
    hookenv.status_set('active', 'Unit is ready')
Ejemplo n.º 21
0
 def test_get_nagios_hostname(self):
     """get_nagios_hostname() returns the related unit's nagios_hostname."""
     expected_hostname = 'bob-openstack-dashboard-0'
     relation_data = {
         'nagios_hostname': expected_hostname,
         'private-address': '10.5.3.103',
         '__unit__': u'dashboard-nrpe/1',
         '__relid__': u'nrpe-external-master:2',
         'nagios_host_context': u'bob',
     }
     relations_mock = self.patched['relations_of_type']
     relations_mock.return_value = [relation_data]
     self.assertEqual(nrpe.get_nagios_hostname(), expected_hostname)
Ejemplo n.º 22
0
def update_nrpe_config():
    """Copy the shared NRPE checks and register service and haproxy checks."""
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checker = nrpe.NRPE(hostname=nagios_host)
    nrpe.copy_nrpe_checks()
    nrpe.add_init_service_checks(checker, services(), unit_name)
    nrpe.add_haproxy_checks(checker, unit_name)
    checker.write()
Ejemplo n.º 23
0
def update_nrpe_config():
    """Refresh NRPE: copy check scripts, add init-service and haproxy checks.

    The monitored services are whatever ``services()`` returns.
    """
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    host = nrpe.get_nagios_hostname()
    unit = nrpe.get_nagios_unit_name()
    nrpe_config = nrpe.NRPE(hostname=host)
    nrpe.copy_nrpe_checks()
    monitored = services()
    nrpe.add_init_service_checks(nrpe_config, monitored, unit)
    nrpe.add_haproxy_checks(nrpe_config, unit)
    nrpe_config.write()
Ejemplo n.º 24
0
def remove_nrpe_config():
    """Remove the NRPE checks for ``MONITORED_SERVICES`` and clear the flag.

    After the checks are removed the NRPE setup is written and the
    ``nrpe-external-master.initial-config`` state is removed.
    """
    removal_msg = 'Removing nrpe checks for services: {}'.format(
        MONITORED_SERVICES)
    hookenv.log(removal_msg)

    nagios_host = nrpe.get_nagios_hostname()
    checker = nrpe.NRPE(hostname=nagios_host, primary=False)
    for service_check in MONITORED_SERVICES:
        checker.remove_check(shortname=service_check)
    checker.write()

    remove_state('nrpe-external-master.initial-config')
Ejemplo n.º 25
0
def update_nrpe_config(unused=None):
    """Register the NRPE service check for the etcd snap.

    ``unused`` is accepted but never read by the body.
    """
    # Systemd services to be checked.
    etcd_services = ('snap.etcd.etcd', )

    # The current nrpe-external-master interface doesn't handle a lot of
    # logic, use the charm-helpers code for now.
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checker = nrpe.NRPE(hostname=nagios_host, primary=False)
    nrpe.add_init_service_checks(checker, etcd_services, unit_name)
    checker.write()
def update_nrpe_config():
    """Register the mysql service check and a mysqld process-count check."""
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checker = nrpe.NRPE(hostname=nagios_host)
    nrpe.add_init_service_checks(checker, ['mysql'], unit_name)

    proc_description = 'Check MySQL process {%s}' % unit_name
    checker.add_check(
        shortname='mysql_proc',
        description=proc_description,
        check_cmd='check_procs -c 1:1 -C mysqld',
    )
    checker.write()
Ejemplo n.º 27
0
def update_nrpe_config():
    """Copy NRPE checks from the bundled charmhelpers and register checks.

    The check scripts are taken from ``charmhelpers/contrib/openstack/files``
    under the charm directory; init-service and haproxy checks are then
    added and the NRPE setup is written.
    """
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checker = nrpe.NRPE(hostname=nagios_host)

    bundled_checks = os.path.join(
        charm_dir(), 'charmhelpers', 'contrib', 'openstack', 'files')
    nrpe.copy_nrpe_checks(nrpe_files_dir=bundled_checks)

    nrpe.add_init_service_checks(checker, services(), unit_name)
    nrpe.add_haproxy_checks(checker, unit_name)
    checker.write()
Ejemplo n.º 28
0
def update_nrpe_config():
    """Register the ``ceph-osd`` process check with NRPE.

    The check command pipes every OSD id found in
    ``/var/lib/ceph/osd/ceph-*/whoami`` through ``status ceph-osd id=@`` and
    exits 0 on success or 2 otherwise.
    """
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checker = nrpe.NRPE(hostname=nagios_host)

    osd_cmd = ('/bin/cat /var/lib/ceph/osd/ceph-*/whoami |'
               'xargs -I@ status ceph-osd id=@ && exit 0 || exit 2')
    osd_description = 'process check {%s}' % unit_name
    checker.add_check(shortname='ceph-osd',
                      description=osd_description,
                      check_cmd=osd_cmd)
    checker.write()
Ejemplo n.º 29
0
def remove_nrpe_config():
    """Drop the worker NRPE state, plugin and checks.

    Removes the ``nrpe-external-master.initial-config`` state and the
    ``check_k8s_worker.py`` plugin, then removes the check of every entry in
    ``worker_services`` plus the ``node`` check.
    """
    remove_state("nrpe-external-master.initial-config")
    remove_nagios_plugin("check_k8s_worker.py")

    # The current nrpe-external-master interface doesn't handle a lot of
    # logic, use the charm-helpers code for now.
    nagios_host = nrpe.get_nagios_hostname()
    checker = nrpe.NRPE(hostname=nagios_host)

    for worker_service in worker_services:
        checker.remove_check(shortname=worker_service)
    checker.remove_check(shortname="node")
Ejemplo n.º 30
0
def update_nrpe_config():
    """Refresh NRPE using the check scripts bundled with charmhelpers.

    Copies the checks from ``charmhelpers/contrib/openstack/files`` inside
    the charm directory, then adds the init-service and haproxy checks and
    writes the NRPE setup.
    """
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    host = nrpe.get_nagios_hostname()
    unit = nrpe.get_nagios_unit_name()
    nrpe_config = nrpe.NRPE(hostname=host)
    checks_dir = os.path.join(charm_dir(), 'charmhelpers', 'contrib',
                              'openstack', 'files')
    nrpe.copy_nrpe_checks(nrpe_files_dir=checks_dir)
    nrpe.add_init_service_checks(nrpe_config, services(), unit)
    nrpe.add_haproxy_checks(nrpe_config, unit)
    nrpe_config.write()
Ejemplo n.º 31
0
def configure_nrpe(unused=None):
    """Register NRPE checks for ``MONITORED_SERVICES`` and set the flag.

    ``unused`` is accepted but never read by the body. Once the checks are
    written the ``nrpe-external-master.initial-config`` state is set.
    """
    setup_msg = 'Configuring nrpe checks for services: {}'.format(
        MONITORED_SERVICES)
    hookenv.log(setup_msg)

    # The current nrpe-external-master interface doesn't handle a lot of
    # logic, use the charm-helpers code for now.
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checker = nrpe.NRPE(hostname=nagios_host, primary=False)
    nrpe.add_init_service_checks(checker, MONITORED_SERVICES, unit_name)
    checker.write()

    set_state('nrpe-external-master.initial-config')
def update_nrpe_checks():
    """Refresh the RabbitMQ Nagios plugins, stats cron job and NRPE checks.

    * Syncs ``check_rabbitmq.py`` and ``check_rabbitmq_queues.py`` into the
      Nagios plugin directory when that directory exists.
    * When ``stats_cron_schedule`` is configured, installs the stats
      collector script and writes its cron file; otherwise removes a
      leftover cron file.
    * Creates a per-unit RabbitMQ user and vhost for monitoring and
      registers the main RabbitMQ check with those credentials.
    * When ``queue_thresholds`` is configured, registers the queue check
      with one ``-c`` argument group per threshold entry.
    """
    if os.path.isdir(NAGIOS_PLUGINS):
        rsync(os.path.join(os.getenv('CHARM_DIR'), 'scripts',
                           'check_rabbitmq.py'),
              os.path.join(NAGIOS_PLUGINS, 'check_rabbitmq.py'))
        rsync(os.path.join(os.getenv('CHARM_DIR'), 'scripts',
                           'check_rabbitmq_queues.py'),
              os.path.join(NAGIOS_PLUGINS, 'check_rabbitmq_queues.py'))

    schedule = config('stats_cron_schedule')
    if schedule:
        script = os.path.join(SCRIPTS_DIR, 'collect_rabbitmq_stats.sh')
        cronjob = CRONJOB_CMD.format(schedule=schedule,
                                     timeout=config('cron-timeout'),
                                     command=script)
        rsync(os.path.join(charm_dir(), 'scripts',
                           'collect_rabbitmq_stats.sh'), script)
        write_file(STATS_CRONFILE, cronjob)
    elif os.path.isfile(STATS_CRONFILE):
        # Stats collection was switched off: drop the stale cron file.
        os.remove(STATS_CRONFILE)

    # Find out if nrpe set nagios_hostname
    hostname = nrpe.get_nagios_hostname()
    myunit = nrpe.get_nagios_unit_name()

    # create unique user and vhost for each unit
    current_unit = local_unit().replace('/', '-')
    # The format string must contain a %s placeholder; without one the %
    # operator raises TypeError ("not all arguments converted").
    # NOTE(review): the 'nagios-' prefix mirrors the vhost name below —
    # confirm it matches the user names already deployed.
    user = 'nagios-%s' % current_unit
    vhost = 'nagios-%s' % current_unit
    password = rabbit.get_rabbit_password(user, local=True)

    rabbit.create_vhost(vhost)
    rabbit.create_user(user, password)
    rabbit.grant_permissions(user, vhost)

    nrpe_compat = nrpe.NRPE(hostname=hostname)
    nrpe_compat.add_check(
        shortname=rabbit.RABBIT_USER,
        description='Check RabbitMQ {%s}' % myunit,
        check_cmd='{}/check_rabbitmq.py --user {} --password {} --vhost {}'
                  ''.format(NAGIOS_PLUGINS, user, password, vhost)
    )

    queue_thresholds = config('queue_thresholds')
    if queue_thresholds:
        # If value of queue_thresholds is incorrect we want the hook to fail
        cmd = ''.join(' -c "{}" "{}" {} {}'.format(*item)
                      for item in yaml.safe_load(queue_thresholds))
        nrpe_compat.add_check(
            shortname=rabbit.RABBIT_USER + '_queue',
            description='Check RabbitMQ Queues',
            check_cmd='{}/check_rabbitmq_queues.py{} {}'.format(
                NAGIOS_PLUGINS, cmd, STATS_DATAFILE)
        )
    nrpe_compat.write()
Ejemplo n.º 33
0
def update_nrpe_config():
    """Install python-dbus and register the ``ceph-osd`` process check."""
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    host = nrpe.get_nagios_hostname()
    unit = nrpe.get_nagios_unit_name()
    nrpe_config = nrpe.NRPE(hostname=host)

    # Query the status of every OSD id listed in the whoami files; exit 2
    # when the pipeline fails.
    status_pipeline = ('/bin/cat /var/lib/ceph/osd/ceph-*/whoami |'
                       'xargs -I@ status ceph-osd id=@ && exit 0 || exit 2')
    nrpe_config.add_check(
        shortname='ceph-osd',
        description='process check {%s}' % unit,
        check_cmd=status_pipeline,
    )
    nrpe_config.write()
Ejemplo n.º 34
0
def update_nrpe_config():
    """Register the Elasticsearch cluster status NRPE check.

    Returns early, without touching NRPE, when ``install_nrpe_deps()``
    returns a falsy value.
    """
    # python-dbus is used by check_upstart_job
    deps_ready = install_nrpe_deps()
    if not deps_ready:
        return

    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checker = nrpe.NRPE(hostname=nagios_host)
    cluster_description = (
        'Elasticsearch cluster status check {%s}' % unit_name)
    checker.add_check(
        shortname='elasticsearch-cluster-status',
        description=cluster_description,
        check_cmd='/usr/local/bin/check-elasticsearch')
    checker.write()
Ejemplo n.º 35
0
def remove_nrpe_config(nagios=None):
    """Clear the initial-config state and remove the kubelet/proxy checks.

    ``nagios`` is accepted but never read by the body.
    """
    remove_state('nrpe-external-master.initial-config')

    # Systemd services whose checks get removed.
    snap_daemons = ('snap.kubelet.daemon', 'snap.kube-proxy.daemon')

    # The current nrpe-external-master interface doesn't handle a lot of
    # logic, use the charm-helpers code for now.
    nagios_host = nrpe.get_nagios_hostname()
    checker = nrpe.NRPE(hostname=nagios_host)

    for daemon in snap_daemons:
        checker.remove_check(shortname=daemon)
Ejemplo n.º 36
0
def remove_nrpe_config(nagios=None):
    """Remove the NRPE checks for the kubelet and kube-proxy snap daemons.

    The ``nrpe-external-master.initial-config`` state is removed first.
    The ``nagios`` argument is ignored.
    """
    remove_state('nrpe-external-master.initial-config')

    # The current nrpe-external-master interface doesn't handle a lot of
    # logic, use the charm-helpers code for now.
    checked_daemons = ('snap.kubelet.daemon', 'snap.kube-proxy.daemon')
    host = nrpe.get_nagios_hostname()
    nrpe_config = nrpe.NRPE(hostname=host)

    for check_name in checked_daemons:
        nrpe_config.remove_check(shortname=check_name)
Ejemplo n.º 37
0
def remove_nrpe_config(nagios=None):
    """Clear the relation's initial-config state and remove the etcd check.

    ``nagios`` is accepted but never read by the body.
    """
    initial_config_flag = NPRE_EXTERNAL_RELATION + ".initial-config"
    remove_state(initial_config_flag)

    # Systemd services whose checks get removed.
    etcd_services = ("snap.etcd.etcd", )

    # The current nrpe-external interface doesn't handle a lot of logic,
    # use the charm-helpers code for now.
    nagios_host = nrpe.get_nagios_hostname()
    checker = nrpe.NRPE(hostname=nagios_host, primary=False)

    for etcd_service in etcd_services:
        checker.remove_check(shortname=etcd_service)
def update_nrpe_config():
    """Register the mysql service check and a mysqld process-count check.

    Installs python-dbus, adds the init-service check for ``mysql`` and a
    ``mysql_proc`` check that requires exactly one ``mysqld`` process, then
    writes the NRPE setup.
    """
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    hostname = nrpe.get_nagios_hostname()
    current_unit = nrpe.get_nagios_unit_name()
    nrpe_setup = nrpe.NRPE(hostname=hostname)
    # The services argument is a collection of service names; a bare string
    # would be iterated character by character.
    nrpe.add_init_service_checks(nrpe_setup, ['mysql'], current_unit)
    nrpe_setup.add_check(
        shortname='mysql_proc',
        description='Check MySQL process {%s}' % current_unit,
        check_cmd='check_procs -c 1:1 -C mysqld'
    )
    nrpe_setup.write()
def update_nrpe_checks():
    """Refresh the RabbitMQ Nagios plugins, stats cron job and NRPE checks.

    * Syncs ``check_rabbitmq.py`` and ``check_rabbitmq_queues.py`` into the
      Nagios plugin directory when that directory exists.
    * When ``stats_cron_schedule`` is configured, installs the stats
      collector script and writes a cron entry running it as root;
      otherwise removes a leftover cron file.
    * Creates a per-unit RabbitMQ user and vhost for monitoring and
      registers the main RabbitMQ check with those credentials.
    * When ``queue_thresholds`` is configured, registers the queue check
      with one ``-c`` argument group per threshold entry.
    """
    if os.path.isdir(NAGIOS_PLUGINS):
        rsync(os.path.join(os.getenv('CHARM_DIR'), 'scripts',
                           'check_rabbitmq.py'),
              os.path.join(NAGIOS_PLUGINS, 'check_rabbitmq.py'))
        rsync(os.path.join(os.getenv('CHARM_DIR'), 'scripts',
                           'check_rabbitmq_queues.py'),
              os.path.join(NAGIOS_PLUGINS, 'check_rabbitmq_queues.py'))

    schedule = config('stats_cron_schedule')
    if schedule:
        script = os.path.join(SCRIPTS_DIR, 'collect_rabbitmq_stats.sh')
        cronjob = "{} root {}\n".format(schedule, script)
        rsync(os.path.join(charm_dir(), 'scripts',
                           'collect_rabbitmq_stats.sh'), script)
        write_file(STATS_CRONFILE, cronjob)
    elif os.path.isfile(STATS_CRONFILE):
        # Stats collection was switched off: drop the stale cron file.
        os.remove(STATS_CRONFILE)

    # Find out if nrpe set nagios_hostname
    hostname = nrpe.get_nagios_hostname()
    myunit = nrpe.get_nagios_unit_name()

    # create unique user and vhost for each unit
    current_unit = local_unit().replace('/', '-')
    # The format string must contain a %s placeholder; without one the %
    # operator raises TypeError ("not all arguments converted").
    # NOTE(review): the 'nagios-' prefix mirrors the vhost name below —
    # confirm it matches the user names already deployed.
    user = 'nagios-%s' % current_unit
    vhost = 'nagios-%s' % current_unit
    password = rabbit.get_rabbit_password(user, local=True)

    rabbit.create_vhost(vhost)
    rabbit.create_user(user, password)
    rabbit.grant_permissions(user, vhost)

    nrpe_compat = nrpe.NRPE(hostname=hostname)
    nrpe_compat.add_check(
        shortname=rabbit.RABBIT_USER,
        description='Check RabbitMQ {%s}' % myunit,
        check_cmd='{}/check_rabbitmq.py --user {} --password {} --vhost {}'
                  ''.format(NAGIOS_PLUGINS, user, password, vhost)
    )

    queue_thresholds = config('queue_thresholds')
    if queue_thresholds:
        # If value of queue_thresholds is incorrect we want the hook to fail
        cmd = ''.join(' -c "{}" "{}" {} {}'.format(*item)
                      for item in yaml.safe_load(queue_thresholds))
        nrpe_compat.add_check(
            shortname=rabbit.RABBIT_USER + '_queue',
            description='Check RabbitMQ Queues',
            check_cmd='{}/check_rabbitmq_queues.py{} {}'.format(
                NAGIOS_PLUGINS, cmd, STATS_DATAFILE)
        )
    nrpe_compat.write()
Ejemplo n.º 40
0
def update_nrpe_config():
    """
    Register the NRPE init-service check for docker.

    :return: None
    """
    # Systemd services to be checked.
    docker_services = ['docker']

    # The current nrpe-external-master interface doesn't handle a lot of
    # logic, use the charm-helpers code for now.
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checker = nrpe.NRPE(hostname=nagios_host)
    nrpe.add_init_service_checks(checker, docker_services, unit_name)
    checker.write()
Ejemplo n.º 41
0
def update_nrpe_config():
    """Register NRPE checks for every service except ``qemu-kvm``.

    ``qemu-kvm`` is dropped from the result of ``services()`` when present;
    its absence is not an error.
    """
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checker = nrpe.NRPE(hostname=nagios_host)

    to_monitor = services()
    try:
        # qemu-kvm is a one-shot service
        to_monitor.remove('qemu-kvm')
    except ValueError:
        # Not in the list: nothing to filter out.
        pass

    nrpe.add_init_service_checks(checker, to_monitor, unit_name)
    checker.write()
Ejemplo n.º 42
0
def update_nrpe_config():
    """Refresh NRPE checks, with haproxy checks tied to the api service.

    When ``service_enabled('api')`` is true the haproxy checks are added;
    otherwise the ``haproxy_servers`` and ``haproxy_queue`` checks are
    removed as deprecated.
    """
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checker = nrpe.NRPE(hostname=nagios_host)
    nrpe.copy_nrpe_checks()
    nrpe.add_init_service_checks(checker, services(), unit_name)

    if not service_enabled('api'):
        haproxy_checks = ["haproxy_servers", "haproxy_queue"]
        nrpe.remove_deprecated_check(checker, haproxy_checks)
    else:
        nrpe.add_haproxy_checks(checker, unit_name)

    checker.write()
def update_nrpe_config():
    """Add NRPE init-service checks, leaving out ``qemu-kvm``.

    The list returned by ``services()`` is filtered in place before it is
    handed to ``add_init_service_checks``.
    """
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    host = nrpe.get_nagios_hostname()
    unit = nrpe.get_nagios_unit_name()
    nrpe_config = nrpe.NRPE(hostname=host)
    service_names = services()
    try:
        # qemu-kvm is a one-shot service
        service_names.remove('qemu-kvm')
    except ValueError:
        # qemu-kvm was not listed; keep the list as it is.
        pass
    nrpe.add_init_service_checks(nrpe_config, service_names, unit)
    nrpe_config.write()
Ejemplo n.º 44
0
def update_nrpe_config():
    """Deploy the charm's NRPE plugins and sudoers files, then write checks.

    Regular files under ``$CHARM_DIR/files/nrpe`` are copied to
    ``/usr/local/lib/nagios/plugins`` (created when missing) and those
    under ``$CHARM_DIR/files/sudoers`` to ``/etc/sudoers.d``. The
    corosync/crm checks and the process checks are then registered and
    the NRPE configuration is written.

    Raises:
        KeyError: if ``CHARM_DIR`` is not set in the environment.
    """
    charm_files = os.path.join(os.environ["CHARM_DIR"], "files")

    plugins_dir = "/usr/local/lib/nagios/plugins"
    if not os.path.exists(plugins_dir):
        os.makedirs(plugins_dir)

    # (sub-directory of files/, destination directory)
    copy_plan = (
        ("nrpe", plugins_dir),
        ("sudoers", "/etc/sudoers.d"),
    )
    for subdir, dest_dir in copy_plan:
        for path in glob.glob(os.path.join(charm_files, subdir, "*")):
            if not os.path.isfile(path):
                continue
            shutil.copy2(path,
                         os.path.join(dest_dir, os.path.basename(path)))

    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()

    nrpe_compat = nrpe.NRPE(hostname=nagios_host)

    apt_install('python-dbus')

    # (shortname, description template, check command)
    checks = (
        # corosync/crm checks
        ('corosync_rings', 'Check Corosync rings {%s}',
         'check_corosync_rings'),
        ('crm_status', 'Check crm status {%s}',
         'check_crm'),
        # process checks
        ('corosync_proc', 'Check Corosync process {%s}',
         'check_procs -c 1:1 -C corosync'),
        ('pacemakerd_proc', 'Check Pacemakerd process {%s}',
         'check_procs -c 1:1 -C pacemakerd'),
    )
    for shortname, description, check_cmd in checks:
        nrpe_compat.add_check(
            shortname=shortname,
            description=description % unit_name,
            check_cmd=check_cmd)

    nrpe_compat.write()
Ejemplo n.º 45
0
def update_nrpe_config():
    """Install the ceph-osd NRPE check and the cron job that feeds it.

    Installs the required packages, copies the collect/check scripts
    into the nagios plugins directory (owned like that directory),
    writes the cron entry to ``CRON_CEPH_CHECK_FILE`` and registers the
    ``ceph-osd`` NRPE check.
    """
    # python3-dbus is used by check_upstart_job; python3-fasteners is
    # used by collect_ceph_osd_services.py.
    packages = ['python3-dbus']
    codename = lsb_release()['DISTRIB_CODENAME']
    if CompareHostReleases(codename) >= 'bionic':
        packages += ['python3-fasteners']
    apt_install(packages)

    # Copy the check and collect scripts over to the plugins directory,
    # reusing the user/group IDs of the plugins directory itself.
    source_dir = os.path.join(os.environ.get('CHARM_DIR', ''),
                              'files', 'nagios')
    plugins_dir = '/usr/local/lib/nagios/plugins'
    plugins_stat = os.stat(plugins_dir)
    for script in ('collect_ceph_osd_services.py',
                   'check_ceph_osd_services.py'):
        installed = os.path.join(plugins_dir, script)
        shutil.copy(os.path.join(source_dir, script), installed)
        os.chown(installed, plugins_stat.st_uid, plugins_stat.st_gid)

    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()

    # BUG#1810749 - the nagios user cannot access /var/lib/ceph/.., which
    # is a GOOD THING because it keeps ceph secure from Nagios. Checking
    # whether ceph is okay still requires calling check_systemd.py or
    # 'status ceph-osd' with the contents of the ../osd/ceph-*/whoami
    # files. To get around this, a cron.d job running as root performs the
    # checks every minute and writes the results to a temporary file; the
    # nrpe check greps that file and errors out (return 2) if the first
    # 3 characters of a line are not 'OK:'.
    cron_entry = ''.join((
        'MAILTO=""\n',
        '* * * * * root ',
        '/usr/local/lib/nagios/plugins/collect_ceph_osd_services.py',
        ' 2>&1 | logger -t check-osd\n',
    ))
    with open(CRON_CEPH_CHECK_FILE, "wt") as cron_file:
        cron_file.write(cron_entry)

    nrpe_compat = nrpe.NRPE(hostname=nagios_host)
    nrpe_compat.add_check(
        shortname='ceph-osd',
        description='process check {%s}' % unit_name,
        check_cmd='/usr/local/lib/nagios/plugins/check_ceph_osd_services.py')
    nrpe_compat.write()
Ejemplo n.º 46
0
def update_nrpe_config():
    """Rebuild the NRPE init-service and haproxy checks for this unit.

    Service names that start with ``snap.`` are reduced to their second
    dot-separated component before the init-service checks are added.
    """
    log('Updating NRPE configuration')
    status_set('maintenance', 'Updating NRPE configuration')
    # check_upstart_job depends on python-dbus.
    apt_install('python-dbus')
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    nrpe_compat = nrpe.NRPE(hostname=nagios_host)
    nrpe.copy_nrpe_checks()
    monitored = [
        name.split('.')[1] if name.startswith('snap.') else name
        for name in services()
    ]
    nrpe.add_init_service_checks(nrpe_compat, monitored, unit_name)
    nrpe.add_haproxy_checks(nrpe_compat, unit_name)
    nrpe_compat.write()
def update_nrpe_config():
    """Write the init-service, haproxy and optional vhost NRPE checks.

    The ``vhost`` check is only registered when the NRPE configuration
    carries a non-empty ``nagios_check_http_params`` value; that value
    is passed through as the arguments of ``check_http``.
    """
    # check_upstart_job depends on python-dbus.
    apt_install('python-dbus')
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    nrpe_compat = nrpe.NRPE(hostname=nagios_host)
    nrpe.copy_nrpe_checks()
    nrpe.add_init_service_checks(nrpe_compat, services(), unit_name)
    nrpe.add_haproxy_checks(nrpe_compat, unit_name)
    http_params = nrpe_compat.config.get('nagios_check_http_params')
    if http_params:
        nrpe_compat.add_check(
            shortname='vhost',
            description='Check Virtual Host {%s}' % unit_name,
            check_cmd='check_http %s' % http_params)
    nrpe_compat.write()
def update_nrpe_config():
    """Register init-service checks and the network-namespace check.

    Writes a cron.d entry that runs ``check_netns.sh`` every five minutes
    as root, redirecting its output to
    ``/var/lib/nagios/netns-check.txt``, and adds a ``netns`` NRPE check
    that runs ``check_status_file.py`` against that file.
    """
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    hostname = nrpe.get_nagios_hostname()
    current_unit = nrpe.get_nagios_unit_name()
    nrpe_setup = nrpe.NRPE(hostname=hostname)
    nrpe.add_init_service_checks(nrpe_setup, services(), current_unit)

    cronpath = '/etc/cron.d/nagios-netns-check'
    cron_template = ('*/5 * * * * root '
                     '/usr/local/lib/nagios/plugins/check_netns.sh '
                     '> /var/lib/nagios/netns-check.txt\n'
                     )
    # Use a context manager so the file handle is closed even if the
    # write raises.
    with open(cronpath, 'w') as cron_file:
        cron_file.write(cron_template)
    nrpe_setup.add_check(
        shortname="netns",
        description='Network Namespace check {%s}' % current_unit,
        check_cmd='check_status_file.py -f /var/lib/nagios/netns-check.txt'
    )
    nrpe_setup.write()
def update_nrpe_checks():
    """Deploy the RabbitMQ Nagios plugins and rewrite this unit's checks.

    Steps, in order:

    * rsync the check scripts into ``NAGIOS_PLUGINS`` when that directory
      exists (the cluster check only when ``management_plugin`` is set);
    * install the stats-collection cron job when ``stats_cron_schedule``
      is set, otherwise remove ``STATS_CRONFILE`` if it exists;
    * create a per-unit RabbitMQ vhost and monitoring user;
    * register the plain and/or SSL check according to the ``ssl``
      option, plus the queue-threshold and cluster checks when their
      options are set, and write the NRPE configuration.

    An invalid ``queue_thresholds`` value is allowed to raise so that the
    hook fails.
    """
    if os.path.isdir(NAGIOS_PLUGINS):
        plugins = ['check_rabbitmq.py', 'check_rabbitmq_queues.py']
        if config('management_plugin'):
            plugins.append('check_rabbitmq_cluster.py')
        for plugin in plugins:
            rsync(os.path.join(charm_dir(), 'scripts', plugin),
                  os.path.join(NAGIOS_PLUGINS, plugin))

    if config('stats_cron_schedule'):
        script = os.path.join(SCRIPTS_DIR, 'collect_rabbitmq_stats.sh')
        cronjob = CRONJOB_CMD.format(schedule=config('stats_cron_schedule'),
                                     timeout=config('cron-timeout'),
                                     command=script)
        rsync(os.path.join(charm_dir(), 'scripts',
                           'collect_rabbitmq_stats.sh'), script)
        write_file(STATS_CRONFILE, cronjob)
    elif os.path.isfile(STATS_CRONFILE):
        # Schedule was unset: drop the cron job left by an earlier run.
        os.remove(STATS_CRONFILE)

    # Find out if nrpe set nagios_hostname
    hostname = nrpe.get_nagios_hostname()
    myunit = nrpe.get_nagios_unit_name()

    # create unique user and vhost for each unit
    current_unit = local_unit().replace('/', '-')
    # NOTE(review): this literal previously read '******', which contains
    # no conversion specifier, so the %-format raised TypeError. 'nagios-%s'
    # mirrors the vhost naming on the next line -- confirm it matches the
    # user name expected by existing deployments.
    user = 'nagios-%s' % current_unit
    vhost = 'nagios-%s' % current_unit
    password = rabbit.get_rabbit_password(user, local=True)

    rabbit.create_vhost(vhost)
    rabbit.create_user(user, password, ['monitoring'])
    rabbit.grant_permissions(user, vhost)

    nrpe_compat = nrpe.NRPE(hostname=hostname)
    if config('ssl') in ['off', 'on']:
        cmd = ('{plugins_dir}/check_rabbitmq.py --user {user} '
               '--password {password} --vhost {vhost}')
        cmd = cmd.format(plugins_dir=NAGIOS_PLUGINS, user=user,
                         password=password, vhost=vhost)
        nrpe_compat.add_check(
            shortname=rabbit.RABBIT_USER,
            description='Check RabbitMQ {%s}' % myunit,
            check_cmd=cmd
        )
    if config('ssl') in ['only', 'on']:
        log('Adding rabbitmq SSL check', level=DEBUG)
        cmd = ('{plugins_dir}/check_rabbitmq.py --user {user} '
               '--password {password} --vhost {vhost} '
               '--ssl --ssl-ca {ssl_ca} --port {port}')
        cmd = cmd.format(plugins_dir=NAGIOS_PLUGINS,
                         user=user,
                         password=password,
                         port=int(config('ssl_port')),
                         vhost=vhost,
                         ssl_ca=SSL_CA_FILE)
        nrpe_compat.add_check(
            shortname=rabbit.RABBIT_USER + "_ssl",
            description='Check RabbitMQ (SSL) {%s}' % myunit,
            check_cmd=cmd
        )

    if config('queue_thresholds'):
        # If value of queue_thresholds is incorrect we want the hook to fail
        thresholds = yaml.safe_load(config('queue_thresholds'))
        cmd = ''.join(' -c "{}" "{}" {} {}'.format(*item)
                      for item in thresholds)
        nrpe_compat.add_check(
            shortname=rabbit.RABBIT_USER + '_queue',
            description='Check RabbitMQ Queues',
            check_cmd='{}/check_rabbitmq_queues.py{} {}'.format(
                NAGIOS_PLUGINS, cmd, STATS_DATAFILE)
        )
    if config('management_plugin'):
        # add NRPE check
        cluster_cmd = ('{}/check_rabbitmq_cluster.py '
                       '--port {} --user {} --password {}')
        nrpe_compat.add_check(
            shortname=rabbit.RABBIT_USER + '_cluster',
            description='Check RabbitMQ Cluster',
            check_cmd=cluster_cmd.format(
                NAGIOS_PLUGINS,
                rabbit.get_managment_port(),
                user,
                password
            )
        )

    nrpe_compat.write()