コード例 #1
0
def update_nrpe_config():
    """Install NRPE prerequisites and register service and haproxy checks.

    Names returned by ``services()`` that start with ``snap.`` are reduced to
    their second dot-separated component before the init service checks are
    added; all other names are passed through unchanged.
    """
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checker = nrpe.NRPE(hostname=nagios_host)
    nrpe.copy_nrpe_checks()
    monitored = [
        name.split('.')[1] if name.startswith('snap.') else name
        for name in services()
    ]
    nrpe.add_init_service_checks(checker, monitored, unit_name)
    nrpe.add_haproxy_checks(checker, unit_name)
    checker.write()
コード例 #2
0
def remove_nrpe_config(nagios=None):
    """Clear the initial-config state and remove the control-plane checks.

    :param nagios: not used by this function.
    """
    remove_state('nrpe-external-master.initial-config')

    # The current nrpe-external-master interface doesn't handle a lot of logic,
    # use the charm-helpers code for now.
    checker = nrpe.NRPE(hostname=nrpe.get_nagios_hostname())

    # systemd services whose checks are removed
    for unit in ('snap.kube-apiserver.daemon',
                 'snap.kube-controller-manager.daemon',
                 'snap.kube-scheduler.daemon'):
        checker.remove_check(shortname=unit)
コード例 #3
0
 def render_nrpe(self):
     """Write NRPE init-service checks for ``self.nrpe_check_services``."""
     ch_core.hookenv.log("Rendering NRPE checks.",
                         level=ch_core.hookenv.INFO)
     nagios_host = nrpe.get_nagios_hostname()
     unit_name = nrpe.get_nagios_unit_name()
     # The unit is primary when it is its own principal, i.e. when it is
     # not running as a subordinate.
     is_primary = (
         ch_core.hookenv.principal_unit() == ch_core.hookenv.local_unit())
     checker = nrpe.NRPE(hostname=nagios_host, primary=is_primary)
     nrpe.add_init_service_checks(
         checker, self.nrpe_check_services, unit_name)
     checker.write()
コード例 #4
0
ファイル: ceph_hooks.py プロジェクト: RatiRanjanDas/test32
def update_nrpe_config():
    """Install the ceph-osd Nagios scripts, cron collector and NRPE check.

    Installs the required packages, copies the collect/check scripts into
    the Nagios plugins directory, writes the cron file that runs the
    collector, and registers the ``ceph-osd`` NRPE check.
    """
    # python-dbus is used by check_upstart_job
    # NOTE(review): python3-fasteners is presumably needed by
    # collect_ceph_osd_services.py - confirm against that script.
    packages = ['python3-dbus']
    codename = lsb_release()['DISTRIB_CODENAME']
    if CompareHostReleases(codename) >= 'bionic':
        packages.append('python3-fasteners')
    apt_install(packages)

    # copy the check and collect files over to the plugins directory
    source_dir = os.path.join(
        os.environ.get('CHARM_DIR', ''), 'files', 'nagios')
    plugins_dir = '/usr/local/lib/nagios/plugins'
    # Reuse the owner and group of the plugins directory for the copies
    plugins_stat = os.stat(plugins_dir)
    for script in ('collect_ceph_osd_services.py',
                   'check_ceph_osd_services.py'):
        destination = os.path.join(plugins_dir, script)
        shutil.copy(os.path.join(source_dir, script), destination)
        os.chown(destination, plugins_stat.st_uid, plugins_stat.st_gid)

    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()

    # BUG#1810749 - the nagios user can't access /var/lib/ceph/.. and that's a
    # GOOD THING, as it keeps ceph secure from Nagios.  However, to check
    # whether ceph is okay, the check_systemd.py or 'status ceph-osd' still
    # needs to be called with the contents of ../osd/ceph-*/whoami files.  To
    # get around this conundrum, a cron.d job that runs as root performs the
    # checks every minute and writes the results to a temporary file; the
    # nrpe check greps this file and errors out (return 2) if the first 3
    # characters of a line are not 'OK:'.
    cron_entry = ''.join((
        'MAILTO=""\n',
        '* * * * * root ',
        '/usr/local/lib/nagios/plugins/collect_ceph_osd_services.py',
        ' 2>&1 | logger -t check-osd\n',
    ))
    with open(CRON_CEPH_CHECK_FILE, "wt") as cron_file:
        cron_file.write(cron_entry)

    checker = nrpe.NRPE(hostname=nagios_host)
    checker.add_check(
        shortname='ceph-osd',
        description='process check {%s}' % unit_name,
        check_cmd='/usr/local/lib/nagios/plugins/check_ceph_osd_services.py'
    )
    checker.write()
コード例 #5
0
def update_nrpe_config(unused=None):
    """Register the etcd service check and the etcd alarm check.

    Installs the cron job, an empty cache file and the plugin script used by
    the alarm check, writes the NRPE configuration and finally sets the
    ``etcd.nrpe.configured`` state.

    :param unused: not used by this function.
    """
    # The current nrpe-external interface doesn't handle a lot of logic,
    # use the charm-helpers code for now.
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checker = nrpe.NRPE(hostname=nagios_host, primary=False)

    # first check: alert on failure of the systemd service
    monitored = ("snap.etcd.etcd", )
    nrpe.add_init_service_checks(checker, monitored, unit_name)

    # cron job that populates the cache read by the second check
    # (the output of 'etcdctl alarm list' is cached to minimise overhead)
    with open("templates/check_etcd-alarms.cron") as cron_template:
        cron_body = cron_template.read().encode()
        write_file(
            path="/etc/cron.d/check_etcd-alarms",
            content=cron_body,
            owner="root",
            perms=0o644,
        )

    # empty output file for the cron job above
    write_file(
        path="/var/lib/nagios/etcd-alarm-list.txt",
        content="",
        owner="root",
        perms=0o644,
    )

    # NRPE script that reads the cached output
    plugin_path = "/usr/lib/nagios/plugins/check_etcd-alarms.py"
    with open("templates/check_etcd-alarms.py") as plugin_template:
        plugin_body = plugin_template.read().encode()
        write_file(
            path="/usr/lib/nagios/plugins/check_etcd-alarms.py",
            content=plugin_body,
            owner="root",
            perms=0o755,
        )

    # second check: alert on etcd alarm status
    checker.add_check(
        "etcd-alarms",
        "Verify etcd has no raised alarms",
        plugin_path,
    )

    checker.write()
    set_state("etcd.nrpe.configured")
コード例 #6
0
def update_nrpe_config():
    """Register init-service, haproxy and Swift proxy healthcheck checks.

    Installs python-dbus, copies the bundled NRPE checks, adds the service
    and haproxy checks, then adds an HTTP healthcheck against the API port
    derived from the ``bind-port`` option and writes the NRPE configuration.
    """
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    hostname = nrpe.get_nagios_hostname()
    current_unit = nrpe.get_nagios_unit_name()
    nrpe_setup = nrpe.NRPE(hostname=hostname)
    nrpe.copy_nrpe_checks()
    nrpe.add_init_service_checks(nrpe_setup, services(), current_unit)
    nrpe.add_haproxy_checks(nrpe_setup, current_unit)
    api_port = determine_api_port(config('bind-port'), singlenode_mode=True)
    # Adjacent string literals keep the arguments separated by single spaces.
    # A backslash continuation inside the literal would embed the source
    # indentation of the following lines into the command.
    healthcheck_cmd = (
        '/usr/lib/nagios/plugins/check_http '
        '-I localhost -u /healthcheck -p {} '
        '-e "OK"'
    ).format(api_port)
    nrpe_setup.add_check(shortname="swift-proxy-healthcheck",
                         description="Check Swift Proxy Healthcheck",
                         check_cmd=healthcheck_cmd)
    nrpe_setup.write()
コード例 #7
0
ファイル: docker.py プロジェクト: barryprice/charm-docker
def remove_nrpe_config():
    """
    Clear the docker initial-config state and remove the docker check.

    :return: None
    """
    remove_state('nrpe-external-master.docker.initial-config')

    # The current nrpe-external-master interface doesn't handle a lot of logic,
    # use the charm-helpers code for now.
    checker = nrpe.NRPE(hostname=nrpe.get_nagios_hostname(), primary=False)

    # systemd services for which the checks will be removed.
    for unit in ('docker',):
        checker.remove_check(shortname=unit)
コード例 #8
0
def update_nrpe_config():
    """Register init-service and haproxy checks plus an optional vhost check.

    The ``vhost`` check is only added when the NRPE configuration carries a
    non-empty ``nagios_check_http_params`` value.
    """
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checker = nrpe.NRPE(hostname=nagios_host)
    nrpe.copy_nrpe_checks()
    nrpe.add_init_service_checks(checker, services(), unit_name)
    nrpe.add_haproxy_checks(checker, unit_name)
    http_params = checker.config.get('nagios_check_http_params')
    if http_params:
        checker.add_check(
            shortname='vhost',
            description='Check Virtual Host {%s}' % unit_name,
            check_cmd='check_http %s' % http_params)
    checker.write()
コード例 #9
0
    def test_write_restarts_service(self):
        """write() returns None and restarts nagios-nrpe-server via call."""
        charm_config = {
            'nagios_context': 'test',
            'nagios_servicegroups': ''
        }
        self.patched['config'].return_value = charm_config
        self.patched['exists'].return_value = True

        result = nrpe.NRPE().write()
        self.assertEqual(None, result)

        # First positional argument of the most recent patched call
        restart_cmd = self.patched['call'].call_args[0][0]
        self.assertEqual(['service', 'nagios-nrpe-server', 'restart'],
                         restart_cmd)
        expected_counts = dict(config=1, getpwnam=1, getgrnam=1,
                               exists=1, call=1)
        self.check_call_counts(**expected_counts)
コード例 #10
0
ファイル: test_nrpe.py プロジェクト: fnordahl/charm-helpers
    def test_write_restarts_service(self):
        """write() returns None and restarts nagios-nrpe-server via service."""
        charm_config = {
            'nagios_context': 'test',
            'nagios_servicegroups': ''
        }
        self.patched['config'].return_value = charm_config
        self.patched['exists'].return_value = True

        result = nrpe.NRPE().write()
        self.assertEqual(None, result)

        service_mock = self.patched['service']
        service_mock.assert_called_with('restart', 'nagios-nrpe-server')
        expected_counts = dict(config=1, getpwnam=1, getgrnam=1,
                               exists=1, service=1)
        self.check_call_counts(**expected_counts)
コード例 #11
0
def configure_megaraid():
    """Install the megaraid plugin, register its NRPE check and flag it.

    The check command is built from the ``storcli_path`` and
    ``check_parameters`` options.
    """
    status_set('maintenance', 'configuring megaraid check')
    plugin_source = '/opt/{}/{}'.format(PLUGIN_NAME, PLUGIN_NAME)
    install_nagios_plugin_from_file(source_file_path=plugin_source,
                                    plugin_name=PLUGIN_NAME)
    checker = nrpe.NRPE(hostname=nrpe.get_nagios_hostname(), primary=False)
    command = '{plugin_name} -p {storcli_path} {check_params}'.format(
        plugin_name=PLUGIN_NAME,
        storcli_path=config('storcli_path'),
        check_params=config('check_parameters'))
    checker.add_check(shortname=PLUGIN_NAME,
                      description=PLUGIN_NAME,
                      check_cmd=command)
    checker.write()
    status_set('active', 'ready')
    set_flag('megaraid.configured')
コード例 #12
0
def update_nrpe_config():
    """Copy NRPE scripts and sudoers files, then register corosync checks.

    Regular files under ``$CHARM_DIR/files/nrpe`` are copied into the Nagios
    plugins directory (created when missing) and those under
    ``$CHARM_DIR/files/sudoers`` into ``/etc/sudoers.d``. Four checks are then
    registered and the NRPE configuration is written.
    """
    plugins_dir = "/usr/local/lib/nagios/plugins"
    nrpe_files = os.path.join(os.environ["CHARM_DIR"], "files", "nrpe", "*")
    if not os.path.exists(plugins_dir):
        os.makedirs(plugins_dir)
    for path in filter(os.path.isfile, glob.glob(nrpe_files)):
        shutil.copy2(path, os.path.join(plugins_dir, os.path.basename(path)))

    sudoers_dir = "/etc/sudoers.d"
    sudoers_files = os.path.join(
        os.environ["CHARM_DIR"], "files", "sudoers", "*")
    for path in filter(os.path.isfile, glob.glob(sudoers_files)):
        shutil.copy2(path, os.path.join(sudoers_dir, os.path.basename(path)))

    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()

    checker = nrpe.NRPE(hostname=nagios_host)

    apt_install('python-dbus')

    # (shortname, description template, command) for every check
    checks = (
        # corosync/crm checks
        ('corosync_rings', 'Check Corosync rings {%s}',
         'check_corosync_rings'),
        ('crm_status', 'Check crm status {%s}', 'check_crm'),
        # process checks
        ('corosync_proc', 'Check Corosync process {%s}',
         'check_procs -c 1:1 -C corosync'),
        ('pacemakerd_proc', 'Check Pacemakerd process {%s}',
         'check_procs -c 1:1 -C pacemakerd'),
    )
    for shortname, description, command in checks:
        checker.add_check(shortname=shortname,
                          description=description % unit_name,
                          check_cmd=command)

    checker.write()
コード例 #13
0
ファイル: ceph_hooks.py プロジェクト: aasbin/charm-ceph-osd
def update_nrpe_config():
    """Register the ``ceph-osd`` process check for systemd or upstart.

    The check command feeds the contents of the OSD ``whoami`` files to
    either ``check_systemd.py`` or ``status ceph-osd`` and exits 0 on
    success, 2 otherwise.
    """
    # python-dbus is used by check_upstart_job
    apt_install('python3-dbus')
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()

    # create systemd or upstart check
    if init_is_systemd():
        per_osd = ('xargs -I_@ /usr/local/lib/nagios/plugins/check_systemd.py'
                   ' ceph-osd@_@')
    else:
        per_osd = 'xargs -I@ status ceph-osd id=@'
    check_command = ''.join((
        '/bin/cat /var/lib/ceph/osd/ceph-*/whoami |',
        per_osd,
        ' && exit 0 || exit 2',
    ))

    checker = nrpe.NRPE(hostname=nagios_host)
    checker.add_check(shortname='ceph-osd',
                      description='process check {%s}' % unit_name,
                      check_cmd=check_command)
    checker.write()
コード例 #14
0
def update_nrpe_config():
    """Register init-service checks and the network namespace check.

    Writes a cron entry that runs ``check_netns.sh`` every five minutes and
    stores its output in a status file, then registers an NRPE check that
    reads that file and writes the NRPE configuration.
    """
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    hostname = nrpe.get_nagios_hostname()
    current_unit = nrpe.get_nagios_unit_name()
    nrpe_setup = nrpe.NRPE(hostname=hostname)
    nrpe.add_init_service_checks(nrpe_setup, services(), current_unit)

    cronpath = '/etc/cron.d/nagios-netns-check'
    cron_template = ('*/5 * * * * root '
                     '/usr/local/lib/nagios/plugins/check_netns.sh '
                     '> /var/lib/nagios/netns-check.txt\n')
    # The context manager closes the handle even when write() raises.
    with open(cronpath, 'w') as cron_file:
        cron_file.write(cron_template)
    nrpe_setup.add_check(
        shortname="netns",
        description='Network Namespace check {%s}' % current_unit,
        check_cmd='check_status_file.py -f /var/lib/nagios/netns-check.txt')
    nrpe_setup.write()
コード例 #15
0
def nrpe_external_master_relation():
    '''Install the heap plugin and register Cassandra heap and disk checks.

    The heap check is added only when both heap thresholds are configured;
    one disk check per database directory is added only when both disk
    thresholds are configured.
    '''
    plugin_name = 'check_cassandra_heap.sh'
    plugins_dir = helpers.local_plugins_dir()
    if os.path.exists(plugins_dir):
        plugin_src = os.path.join(hookenv.charm_dir(), "files", plugin_name)
        with open(plugin_src, 'rb') as plugin:
            host.write_file(os.path.join(plugins_dir, plugin_name),
                            plugin.read(),
                            perms=0o555)

    checker = nrpe.NRPE()
    settings = hookenv.config()

    heap_warn = settings.get('nagios_heapchk_warn_pct')
    heap_crit = settings.get('nagios_heapchk_crit_pct')
    if heap_warn and heap_crit:
        heap_cmd = "check_cassandra_heap.sh localhost {} {}".format(
            heap_warn, heap_crit)
        checker.add_check(
            shortname="cassandra_heap",
            description="Check Cassandra Heap",
            check_cmd=heap_cmd)

    disk_warn = settings.get('nagios_disk_warn_pct')
    disk_crit = settings.get('nagios_disk_crit_pct')
    db_dirs = helpers.get_all_database_directories()
    # De-duplicate directories that share a path
    monitored = set(db_dirs['data_file_directories'] +
                    [db_dirs['commitlog_directory'],
                     db_dirs['saved_caches_directory']])
    for directory in monitored:
        # Replace every character outside [A-Za-z0-9_] with an underscore
        suffix = re.sub('[^A-Za-z0-9_]', '_', directory)
        if not (disk_warn and disk_crit):
            continue
        shortname = "cassandra_disk{}".format(suffix)
        hookenv.log("Adding disk utilization check {}".format(shortname),
                    DEBUG)
        disk_cmd = "check_disk -u GB -w {}% -c {}% -K 5% -p {}".format(
            disk_warn, disk_crit, directory)
        checker.add_check(
            shortname=shortname,
            description="Check Cassandra Disk {}".format(directory),
            check_cmd=disk_cmd)
    checker.write()
コード例 #16
0
ファイル: netbox.py プロジェクト: huntdatacenter/charm-netbox
def configure_nrpe_checks():
    """Install check_docker and register the netbox HTTP and container checks.

    One container check is added for each of netbox, netbox-worker, nginx
    and redis, using the ``check_docker_params`` option as extra arguments.
    Sets the ``netbox.nrpe.configured`` flag once the checks are written.
    """
    install_nagios_plugin_from_file(
        source_file_path='/opt/netbox-docker/checks/check_docker',
        plugin_name='check_docker')
    checker = nrpe.NRPE(hostname=nrpe.get_nagios_hostname(), primary=True)

    http_cmd = '{check_path} -H localhost -p 80'.format(
        check_path='/usr/lib/nagios/plugins/check_http')
    checker.add_check(shortname='check_http_netbox',
                      description='Check netbox web server',
                      check_cmd=http_cmd)

    for name in ('netbox', 'netbox-worker', 'nginx', 'redis'):
        shortname = 'check_docker_{container}'.format(container=name)
        description = 'Check netbox {container} container'.format(
            container=name)
        docker_cmd = '{check_path} --containers {container} {params}'.format(
            check_path='/usr/lib/nagios/plugins/check_docker',
            container=name,
            params=config('check_docker_params'))
        checker.add_check(shortname=shortname,
                          description=description,
                          check_cmd=docker_cmd)
    checker.write()
    set_flag('netbox.nrpe.configured')
コード例 #17
0
def update_nrpe_config():
    """Register the ntp service check and the configured NTPmon checks.

    Previously created ntpmon checks are removed first. When the
    ``nagios_ntpmon_checks`` option names all four known checks, a single
    combined ``ntpmon`` check is added; otherwise one check is added per
    non-empty requested name.
    """
    # python-dbus is used by check_upstart_job
    # python-psutil is used by check_ntpmon
    fetch.apt_install(['python-dbus', 'python-psutil'])
    requested = hookenv.config('nagios_ntpmon_checks').split(" ")
    if os.path.isdir(NAGIOS_PLUGINS):
        plugin_src = os.path.join(os.getenv('CHARM_DIR'), 'files', 'nagios',
                                  'check_ntpmon.py')
        plugin_dst = os.path.join(NAGIOS_PLUGINS, 'check_ntpmon.py')
        host.rsync(plugin_src, plugin_dst)

    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checker = nrpe.NRPE(hostname=nagios_host)
    nrpe.add_init_service_checks(checker, ['ntp'], unit_name)

    known_checks = {'offset', 'peers', 'reachability', 'sync'}

    # remove any previously-created ntpmon checks
    checker.remove_check(shortname="ntpmon")
    for name in known_checks:
        checker.remove_check(shortname="ntpmon_%s" % name)

    # If all checks are specified, combine them into a single check to reduce
    # Nagios noise.
    if set(requested) == known_checks:
        checker.add_check(
            shortname="ntpmon",
            description='Check NTPmon {}'.format(unit_name),
            check_cmd='check_ntpmon.py')
    else:
        for name in requested:
            # splitting on a single space can yield empty entries
            if not name:
                continue
            checker.add_check(
                shortname="ntpmon_%s" % name,
                description='Check NTPmon %s {%s}' % (name, unit_name),
                check_cmd='check_ntpmon.py --check %s' % name)

    checker.write()
コード例 #18
0
def update_nrpe_config():
    """Register the Contrail Analytics API and contrail-status checks.

    Syncs the NRPE check scripts into the plugins directory and adds nagios
    to sudoers before the two checks are added and written.
    """
    plugins_path = '/usr/local/lib/nagios/plugins'
    checker = nrpe.NRPE()
    api_host = common_utils.get_ip()
    common_utils.rsync_nrpe_checks(plugins_path)
    common_utils.add_nagios_to_sudoers()

    checker.add_check(
        shortname='check_analytics_api',
        description='Check Contrail Analytics API',
        check_cmd='check_http -H {} -p 8081'.format(api_host)
    )

    status_cmd = common_utils.contrail_status_cmd(MODULE, plugins_path)
    checker.add_check(
        shortname='check_contrail_status_' + MODULE,
        description='Check contrail-status',
        check_cmd=status_cmd
    )

    checker.write()
コード例 #19
0
def update_nrpe_config():
    """Register the worker node and service checks and set up NRPE access.

    Renders and installs the node check plugin, adds the node and snap
    service checks and writes them. When both credentials and API servers
    are available, a kubeconfig for NRPE is created, the Nagios directories
    are chowned and the nrpe-external-master states are updated. Finally the
    ``performance`` CPU governor is requested on every nrpe-external-master
    relation.
    """
    snap_services = ["snap.{}.daemon".format(name) for name in worker_services]
    plugin_text = render("nagios_plugin.py", None,
                         {"node_name": get_node_name()})
    plugin_path = install_nagios_plugin_from_text(plugin_text,
                                                  "check_k8s_worker.py")
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checker = nrpe.NRPE(hostname=nagios_host)
    checker.add_check("node", "Node registered with API Server",
                      str(plugin_path))
    nrpe.add_init_service_checks(checker, snap_services, unit_name)
    checker.write()

    credentials = db.get("credentials")
    api_servers = get_kube_api_servers()
    if credentials and api_servers:
        # Pick one API server based on this unit's number
        chosen = api_servers[get_unit_number() % len(api_servers)]
        create_kubeconfig(
            nrpe_kubeconfig_path,
            chosen,
            ca_crt_path,
            token=credentials["client_token"],
            user="******",
        )
        # Make sure Nagios dirs are the correct permissions.
        chown_cmd = ["chown", "-R", "nagios:nagios"]
        nagios_dirs = ["/var/lib/nagios/",
                       os.path.dirname(nrpe_kubeconfig_path)]
        for directory in nagios_dirs:
            if os.path.exists(directory):
                check_call(chown_cmd + [directory])

        remove_state("nrpe-external-master.reconfigure")
        set_state("nrpe-external-master.initial-config")

    # request CPU governor check from nrpe relation to be performance
    governor_request = {
        "requested_cpu_governor": "performance",
    }
    for relation_id in hookenv.relation_ids("nrpe-external-master"):
        hookenv.relation_set(relation_id=relation_id,
                             relation_settings=governor_request)
コード例 #20
0
def update_nrpe_checks():
    """Install the RabbitMQ Nagios plugins and rewrite this unit's checks.

    Steps, in order:

    * rsync the check scripts into NAGIOS_PLUGINS when that directory exists;
    * write or remove the stats cron file depending on the
      ``stats_cron_schedule`` option;
    * create a per-unit monitoring user, its own vhost and every vhost named
      in the ``check-vhosts`` option;
    * register a plain and/or SSL check per vhost according to ``ssl``;
    * register the queue-threshold and cluster checks when configured;
    * write the NRPE configuration.
    """
    if os.path.isdir(NAGIOS_PLUGINS):
        rsync(os.path.join(charm_dir(), 'files', 'check_rabbitmq.py'),
              os.path.join(NAGIOS_PLUGINS, 'check_rabbitmq.py'))
        rsync(os.path.join(charm_dir(), 'files', 'check_rabbitmq_queues.py'),
              os.path.join(NAGIOS_PLUGINS, 'check_rabbitmq_queues.py'))
        if config('management_plugin'):
            rsync(
                os.path.join(charm_dir(), 'files',
                             'check_rabbitmq_cluster.py'),
                os.path.join(NAGIOS_PLUGINS, 'check_rabbitmq_cluster.py'))

    if config('stats_cron_schedule'):
        script = os.path.join(SCRIPTS_DIR, 'collect_rabbitmq_stats.sh')
        cronjob = CRONJOB_CMD.format(schedule=config('stats_cron_schedule'),
                                     timeout=config('cron-timeout'),
                                     command=script)
        rsync(os.path.join(charm_dir(), 'files', 'collect_rabbitmq_stats.sh'),
              script)
        write_file(STATS_CRONFILE, cronjob)
    elif os.path.isfile(STATS_CRONFILE):
        os.remove(STATS_CRONFILE)

    # Find out if nrpe set nagios_hostname
    hostname = nrpe.get_nagios_hostname()
    myunit = nrpe.get_nagios_unit_name()

    # create unique user and vhost for each unit
    current_unit = local_unit().replace('/', '-')
    # The format string needs a replacement field, otherwise the unit name is
    # silently dropped and every unit shares one user and vhost.
    # NOTE(review): the 'nagios-' prefix is assumed - confirm it matches the
    # user name deployed units already use.
    user = 'nagios-{}'.format(current_unit)
    vhosts = [{'vhost': user, 'shortname': rabbit.RABBIT_USER}]
    password = rabbit.get_rabbit_password(user, local=True)

    nrpe_compat = nrpe.NRPE(hostname=hostname)
    rabbit.create_user(user, password, ['monitoring'])

    if config('check-vhosts'):
        for other_vhost in config('check-vhosts').split(' '):
            # splitting on a single space can yield empty entries
            if other_vhost:
                item = {
                    'vhost': other_vhost,
                    'shortname': 'rabbit_{}'.format(other_vhost)
                }
                vhosts.append(item)

    for vhost in vhosts:
        rabbit.create_vhost(vhost['vhost'])
        rabbit.grant_permissions(user, vhost['vhost'])
        if config('ssl') in ['off', 'on']:
            cmd = ('{}/check_rabbitmq.py --user {} --password {} '
                   '--vhost {}'.format(NAGIOS_PLUGINS, user, password,
                                       vhost['vhost']))
            log('Adding rabbitmq non-SSL check for {}'.format(vhost['vhost']),
                level=DEBUG)
            description = 'Check RabbitMQ {} {}'.format(myunit, vhost['vhost'])
            nrpe_compat.add_check(shortname=vhost['shortname'],
                                  description=description,
                                  check_cmd=cmd)

        if config('ssl') in ['only', 'on']:
            cmd = ('{}/check_rabbitmq.py --user {} --password {} '
                   '--vhost {} --ssl --ssl-ca {} --port {}'.format(
                       NAGIOS_PLUGINS, user, password, vhost['vhost'],
                       SSL_CA_FILE, int(config('ssl_port'))))
            log('Adding rabbitmq SSL check for {}'.format(vhost['vhost']),
                level=DEBUG)
            description = 'Check RabbitMQ (SSL) {} {}'.format(
                myunit, vhost['vhost'])
            nrpe_compat.add_check(shortname=vhost['shortname'] + "_ssl",
                                  description=description,
                                  check_cmd=cmd)

    if config('queue_thresholds'):
        cmd = ""
        # If value of queue_thresholds is incorrect we want the hook to fail
        for item in yaml.safe_load(config('queue_thresholds')):
            cmd += ' -c "{}" "{}" {} {}'.format(*item)
        nrpe_compat.add_check(
            shortname=rabbit.RABBIT_USER + '_queue',
            description='Check RabbitMQ Queues',
            check_cmd='{}/check_rabbitmq_queues.py{} {}'.format(
                NAGIOS_PLUGINS, cmd, STATS_DATAFILE))
    if config('management_plugin'):
        # add NRPE check
        _check_cmd = (
            '{}/check_rabbitmq_cluster.py --port {} --user {} --password {}'.
            format(NAGIOS_PLUGINS, rabbit.get_managment_port(), user,
                   password))
        nrpe_compat.add_check(shortname=rabbit.RABBIT_USER + '_cluster',
                              description='Check RabbitMQ Cluster',
                              check_cmd=_check_cmd)

    nrpe_compat.write()
コード例 #21
0
def update_nrpe_config():
    """Validate options, install NRPE scripts and register corosync checks.

    Sets a ``blocked`` status and returns early when
    ``failed_actions_alert_type`` or ``failed_actions_threshold`` is invalid.
    Otherwise copies the NRPE scripts and sudoers files into place, builds
    the ``check_crm`` command from the charm options and, when the NRPE
    configuration directory exists, registers the checks and writes them.
    """
    # Validate options (DEPRECATED)
    valid_alerts = ['ignore', 'warning', 'critical']
    alert_type = config('failed_actions_alert_type').lower()
    if alert_type not in valid_alerts:
        message = ('The value of option failed_actions_alert_type must be '
                   'among {}'.format(valid_alerts))
        status_set('blocked', message)
        return
    if config('failed_actions_threshold') < 0:
        status_set(
            'blocked',
            'The value of option failed_actions_threshold must be a '
            'positive integer')
        return

    plugins_dir = "/usr/local/lib/nagios/plugins"
    nrpe_files = os.path.join(os.environ["CHARM_DIR"], "files", "nrpe", "*")
    if not os.path.exists(plugins_dir):
        os.makedirs(plugins_dir)
    for path in filter(os.path.isfile, glob.glob(nrpe_files)):
        shutil.copy2(path, os.path.join(plugins_dir, os.path.basename(path)))

    sudoers_dir = "/etc/sudoers.d"
    sudoers_files = os.path.join(
        os.environ["CHARM_DIR"], "files", "sudoers", "*")
    for path in filter(os.path.isfile, glob.glob(sudoers_files)):
        shutil.copy2(path, os.path.join(sudoers_dir, os.path.basename(path)))

    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()

    checker = nrpe.NRPE(hostname=nagios_host)

    apt_install('python-dbus')

    # Assemble the check_crm command line from the charm options
    crm_args = ['check_crm -s']
    crm_args.append(' --failedactions={}'.format(
        config('failed_actions_alert_type').lower()))
    if config('failed_actions_threshold'):
        crm_args.append(' --failcount={}'.format(
            config('failed_actions_threshold')))
    for severity in ('warn', 'crit'):
        crm_args.append(' --failcount-{}={}'.format(
            severity,
            config('res_failcount_{}'.format(severity)) or 0))
    crm_command = ''.join(crm_args)

    if not nrpe.NRPE.does_nrpe_conf_dir_exist():
        return

    # corosync/crm checks

    # LP #1902919 - corosync version 2.99 changed the ring status output
    # for udp/udpu to hardcode the status to always report 'OK'. This
    # results in the check providing no value over what is provided by the
    # crm_status check. A version check on the package would be more ideal,
    # however populating the apt-cache object is expensive to run on each
    # config-changed hook, so use the faster check of comparing the
    # release name.
    ring_check = {
        'shortname': 'corosync_rings',
        'description': 'Check Corosync rings {}'.format(unit_name),
        'check_cmd': 'check_corosync_rings',
    }
    pre_eoan = CompareHostReleases(get_distrib_codename()) < 'eoan'
    ring_action = checker.add_check if pre_eoan else checker.remove_check
    ring_action(**ring_check)

    checker.add_check(
        shortname='crm_status',
        description='Check crm status {}'.format(unit_name),
        check_cmd=crm_command)

    # process checks
    for shortname, label, process in (
            ('corosync_proc', 'Corosync', 'corosync'),
            ('pacemakerd_proc', 'Pacemakerd', 'pacemakerd')):
        checker.add_check(
            shortname=shortname,
            description='Check {} process {}'.format(label, unit_name),
            check_cmd='check_procs -c 1:1 -C {}'.format(process))

    checker.write()
コード例 #22
0
def update_nrpe_checks():
    """Install the RabbitMQ Nagios plugins and rewrite this unit's checks.

    Steps, in order:

    * rsync the check scripts into NAGIOS_PLUGINS when that directory exists;
    * write or remove the stats cron file depending on the
      ``stats_cron_schedule`` option;
    * create a per-unit vhost and monitoring user and grant permissions;
    * register a plain and/or SSL check according to the ``ssl`` option;
    * register the queue-threshold and cluster checks when configured;
    * write the NRPE configuration.
    """
    if os.path.isdir(NAGIOS_PLUGINS):
        rsync(os.path.join(charm_dir(), 'scripts',
                           'check_rabbitmq.py'),
              os.path.join(NAGIOS_PLUGINS, 'check_rabbitmq.py'))
        rsync(os.path.join(charm_dir(), 'scripts',
                           'check_rabbitmq_queues.py'),
              os.path.join(NAGIOS_PLUGINS, 'check_rabbitmq_queues.py'))
        # The cluster plugin is only synced here, inside the directory guard.
        # A second, unguarded copy of this rsync used to follow the cron
        # handling and ran even when NAGIOS_PLUGINS did not exist.
        if config('management_plugin'):
            rsync(os.path.join(charm_dir(), 'scripts',
                               'check_rabbitmq_cluster.py'),
                  os.path.join(NAGIOS_PLUGINS, 'check_rabbitmq_cluster.py'))

    if config('stats_cron_schedule'):
        script = os.path.join(SCRIPTS_DIR, 'collect_rabbitmq_stats.sh')
        cronjob = CRONJOB_CMD.format(schedule=config('stats_cron_schedule'),
                                     timeout=config('cron-timeout'),
                                     command=script)
        rsync(os.path.join(charm_dir(), 'scripts',
                           'collect_rabbitmq_stats.sh'), script)
        write_file(STATS_CRONFILE, cronjob)
    elif os.path.isfile(STATS_CRONFILE):
        os.remove(STATS_CRONFILE)

    # Find out if nrpe set nagios_hostname
    hostname = nrpe.get_nagios_hostname()
    myunit = nrpe.get_nagios_unit_name()

    # create unique user and vhost for each unit
    current_unit = local_unit().replace('/', '-')
    # A %-format string without a conversion specifier raises TypeError when
    # given an argument, so the user name follows the same per-unit pattern
    # as the vhost name on the next line.
    # NOTE(review): confirm 'nagios-%s' matches the user name deployed units
    # already use.
    user = 'nagios-%s' % current_unit
    vhost = 'nagios-%s' % current_unit
    password = rabbit.get_rabbit_password(user, local=True)

    rabbit.create_vhost(vhost)
    rabbit.create_user(user, password, ['monitoring'])
    rabbit.grant_permissions(user, vhost)

    nrpe_compat = nrpe.NRPE(hostname=hostname)
    if config('ssl') in ['off', 'on']:
        cmd = ('{plugins_dir}/check_rabbitmq.py --user {user} '
               '--password {password} --vhost {vhost}')
        cmd = cmd.format(plugins_dir=NAGIOS_PLUGINS, user=user,
                         password=password, vhost=vhost)
        nrpe_compat.add_check(
            shortname=rabbit.RABBIT_USER,
            description='Check RabbitMQ {%s}' % myunit,
            check_cmd=cmd
        )
    if config('ssl') in ['only', 'on']:
        log('Adding rabbitmq SSL check', level=DEBUG)
        cmd = ('{plugins_dir}/check_rabbitmq.py --user {user} '
               '--password {password} --vhost {vhost} '
               '--ssl --ssl-ca {ssl_ca} --port {port}')
        cmd = cmd.format(plugins_dir=NAGIOS_PLUGINS,
                         user=user,
                         password=password,
                         port=int(config('ssl_port')),
                         vhost=vhost,
                         ssl_ca=SSL_CA_FILE)
        nrpe_compat.add_check(
            shortname=rabbit.RABBIT_USER + "_ssl",
            description='Check RabbitMQ (SSL) {%s}' % myunit,
            check_cmd=cmd
        )

    if config('queue_thresholds'):
        cmd = ""
        # If value of queue_thresholds is incorrect we want the hook to fail
        for item in yaml.safe_load(config('queue_thresholds')):
            cmd += ' -c "{}" "{}" {} {}'.format(*item)
        nrpe_compat.add_check(
            shortname=rabbit.RABBIT_USER + '_queue',
            description='Check RabbitMQ Queues',
            check_cmd='{}/check_rabbitmq_queues.py{} {}'.format(
                NAGIOS_PLUGINS, cmd, STATS_DATAFILE)
        )
    if config('management_plugin'):
        # add NRPE check
        cluster_cmd = (
            '{}/check_rabbitmq_cluster.py --port {} --user {} '
            '--password {}'.format(NAGIOS_PLUGINS,
                                   rabbit.get_managment_port(),
                                   user,
                                   password))
        nrpe_compat.add_check(
            shortname=rabbit.RABBIT_USER + '_cluster',
            description='Check RabbitMQ Cluster',
            check_cmd=cluster_cmd
        )

    nrpe_compat.write()
コード例 #23
0
def update_nrpe_config():
    """Sync the swift-storage Nagios plugins and register the NRPE checks.

    Copies the charm's plugin scripts and sudoers file into place, then
    registers the ring/replication check, one HTTP check per storage server
    API port, the optional replicator-log check and the init service checks
    for ``SWIFT_SVCS`` before writing the NRPE configuration.
    """
    # python-dbus is used by check_upstart_job
    apt_install('python-dbus')
    log('Refreshing nrpe checks')
    if not os.path.exists(NAGIOS_PLUGINS):
        mkpath(NAGIOS_PLUGINS)

    # Plugin scripts shipped under files/nrpe-external-master, synced in
    # this order.
    plugin_names = (
        'check_swift_storage.py',
        'check_timed_logs.pl',
        'check_swift_replicator_logs.sh',
        'check_swift_service',
    )
    for plugin in plugin_names:
        rsync(
            os.path.join(os.getenv('CHARM_DIR'), 'files',
                         'nrpe-external-master', plugin),
            os.path.join(NAGIOS_PLUGINS, plugin))
    # The sudoers fragment lives in a different source directory.
    rsync(
        os.path.join(os.getenv('CHARM_DIR'), 'files', 'sudo', 'swift-storage'),
        os.path.join(SUDOERS_D, 'swift-storage'))

    # Find out if nrpe set nagios_hostname
    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()
    checks = nrpe.NRPE(hostname=nagios_host)

    # check the rings and replication
    checks.add_check(
        shortname='swift_storage',
        description=(
            'Check swift storage ring hashes and replication {%s}'
            % unit_name),
        check_cmd='check_swift_storage.py {}'.format(
            config('nagios-check-params')))

    # All three ports are read before any API check is registered.
    api_servers = [
        ('object', 'Object', config('object-server-port')),
        ('container', 'Container', config('container-server-port')),
        ('account', 'Account', config('account-server-port')),
    ]

    # NOTE: the backslash continuations are inside the string literal, so the
    # indentation of the following lines is part of the command text.
    http_check = "/usr/lib/nagios/plugins/check_http \
                  -I localhost -u /recon/version -p {} \
                  -e \"OK\""
    for slug, label, port in api_servers:
        checks.add_check(
            shortname="swift-{}-server-api".format(slug),
            description="Check Swift {} Server API availability".format(
                label),
            check_cmd=http_check.format(port))

    replicator_check = 'swift_replicator_health'
    if not config('nagios-replication-check-params'):
        # Option unset/empty: ask NRPE to remove the replicator check.
        checks.remove_check(shortname=replicator_check)
    else:
        checks.add_check(
            shortname=replicator_check,
            description='Check swift object replicator log reporting',
            check_cmd='check_swift_replicator_logs.sh {}'.format(
                config('nagios-replication-check-params')))

    nrpe.add_init_service_checks(checks, SWIFT_SVCS, unit_name)
    checks.write()
コード例 #24
0
def update_nrpe_config():
    """Create an NRPE helper for this unit's Nagios hostname and write it.

    No checks are added here; the helper is constructed and ``write()`` is
    called immediately.
    """
    nrpe.NRPE(hostname=nrpe.get_nagios_hostname()).write()
コード例 #25
0
    def test_update_nrpe(self):
        self.patched['config'].return_value = {
            'nagios_context': 'a',
            'nagios_servicegroups': ''
        }
        self.patched['exists'].return_value = True

        def _rels(rname):
            relations = {
                'local-monitors': 'local-monitors:1',
                'nrpe-external-master': 'nrpe-external-master:2',
            }
            return [relations[rname]]

        self.patched['relation_ids'].side_effect = _rels

        checker = nrpe.NRPE()
        checker.add_check(shortname="myservice",
                          description="Check MyService",
                          check_cmd="check_http http://localhost")

        self.assertEqual(None, checker.write())

        self.assertEqual(2, self.patched['open'].call_count)
        filename = 'check_myservice.cfg'
        expected = [
            ('/etc/nagios/nrpe.d/%s' % filename, 'w'),
            ('/var/lib/nagios/export/service__a-testunit_%s' % filename, 'w'),
        ]
        actual = [x[0] for x in self.patched['open'].call_args_list]
        self.assertEqual(expected, actual)
        outfile = self.patched['open'].return_value.__enter__.return_value
        service_file_contents = """
#---------------------------------------------------
# This file is Juju managed
#---------------------------------------------------
define service {
    use                             active-service
    host_name                       a-testunit
    service_description             a-testunit[myservice] Check MyService
    check_command                   check_nrpe!check_myservice
    servicegroups                   a
}
"""
        expected = [
            '# check myservice\n',
            'command[check_myservice]=/usr/lib/nagios/plugins/check_http http://localhost\n',
            service_file_contents,
        ]
        actual = [x[0][0] for x in outfile.write.call_args_list]
        self.assertEqual(expected, actual)

        nrpe_monitors = {'myservice': {'command': 'check_myservice'}}
        monitors = yaml.dump({"monitors": {"remote": {"nrpe": nrpe_monitors}}})
        relation_set_calls = [
            call(monitors=monitors, relation_id="local-monitors:1"),
            call(monitors=monitors, relation_id="nrpe-external-master:2"),
        ]
        self.patched['relation_set'].assert_has_calls(relation_set_calls,
                                                      any_order=True)
        self.check_call_counts(config=1,
                               getpwnam=1,
                               getgrnam=1,
                               exists=3,
                               open=2,
                               listdir=1,
                               relation_ids=2,
                               relation_set=2)
コード例 #26
0
ファイル: test_nrpe.py プロジェクト: sabaini/charm-helpers
    def test_max_check_attmpts(self):
        self.patched['config'].return_value = {'nagios_context': 'a',
                                               'nagios_servicegroups': ''}
        self.patched['exists'].return_value = True
        self.patched['relation_get'].return_value = {
            'egress-subnets': '10.66.111.24/32',
            'ingress-address': '10.66.111.24',
            'private-address': '10.66.111.24'
        }

        def _rels(rname):
            relations = {
                'local-monitors': 'local-monitors:1',
                'nrpe-external-master': 'nrpe-external-master:2',
            }
            return [relations[rname]]
        self.patched['relation_ids'].side_effect = _rels

        checker = nrpe.NRPE()
        checker.add_check(shortname="myservice",
                          description="Check MyService",
                          check_cmd="check_http http://localhost",
                          max_check_attempts=8,
                          )

        self.assertEqual(None, checker.write())

        self.assertEqual(2, self.patched['open'].call_count)
        filename = 'check_myservice.cfg'
        expected = [
            ('/etc/nagios/nrpe.d/%s' % filename, 'w'),
            ('/var/lib/nagios/export/service__a-testunit_%s' % filename, 'w'),
        ]
        actual = [x[0] for x in self.patched['open'].call_args_list]
        self.assertEqual(expected, actual)
        outfile = self.patched['open'].return_value.__enter__.return_value
        service_file_contents = """
#---------------------------------------------------
# This file is Juju managed
#---------------------------------------------------
define service {
    use                             active-service
    host_name                       a-testunit
    service_description             a-testunit[myservice] Check MyService
    check_command                   check_nrpe!check_myservice
    servicegroups                   a
    max_check_attempts              8
}
"""
        expected = [
            '# check myservice\n',
            '# The following header was added automatically by juju\n',
            '# Modifying it will affect nagios monitoring and alerting\n',
            '# servicegroups: a\n',
            'command[check_myservice]=/usr/lib/nagios/plugins/check_http http://localhost\n',
            service_file_contents,
        ]
        actual = [x[0][0] for x in outfile.write.call_args_list]
        self.assertEqual(expected, actual)

        nrpe_monitors = {'myservice':
                         {'command': 'check_myservice',
                          'max_check_attempts': 8,
                          }}
        monitors = yaml.dump(
            {"monitors": {"remote": {"nrpe": nrpe_monitors}}})
        relation_set_calls = [
            call(monitors=monitors, relation_id="local-monitors:1"),
            call(monitors=monitors, relation_id="nrpe-external-master:2"),
        ]
        self.patched['relation_set'].assert_has_calls(relation_set_calls, any_order=True)
        self.check_call_counts(config=1, getpwnam=1, getgrnam=1,
                               exists=4, open=2, listdir=1, relation_get=2,
                               relation_ids=3, relation_set=3)
コード例 #27
0
 def test_default_servicegroup(self):
     """Expect nagios_servicegroups to equal nagios_context when omitted."""
     # Only nagios_context is configured; no nagios_servicegroups key.
     context_only = {'nagios_context': 'testctx'}
     self.patched['config'].return_value = context_only
     self.assertEqual(nrpe.NRPE().nagios_servicegroups, 'testctx')
コード例 #28
0
ファイル: test_nrpe.py プロジェクト: sabaini/charm-helpers
    def test_add_init_service_checks(self, mock_isdir):
        """Exercise ``add_init_service_checks`` with and without systemd.

        Three phases run in order: no systemd with ``/var/lib/nagios``
        missing, no systemd with it present, and systemd enabled (including
        a snap service name).
        """
        # Paths the patched ``exists`` reports as present.
        present_paths = (
            '/etc/init/apache2.conf',
            '/usr/lib/nagios/plugins/check_upstart_job',
            '/etc/init.d/haproxy',
            '/usr/lib/nagios/plugins/check_status_file.py',
            '/etc/cron.d/nagios-service-check-haproxy',
            '/var/lib/nagios/service-check-haproxy.txt',
            '/usr/lib/nagios/plugins/check_systemd.py',
        )
        self.patched['exists'].side_effect = (
            lambda init_file: init_file in present_paths)

        # Test without systemd and /var/lib/nagios does not exist
        self.patched['init_is_systemd'].return_value = False
        mock_isdir.return_value = False
        bill = nrpe.NRPE()
        nrpe.add_init_service_checks(bill, ['apache2', 'haproxy'], 'testunit')
        mock_isdir.assert_called_with('/var/lib/nagios')
        self.patched['call'].assert_not_called()
        legacy_cmds = {
            'apache2': '/usr/lib/nagios/plugins/check_upstart_job apache2',
            'haproxy': '/usr/lib/nagios/plugins/check_status_file.py -f '
                       '/var/lib/nagios/service-check-haproxy.txt',
        }
        for position, svc in enumerate(('apache2', 'haproxy')):
            self.assertEqual(bill.checks[position].shortname, svc)
            self.assertEqual(bill.checks[position].check_cmd,
                             legacy_cmds[svc])

        # without systemd and /var/lib/nagios does exist
        mock_isdir.return_value = True
        status_file = MagicMock()
        self.patched['open'].return_value = status_file
        bill = nrpe.NRPE()
        nrpe.add_init_service_checks(bill, ['apache2', 'haproxy'], 'testunit')
        mock_isdir.assert_called_with('/var/lib/nagios')
        expected_argv = [
            '/usr/local/lib/nagios/plugins/check_exit_status.pl', '-e', '-s',
            '/etc/init.d/haproxy', 'status',
        ]
        self.patched['call'].assert_called_with(
            expected_argv, stdout=status_file, stderr=subprocess.STDOUT)

        # Test regular services and snap services with systemd
        systemd_services = ['apache2', 'haproxy', 'snap.test.test',
                            'ceph-radosgw@hostname']
        self.patched['init_is_systemd'].return_value = True
        # Reuses the NRPE object from the previous phase, so the assertions
        # below start at index 2.
        nrpe.add_init_service_checks(bill, systemd_services, 'testunit')
        asserted = ('apache2', 'haproxy', 'snap.test.test')
        systemd_cmds = {
            svc: '/usr/lib/nagios/plugins/check_systemd.py ' + svc
            for svc in asserted
        }
        for position, svc in enumerate(asserted, 2):
            self.assertEqual(bill.checks[position].shortname, svc)
            self.assertEqual(bill.checks[position].check_cmd,
                             systemd_cmds[svc])
コード例 #29
0
def update_nrpe_config():
    """Validate the alert options, install plugins and register NRPE checks.

    If ``failed_actions_alert_type`` or ``failed_actions_threshold`` is
    invalid, the unit status is set to ``blocked`` and the function returns
    without touching anything else. Otherwise the charm's NRPE scripts and
    sudoers files are copied into place and the corosync/pacemaker checks
    are registered and written.
    """
    # Validate options
    alert_choices = ['ignore', 'warning', 'critical']
    if config('failed_actions_alert_type').lower() not in alert_choices:
        message = ('The value of option failed_actions_alert_type must be '
                   'among {}').format(alert_choices)
        status_set('blocked', message)
        return
    if config('failed_actions_threshold') <= 0:
        status_set(
            'blocked',
            'The value of option failed_actions_threshold must be a '
            'positive integer')
        return

    charm_files = os.path.join(os.environ["CHARM_DIR"], "files")

    # Only the plugin directory is created on demand.
    plugin_dir = "/usr/local/lib/nagios/plugins"
    if not os.path.exists(plugin_dir):
        os.makedirs(plugin_dir)

    # (source subdirectory of files/, destination directory), in copy order.
    copy_plan = (
        ("nrpe", plugin_dir),
        ("sudoers", "/etc/sudoers.d"),
    )
    for subdir, target_dir in copy_plan:
        for path in glob.glob(os.path.join(charm_files, subdir, "*")):
            if not os.path.isfile(path):
                continue
            shutil.copy2(path,
                         os.path.join(target_dir, os.path.basename(path)))

    nagios_host = nrpe.get_nagios_hostname()
    unit_name = nrpe.get_nagios_unit_name()

    nrpe_setup = nrpe.NRPE(hostname=nagios_host)

    apt_install('python-dbus')

    if config('failed_actions_alert_type').lower() != 'ignore':
        crm_command = 'check_crm --failcounts={} --failedactions={}'.format(
            config('failed_actions_threshold'),
            config('failed_actions_alert_type').lower())
    else:
        crm_command = 'check_crm --failedactions=ignore'

    # (shortname, description template, command): corosync/crm checks first,
    # then the process checks.
    check_table = (
        ('corosync_rings', 'Check Corosync rings {}',
         'check_corosync_rings'),
        ('crm_status', 'Check crm status {}',
         crm_command),
        ('corosync_proc', 'Check Corosync process {}',
         'check_procs -c 1:1 -C corosync'),
        ('pacemakerd_proc', 'Check Pacemakerd process {}',
         'check_procs -c 1:1 -C pacemakerd'),
    )
    for shortname, description, command in check_table:
        nrpe_setup.add_check(
            shortname=shortname,
            description=description.format(unit_name),
            check_cmd=command)

    nrpe_setup.write()