def update_nrpe_checks():
    """Install the RabbitMQ Nagios plugin and register its NRPE check.

    When ``NAGIOS_PLUGINS`` exists, ``check_rabbitmq.py`` is rsynced into it
    from the charm's ``scripts`` directory.  The function then calls
    ``rabbit.create_vhost``, ``rabbit.create_user`` and
    ``rabbit.grant_permissions`` for the monitoring vhost/user and writes an
    NRPE check that runs the plugin with those credentials.

    The monitoring password lives in ``<RABBIT_DIR>/<user>.passwd``: it is
    read from there when the file exists, otherwise it is generated with
    ``pwgen`` and stored for later runs.
    """
    if os.path.isdir(NAGIOS_PLUGINS):
        rsync(os.path.join(os.getenv('CHARM_DIR'), 'scripts',
                           'check_rabbitmq.py'),
              os.path.join(NAGIOS_PLUGINS, 'check_rabbitmq.py'))
    user = '******'
    vhost = 'nagios'
    password_file = os.path.join(RABBIT_DIR, '%s.passwd' % user)
    if os.path.exists(password_file):
        # Context manager so the handle is closed deterministically.
        with open(password_file) as stored:
            password = stored.read().strip()
    else:
        cmd = ['pwgen', '64', '1']
        # Ask for text output: on Python 3 check_output() otherwise returns
        # bytes, which str.format() below would render as "b'...'" and which
        # would differ from the value read back from the file on later runs.
        password = subprocess.check_output(
            cmd, universal_newlines=True).strip()
        with open(password_file, 'w') as out:
            out.write(password)

    rabbit.create_vhost(vhost)
    rabbit.create_user(user, password)
    rabbit.grant_permissions(user, vhost)

    nrpe_compat = NRPE()
    nrpe_compat.add_check(
        shortname=rabbit.RABBIT_USER,
        description='Check RabbitMQ',
        check_cmd='{}/check_rabbitmq.py --user {} --password {} --vhost {}'
                  ''.format(NAGIOS_PLUGINS, user, password, vhost)
    )
    nrpe_compat.write()
Beispiel #2
0
def update_nrpe_config():
    """Regenerate the NRPE checks for this PostgreSQL unit.

    Refreshes the Nagios pgpass file, registers the ``pgsql`` and
    ``pgsql_backups`` checks, writes the NRPE configuration and finally
    clears the ``postgresql.nagios.needs_update`` state.
    """
    update_nagios_pgpass()
    checks = NRPE()

    login = nagios_username()
    listen_port = postgresql.port()
    pgsql_cmd = "check_pgsql -P {} -l {}".format(listen_port, login)
    checks.add_check(shortname="pgsql", description="Check pgsql", check_cmd=pgsql_cmd)

    if reactive.is_state("postgresql.replication.is_master"):
        # TODO: These should be calculated from the backup schedule,
        # which is difficult since that is specified in crontab format.
        warn_age = 172800
        crit_age = 194400
        backups_cmd = "check_file_age -w {} -c {} -f {}".format(
            warn_age, crit_age, helpers.backups_log_path()
        )
    else:
        # Standbys don't do backups. The check is still generated so that
        # alerts get through to monitoring after a failover.
        backups_cmd = "check_dummy 0 standby_does_not_backup"

    checks.add_check(
        shortname="pgsql_backups",
        description="Check pgsql backups",
        check_cmd=backups_cmd,
    )
    checks.write()
    reactive.remove_state("postgresql.nagios.needs_update")
Beispiel #3
0
def update_nrpe_checks():
    """Install the RabbitMQ Nagios plugin and register its NRPE check.

    When ``NAGIOS_PLUGINS`` exists, ``check_rabbitmq.py`` is rsynced into it
    from the charm's ``scripts`` directory.  The function then calls
    ``rabbit.create_vhost``, ``rabbit.create_user`` and
    ``rabbit.grant_permissions`` for the monitoring vhost/user and writes an
    NRPE check that runs the plugin with those credentials.

    The monitoring password lives in ``<RABBIT_DIR>/<user>.passwd``: it is
    read from there when the file exists, otherwise it is generated with
    ``pwgen`` and stored for later runs.
    """
    if os.path.isdir(NAGIOS_PLUGINS):
        rsync(
            os.path.join(os.getenv('CHARM_DIR'), 'scripts',
                         'check_rabbitmq.py'),
            os.path.join(NAGIOS_PLUGINS, 'check_rabbitmq.py'))
    user = '******'
    vhost = 'nagios'
    password_file = os.path.join(RABBIT_DIR, '%s.passwd' % user)
    if os.path.exists(password_file):
        # Context manager so the handle is closed deterministically.
        with open(password_file) as stored:
            password = stored.read().strip()
    else:
        cmd = ['pwgen', '64', '1']
        # Ask for text output: on Python 3 check_output() otherwise returns
        # bytes, which str.format() below would render as "b'...'" and which
        # would differ from the value read back from the file on later runs.
        password = subprocess.check_output(
            cmd, universal_newlines=True).strip()
        with open(password_file, 'w') as out:
            out.write(password)

    rabbit.create_vhost(vhost)
    rabbit.create_user(user, password)
    rabbit.grant_permissions(user, vhost)

    nrpe_compat = NRPE()
    nrpe_compat.add_check(
        shortname=rabbit.RABBIT_USER,
        description='Check RabbitMQ',
        check_cmd='{}/check_rabbitmq.py --user {} --password {} --vhost {}'
        ''.format(NAGIOS_PLUGINS, user, password, vhost))
    nrpe_compat.write()
Beispiel #4
0
    def render_checks(self, creds):
        """Render the credentials file and register the Contrail alarms check.

        :param creds: context passed to the ``keystone.yaml`` template that
            is rendered to ``self.oscreds`` (owned by nagios:nagios).

        The plugins shipped under the charm's ``files/plugins/`` directory
        are rsynced into ``self.plugins_dir`` (created when missing) before
        the ``contrail_alarms`` NRPE check is written.
        """
        render(
            source='keystone.yaml',
            target=self.oscreds,
            context=creds,
            owner='nagios',
            group='nagios',
        )

        checks = NRPE()
        if not os.path.exists(self.plugins_dir):
            os.makedirs(self.plugins_dir)

        # Trailing slash: rsync copies the directory contents, not the
        # directory itself.
        bundled_plugins = os.path.join(
            hookenv.charm_dir(), 'files', 'plugins/')
        host.rsync(
            bundled_plugins, self.plugins_dir, options=['--executability'])

        checks.add_check(
            shortname='contrail_alarms',
            description='Check Contrail alarms',
            check_cmd=os.path.join(
                self.plugins_dir, 'check_contrail_alarms.py'),
        )
        checks.write()
Beispiel #5
0
    def remove_rally_check(self):
        """Delete the rally cron file and unregister the ``rally`` NRPE check.

        Both steps are skipped when their file is absent: the cron file at
        ``self.rally_cron_file`` and the NRPE definition at
        ``/etc/nagios/nrpe.d/check_rally.cfg``.
        """
        cron_path = self.rally_cron_file
        if os.path.exists(cron_path):
            os.unlink(cron_path)

        # Nothing to unregister unless the check definition is on disk.
        if not os.path.exists('/etc/nagios/nrpe.d/check_rally.cfg'):
            return

        checks = NRPE()
        checks.remove_check(shortname='rally')
        checks.write()
Beispiel #6
0
def update_nrpe_checks():
    """Rewrite the MySQL NRPE checks (process and connectivity)."""
    log('Refreshing nrpe checks')
    # Use the nagios_hostname from the first nrpe-external-master relation
    # entry that provides one; fall back to None otherwise.
    hostname = next(
        (entry['nagios_hostname']
         for entry in relations_of_type('nrpe-external-master')
         if 'nagios_hostname' in entry),
        None)

    checks = NRPE(hostname=hostname)
    checks.add_check(
        shortname='mysql_proc',
        description='Check MySQL process',
        check_cmd='check_procs -c 1:1 -C mysqld',
    )
    connectivity_cmd = 'check_mysql -u nagios -p {}'.format(nagios_password())
    checks.add_check(
        shortname='mysql',
        description='Check MySQL connectivity',
        check_cmd=connectivity_cmd,
    )
    checks.write()
def update_nrpe_checks():
    """Rewrite the MySQL NRPE checks (process and connectivity)."""
    log("Refreshing nrpe checks")
    # Use the nagios_hostname from the first nrpe-external-master relation
    # entry that provides one; fall back to None otherwise.
    hostname = next(
        (entry["nagios_hostname"] for entry in relations_of_type("nrpe-external-master") if "nagios_hostname" in entry),
        None,
    )

    checks = NRPE(hostname=hostname)
    checks.add_check(
        shortname="mysql_proc",
        description="Check MySQL process",
        check_cmd="check_procs -c 1:1 -C mysqld",
    )
    connectivity_cmd = "check_mysql -u nagios -p {}".format(nagios_password())
    checks.add_check(shortname="mysql", description="Check MySQL connectivity", check_cmd=connectivity_cmd)
    checks.write()
Beispiel #8
0
    def configure_rally_check(self):
        """Register the ``rally`` NRPE check once per unit.

        The ``rallyconfigured`` flag in the unit key/value store marks the
        work as done; when it is already set the method returns immediately.
        """
        store = unitdata.kv()
        already_configured = store.get('rallyconfigured', False)
        if already_configured:
            return

        self.update_rally_checkfiles()
        plugin_path = os.path.join(self.plugins_dir, 'check_rally.py')

        checks = NRPE()
        checks.add_check(
            shortname='rally',
            description='Check that all rally tests pass',
            check_cmd=plugin_path,
        )
        checks.write()

        store.set('rallyconfigured', True)
Beispiel #9
0
def update_nagios(svc):
    """Configure the Nagios/NRPE checks for vault.

    Adds the init-service check for ``vault``, installs
    ``check_vault_version.py`` into ``/usr/lib/nagios/plugins`` and registers
    the ``vault_version`` check, then sets the ``vault.nrpe.configured``
    state.

    :param svc: not used by this function.
    """
    status_set('maintenance', 'configuring Nagios checks')
    hostname = get_nagios_hostname()
    current_unit = get_nagios_unit_name()
    nrpe = NRPE(hostname=hostname)
    add_init_service_checks(nrpe, ['vault'], current_unit)
    # Read the bundled plugin through a context manager so the source file
    # handle is closed instead of being left to the garbage collector.
    with open('files/nagios/check_vault_version.py', 'rb') as plugin:
        write_file('/usr/lib/nagios/plugins/check_vault_version.py',
                   plugin.read(),
                   perms=0o755)
    nrpe.add_check(
        'vault_version',
        'Check running vault server version is same as installed snap',
        '/usr/lib/nagios/plugins/check_vault_version.py',
    )
    nrpe.write()
    set_state('vault.nrpe.configured')
Beispiel #10
0
def update_nagios(svc):
    """Configure the Nagios/NRPE checks for vault.

    Removes the deprecated ``vault_version`` check and its plugin file, adds
    the init-service check for ``vault``, installs ``check_vault_health.py``
    into ``/usr/lib/nagios/plugins`` and registers the ``vault_health``
    check, then sets the ``vault.nrpe.configured`` state.

    :param svc: not used by this function.
    """
    status_set('maintenance', 'configuring Nagios checks')
    hostname = get_nagios_hostname()
    current_unit = get_nagios_unit_name()
    nrpe = NRPE(hostname=hostname)
    remove_deprecated_check(nrpe, ['vault_version'])
    add_init_service_checks(nrpe, ['vault'], current_unit)
    try:
        os.remove('/usr/lib/nagios/plugins/check_vault_version.py')
    except FileNotFoundError:
        # The old plugin was already removed (or never installed).
        pass
    # Read the bundled plugin through a context manager so the source file
    # handle is closed instead of being left to the garbage collector.
    with open('files/nagios/check_vault_health.py', 'rb') as plugin:
        write_file('/usr/lib/nagios/plugins/check_vault_health.py',
                   plugin.read(),
                   perms=0o755)
    nrpe.add_check(
        'vault_health',
        'Check running vault server version and health',
        '/usr/lib/nagios/plugins/check_vault_health.py',
    )
    nrpe.write()
    set_state('vault.nrpe.configured')
Beispiel #11
0
def update_nrpe_config():
    """Regenerate the NRPE checks for this PostgreSQL unit.

    Registers three checks and writes the NRPE configuration:

    * ``pgsql`` - connectivity check using the Nagios user and server port.
    * ``pgsql_stale_wal`` - runs ``check_latest_ready_wal.py`` with the
      ``wal_archive_warn_threshold`` / ``wal_archive_crit_threshold`` charm
      config values (0 when unset).  The data it inspects is produced by
      ``find_latest_ready_wal.py``, which is scheduled from cron only when
      both thresholds are non-zero.
    * ``pgsql_backups`` - a file-age check on the backups log on the master,
      or an always-OK dummy check on standbys.

    Finally clears the ``postgresql.nagios.needs_update`` state.
    """
    update_nagios_pgpass()
    nrpe = NRPE()

    user = nagios_username()
    port = postgresql.port()
    nrpe.add_check(
        shortname="pgsql",
        description="Check pgsql",
        check_cmd="check_pgsql -P {} -l {}".format(port, user),
    )

    # copy the check script which will run cronned as postgres user
    with open("scripts/find_latest_ready_wal.py") as fh:
        check_script = fh.read()

    check_script_path = "{}/{}".format(helpers.scripts_dir(), "find_latest_ready_wal.py")
    helpers.write(check_script_path, check_script, mode=0o755)

    # create an (empty) file with appropriate permissions for the above
    check_output_path = "/var/lib/nagios/postgres-wal-max-age.txt"
    if not os.path.exists(check_output_path):
        helpers.write(check_output_path, b"0\n", mode=0o644, user="******", group="postgres")

    # retrieve the threshold values from the charm config
    config = hookenv.config()
    check_warn_threshold = config["wal_archive_warn_threshold"] or 0
    check_crit_threshold = config["wal_archive_crit_threshold"] or 0

    check_cron_path = "/etc/cron.d/postgres-wal-archive-check"
    if check_warn_threshold and check_crit_threshold:
        # create the cron job to run the above.  The entry must end with a
        # newline: cron rejects a crontab whose last line is unterminated.
        check_cron = "*/2 * * * * postgres {}\n".format(check_script_path)
        helpers.write(check_cron_path, check_cron, mode=0o644)
    # NOTE(review): a cron file written by an earlier run is left in place
    # when the thresholds are later cleared - confirm whether that is wanted.

    # copy the nagios plugin which will check the cronned output
    with open("scripts/check_latest_ready_wal.py") as fh:
        check_script = fh.read()
    check_script_path = "{}/{}".format("/usr/local/lib/nagios/plugins", "check_latest_ready_wal.py")
    helpers.write(check_script_path, check_script, mode=0o755)

    # write the nagios check definition
    nrpe.add_check(
        shortname="pgsql_stale_wal",
        description="Check for stale WAL backups",
        check_cmd="{} {} {}".format(check_script_path, check_warn_threshold, check_crit_threshold),
    )

    if reactive.is_state("postgresql.replication.is_master"):
        # TODO: These should be calculated from the backup schedule,
        # which is difficult since that is specified in crontab format.
        warn_age = 172800
        crit_age = 194400
        backups_log = helpers.backups_log_path()
        nrpe.add_check(
            shortname="pgsql_backups",
            description="Check pgsql backups",
            check_cmd=("check_file_age -w {} -c {} -f {}" "".format(warn_age, crit_age, backups_log)),
        )
    else:
        # Standbys don't do backups. We still generate a check though,
        # to ensure alerts get through to monitoring after a failover.
        nrpe.add_check(
            shortname="pgsql_backups",
            description="Check pgsql backups",
            check_cmd=r"check_dummy 0 standby_does_not_backup",
        )
    nrpe.write()
    reactive.remove_state("postgresql.nagios.needs_update")
Beispiel #12
0
    def create_endpoint_checks(self, creds):
        """
        Create an NRPE check for each Keystone catalog endpoint.

        Read the Keystone catalog, and create a check for each endpoint listed.
        If there is a healthcheck endpoint for the API, use that URL, otherwise check
        the url '/'.
        If SSL, add a check for the cert.

        Endpoints are skipped when their service is not among the enabled
        Keystone services, when the service is 'image-stream', or (v2 only)
        when the service is 'keystone' or no '<interface>url' attribute is
        present.  When the 'check_<interface>_urls' charm option is falsy the
        corresponding checks are removed instead of added.

        v2 endpoint needs the 'interface' attribute:
        <Endpoint {'id': 'XXXXX', 'region': 'RegionOne', 'publicurl': 'http://10.x.x.x:9696',
        'service_id': 'YYY', 'internalurl': 'http://10.x.x.x:9696', 'enabled': True,
        'adminurl': 'http://10.x.x.x:9696'}>

        :param creds: credentials handed to ``self.get_keystone_client``.
        """
        # provide URLs that can be used for healthcheck for some services
        # This also provides a nasty hack-ish way to add switches if we need
        # for some services.
        health_check_params = {
            'aodh': '/healthcheck',
            'barbican': '/v1 -e Unauthorized',
            'ceilometer': '/ -e Unauthorized -d x-openstack-request-id',
            'cinderv1': '/v1 -e Unauthorized -d x-openstack-request-id',
            'cinderv2': '/v2 -e Unauthorized',
            'cinderv3': '/v3 -e Unauthorized -d x-openstack-request-id',
            'designate': '/v2 -e Unauthorized',
            'glance': '/healthcheck',
            'gnocchi': '/v1 -e Unauthorized',
            'heat': '/v1 -e Unauthorized',
            'keystone': '/healthcheck',
            'nova': '/healthcheck',
            'octavia': '/v2 -e Unauthorized',
            'placement':
            '/healthcheck -e Unauthorized -d x-openstack-request-id',
            's3': '/healthcheck',
            'swift': self.charm_config.get('swift_check_params', '/'),
        }

        self.get_keystone_client(creds)
        endpoints = self.keystone_endpoints
        services = [svc for svc in self.keystone_services if svc.enabled]
        nrpe = NRPE()
        for endpoint in endpoints:
            endpoint.service_names = [
                x.name for x in services if x.id == endpoint.service_id
            ]
            if not endpoint.service_names:
                # The endpoint's service is disabled (filtered out above) or
                # missing from the catalog: there is nothing to name the
                # check after, so skip it rather than fail on [0].
                continue
            service_name = endpoint.service_names[0]
            endpoint.healthcheck_url = health_check_params.get(
                service_name, '/')

            # Note(aluria): glance-simplestreams-sync does not provide an API to check
            if service_name == 'image-stream':
                continue

            if not hasattr(endpoint, 'interface'):
                if service_name == 'keystone':
                    # Note(aluria): filter:healthcheck is not configured in v2
                    # https://docs.openstack.org/keystone/pike/configuration.html#health-check-middleware
                    continue
                # v2 endpoints carry adminurl/internalurl/publicurl instead
                # of interface/url; use the first one that is present.
                for interface in 'admin internal public'.split():
                    old_interface_name = '{}url'.format(interface)
                    if not hasattr(endpoint, old_interface_name):
                        continue
                    endpoint.interface = interface
                    endpoint.url = getattr(endpoint, old_interface_name)
                    break
                else:
                    # No usable URL attribute at all: nothing to check.
                    continue

            check_url = urlparse(endpoint.url)
            if not self.charm_config.get('check_{}_urls'.format(
                    endpoint.interface)):
                nrpe.remove_check(
                    shortname='{}_{}'.format(service_name, endpoint.interface))
                if check_url.scheme == 'https':
                    nrpe.remove_check(shortname='{}_{}_cert'.format(
                        service_name, endpoint.interface))
                continue

            cmd_params = ['/usr/lib/nagios/plugins/check_http']
            host, port = self._split_url(check_url.netloc, check_url.scheme)
            cmd_params.append('-H {} -p {}'.format(host, port))
            cmd_params.append('-u {}'.format(endpoint.healthcheck_url))

            # if this is https, we want to add a check for cert expiry
            # also need to tell check_http to use TLS
            if check_url.scheme == 'https':
                cmd_params.append('-S')
                # Add an extra check for TLS cert expiry
                cmd_params_cert = cmd_params.copy()
                cmd_params_cert.append('-C {},{}'.format(
                    self.charm_config['tls_warn_days'] or 30,
                    self.charm_config['tls_crit_days'] or 14))
                nrpe.add_check(
                    shortname='{}_{}_cert'.format(service_name,
                                                  endpoint.interface),
                    description='Certificate expiry check for {} {}'.format(
                        service_name, endpoint.interface),
                    check_cmd=' '.join(cmd_params_cert))

            # Add the actual health check for the URL
            nrpe.add_check(shortname='{}_{}'.format(service_name,
                                                    endpoint.interface),
                           description='Endpoint url check for {} {}'.format(
                               service_name, endpoint.interface),
                           check_cmd=' '.join(cmd_params))

        nrpe.write()
Beispiel #13
0
    def render_checks(self, creds):
        """Render the novarc file and (re)generate the OpenStack NRPE checks.

        ``nova_services`` is always registered.  ``neutron_agents``,
        ``loadbalancers``, ``contrail_analytics_alarms`` and ``dns_multi``
        are each added when their controlling attribute is truthy and
        removed otherwise.  After the NRPE configuration is written,
        ``create_endpoint_checks`` is called with the same credentials.

        :param creds: context for the ``nagios.novarc`` template; also passed
            on to ``self.create_endpoint_checks``.
        """
        render(source='nagios.novarc',
               target=self.novarc,
               context=creds,
               owner='nagios',
               group='nagios')

        nrpe = NRPE()
        if not os.path.exists(self.plugins_dir):
            os.makedirs(self.plugins_dir)

        self.update_plugins()
        nova_check_command = os.path.join(self.plugins_dir,
                                          'check_nova_services.py')
        # strip() drops the trailing space left when skip_disabled is empty.
        check_command = '{} --warn {} --crit {} --skip-aggregates {} {}'.format(
            nova_check_command, self.nova_warn, self.nova_crit,
            self.nova_skip_aggregates, self.skip_disabled).strip()
        nrpe.add_check(
            shortname='nova_services',
            description='Check that enabled Nova services are up',
            check_cmd=check_command,
        )
        if self.is_neutron_agents_check_enabled:
            nrpe.add_check(
                shortname='neutron_agents',
                description='Check that enabled Neutron agents are up',
                check_cmd=os.path.join(self.plugins_dir,
                                       'check_neutron_agents.sh'),
            )
        else:
            nrpe.remove_check(shortname='neutron_agents')

        if self.is_loadbalancers_check_enabled:
            nrpe.add_check(
                shortname='loadbalancers',
                description='Check loadbalancers status',
                check_cmd=os.path.join(self.plugins_dir,
                                       'check_loadbalancers.py'),
            )
        else:
            nrpe.remove_check(shortname='loadbalancers')

        if self.contrail_analytics_vip:
            contrail_check_command = '{} --host {}'.format(
                os.path.join(self.plugins_dir,
                             'check_contrail_analytics_alarms.py'),
                self.contrail_analytics_vip)
            nrpe.add_check(
                shortname='contrail_analytics_alarms',
                description='Check Contrail Analytics alarms',
                check_cmd=contrail_check_command,
            )
        else:
            nrpe.remove_check(shortname='contrail_analytics_alarms')

        # Truthiness instead of len(): an unset (None) value is treated the
        # same as an empty string rather than raising TypeError.
        if self.check_dns:
            nrpe.add_check(
                shortname='dns_multi',
                description='Check DNS names are resolvable',
                check_cmd='{} {}'.format(
                    os.path.join(self.plugins_dir, 'check_dns_multi.sh'),
                    ' '.join(self.check_dns.split())),
            )
        else:
            nrpe.remove_check(shortname='dns_multi')
        nrpe.write()

        self.create_endpoint_checks(creds)