def swift_async():
    """Collect swift-recon's async-pending statistics for the object ring.

    The first non-empty dictionary produced by ``recon_stats_dicts`` is
    kept.  When no non-empty dictionary is found the failure is reported
    through ``maas_common.status_err``.

    ::

        >>> swift_async()
        {'async': {'avg': '0.0', 'failed': '0.0%', 'high': '0', 'low': '0',
                   'no_result': '0', 'reported': '2', 'total': '0'}}

    :returns: Dictionary with a single ``'async'`` key holding the selected
              statistics dictionary.
    """
    pattern = stat_regexp_generator('async_pending')
    candidates = recon_stats_dicts('object', ['-a'], '[async_pending]',
                                   pattern)

    # Take the first truthy entry; an empty dict means nothing usable.
    stats = next((entry for entry in candidates if entry), {})
    if not stats:
        maas_common.status_err(
            'No data could be collected about pending async operations'
        )

    return {'async': stats}
def check(args, tenant_id):
    """Report local Heat API availability, latency and stack count.

    :param args: Parsed arguments; ``args.ip`` is used to build the endpoint.
    :param tenant_id: Tenant identifier inserted into the endpoint URL.
    """
    endpoint = 'http://{ip}:8004/v1/{tenant}'.format(ip=args.ip,
                                                     tenant=tenant_id)

    try:
        heat = get_heat_client(endpoint=endpoint)
    except exc.HTTPException:
        # An HTTP-level failure from the client means the API is down.
        is_up = False
    except Exception as e:
        # Any other exception presumably isn't an API error.
        status_err(str(e))
    else:
        is_up = True
        # Time an arbitrary request.
        began = time()
        heat.build_info.build_info()
        finished = time()
        milliseconds = (finished - began) * 1000
        # Extra metric: number of stacks.
        stack_count = len(list(heat.stacks.list()))

    status_ok()
    metric_bool('heat_api_local_status', is_up)
    if not is_up:
        # The remaining metrics only exist when the API answered.
        return

    metric('heat_api_local_response_time',
           'double',
           '%.3f' % milliseconds,
           'ms')
    metric('heat_stack_count', 'uint32', stack_count, 'stacks')
def check_process_running(process_names, container_name=None):
    """Emit one boolean metric per requested process name.

    Each name in ``process_names`` is looked up in the process list read
    from the cgroup ``cgroup.procs`` file of the host, or of the LXC
    container named by ``container_name`` when one is given.

    :param process_names: Iterable of process names to look for.
    :param container_name: Optional LXC container to inspect instead of
                           the host.
    """
    if not process_names:
        # Nothing was requested, so the check itself is in error.
        status_err('No process names provided')

    if container_name is None:
        procs_path = '/sys/fs/cgroup/cpu/cgroup.procs'
    else:
        # Inspect the container's cgroup rather than the parent host's.
        procs_path = os.path.join('/sys/fs/cgroup/cpu/lxc',
                                  container_name,
                                  'cgroup.procs')

    running = get_processes(procs_path)
    if not running:
        # No process list could be obtained for the host or container.
        status_err('Could not get a list of running processes')

    # A process list was fetched, so the check itself succeeded.
    status_ok()

    for wanted in process_names:
        metric_bool('%s_process_status' % wanted, wanted in running)
def main(args):
    """Run one OpenManage hardware report and emit its status metric.

    ``args.omc`` must hold exactly two words: the report type and the
    report request.  Both are lower-cased before use.

    :param args: Parsed arguments exposing the ``omc`` list.
    """
    if len(args.omc) != 2:
        # Keep ``args`` intact: the joined text gets its own name so the
        # namespace is never replaced by a string.
        provided = ' '.join(args.omc)
        status_err('Requires 2 arguments, arguments provided: "%s"'
                   % provided,
                   m_name='maas_hwvendor')
        # NOTE(review): status_err presumably terminates the process; the
        # explicit return keeps this path safe if it ever does not.
        return

    report_type = args.omc[0].lower()
    report_request = args.omc[1].lower()
    metric_name = 'hardware_%s_status' % report_request

    # If we're not using the correct version of OpenManage, error out.
    check_openmanage_version()

    try:
        report = hardware_report(report_type, report_request)
    except (OSError, subprocess.CalledProcessError) as e:
        metric_bool(metric_name, False)
        status_err(str(e), m_name='maas_hwvendor')
        # ``report`` is undefined here, so never fall through.
        return

    status_ok(m_name='maas_hwvendor')

    # Power supplies have their own pattern keyed by the request; every
    # other report is keyed by its type.
    if report_request == 'pwrsupplies':
        pattern = regex[report_request]
    else:
        pattern = regex[report_type]
    metric_bool(metric_name, all_okay(report, pattern))
def check(args):
    """Report local Octavia API availability and response time.

    :param args: Parsed arguments providing ``ip``, ``port`` and
                 ``protocol`` for the local endpoint.
    """
    client = get_openstack_client('load_balancer')

    try:
        # NOTE(review): the endpoint is only built when args.ip is set;
        # without it the request below raises NameError, which is handled
        # by the generic handler.  Confirm args.ip is always supplied.
        if args.ip:
            octavia_local_endpoint = generate_local_endpoint(
                str(client.get_endpoint()),
                args.ip,
                args.port,
                args.protocol,
                '/lbaas/loadbalancers?limit=1',
            )
        resp = client.session.get(octavia_local_endpoint, timeout=180)
    except (exc.HTTPError, exc.Timeout, exc.ConnectionError):
        is_up = False
        metric_bool('client_success', False, m_name='maas_octavia')
    except Exception as e:
        # Any other exception presumably isn't an API error.
        metric_bool('client_success', False, m_name='maas_octavia')
        status_err(str(e), m_name='maas_octavia')
    else:
        is_up = resp.ok
        metric_bool('client_success', True, m_name='maas_octavia')
        milliseconds = resp.elapsed.total_seconds() * 1000

    status_ok(m_name='maas_octavia')
    metric_bool('octavia_api_local_status', is_up, m_name='maas_octavia')

    if not is_up:
        # Response time is only reported when the API is up.
        return
    metric('octavia_api_local_response_time',
           'double',
           '%.3f' % milliseconds,
           'ms')
def check(args):
    """Report local Nova API availability, latency and server states.

    :param args: Parsed arguments; ``args.ip`` is used to build the endpoint.
    """
    endpoint = 'http://{ip}:8774/v3'.format(ip=args.ip)

    try:
        nova = get_nova_client(bypass_url=endpoint)
    except exc.ClientException:
        is_up = False
    except Exception as e:
        # Any other exception presumably isn't an API error.
        status_err(str(e))
    else:
        is_up = True
        # Time an arbitrary request.
        began = time()
        nova.services.list()
        finished = time()
        milliseconds = (finished - began) * 1000

        # Tally servers by their status attribute.
        status_count = collections.Counter(
            server.status for server in nova.servers.list())

    status_ok()
    metric_bool('nova_api_local_status', is_up)
    if not is_up:
        # The remaining metrics only exist when the API is up.
        return

    metric('nova_api_local_response_time',
           'double',
           '%.3f' % milliseconds,
           'ms')
    for status in SERVER_STATUSES:
        metric('nova_servers_in_state_%s' % status,
               'uint32',
               status_count[status])
def main():
    """Collect HP hardware statuses and emit them as boolean metrics.

    The Smart Storage CLI is located by probing ``/usr/sbin/ssacli`` first
    and ``/usr/sbin/hpssacli`` second; an error status is reported when
    neither can be stat'ed.
    """
    probes = (
        ('/usr/sbin/ssacli', 'ssacli'),
        ('/usr/sbin/hpssacli', 'hpssacli'),
    )
    for path, binary in probes:
        try:
            os.stat(path)
        except Exception:
            continue
        ssacli_bin = binary
        break
    else:
        maas_common.status_err('Neither ssacli or hpssacli could be found',
                               m_name='hp_monitoring')

    # Gathered in this order: chassis first, then storage.
    status = {}
    status['hardware_processors_status'] = get_chassis_status('hpasmcli',
                                                              'server')
    status['hardware_memory_status'] = get_chassis_status('hpasmcli', 'dimm')
    status['hardware_disk_status'] = get_drive_status(ssacli_bin)
    status['hardware_controller_status'] = get_controller_status(ssacli_bin)
    status['hardware_controller_cache_status'] = (
        get_controller_cache_status(ssacli_bin))
    status['hardware_controller_battery_status'] = (
        get_controller_battery_status(ssacli_bin))

    maas_common.status_ok(m_name='maas_hwvendor')
    for metric_name, healthy in status.viewitems():
        maas_common.metric_bool(metric_name, healthy, m_name='maas_hwvendor')
def check(auth_ref, args):
    """Report local Magnum API availability and per-service state.

    :param auth_ref: Not used by this function.
    :param args: Parsed arguments; when ``args.ip`` is set the client is
                 pointed at the local endpoint built from it.
    """
    endpoint = 'http://{ip}:9511/v1'.format(ip=args.ip)
    # Only override the endpoint when an IP was supplied.
    client_kwargs = {'endpoint': endpoint} if args.ip else {}

    try:
        magnum = get_magnum_client(**client_kwargs)
        api_is_up = True
    except exc.HttpError:
        api_is_up = False
        metric_bool('client_success', False, m_name='maas_magnum')
    except Exception as e:
        # Any other exception presumably isn't an API error.
        metric_bool('client_success', False, m_name='maas_magnum')
        status_err(str(e), m_name='maas_magnum')
    else:
        metric_bool('client_success', True, m_name='maas_magnum')
        services = magnum.mservices.list()

    status_ok(m_name='maas_magnum')
    metric_bool('magnum_api_local_status', api_is_up, m_name='maas_magnum')

    if not api_is_up:
        return
    for service in services:
        metric_bool('_'.join([service.binary, 'status']),
                    service.state == 'up')
def check(auth_ref, args):
    """Report local Ironic API availability and response time.

    A ``client_success`` metric is emitted on every path, including the
    ``ClientException`` path, which matches the sibling API checks.

    :param auth_ref: Not used by this function.
    :param args: Parsed arguments; when ``args.ip`` is set the client is
                 pointed at the local endpoint built from it.
    """
    IRONIC_ENDPOINT = 'http://{ip}:6385/v1'.format(ip=args.ip)

    try:
        if args.ip:
            ironic = get_ironic_client(endpoint=IRONIC_ENDPOINT)
        else:
            ironic = get_ironic_client()
        is_up = True
    except exc.ClientException:
        is_up = False
        # Report the client failure so the metric is never missing.
        metric_bool('client_success', False, m_name='maas_ironic')
    except Exception as e:
        # Any other exception presumably isn't an API error.
        metric_bool('client_success', False, m_name='maas_ironic')
        status_err(str(e), m_name='maas_ironic')
    else:
        metric_bool('client_success', True, m_name='maas_ironic')
        # Time an arbitrary request.
        start = time.time()
        ironic.node.list()
        end = time.time()
        milliseconds = (end - start) * 1000

    status_ok(m_name='maas_ironic')
    metric_bool('ironic_api_local_status', is_up, m_name='maas_ironic')
    if is_up:
        # Response time is only meaningful when the API is up.
        metric('ironic_api_local_response_time',
               'double',
               '%.3f' % milliseconds,
               'ms')
def main(args):
    """Validate the requested process names and run the host check.

    :param args: Parsed arguments exposing ``processes``.
    """
    requested = args.processes
    if not requested:
        # No process names were given on the command line.
        status_err('No executable names supplied', m_name='maas_process')

    check_process_running(process_names=requested)
def check(args):
    """Report local Heat API availability and response time.

    :param args: Parsed arguments providing ``ip``, ``port`` and
                 ``protocol`` for the local endpoint.
    """
    client = get_openstack_client('orchestration')

    try:
        build_info_url = generate_local_endpoint(
            str(client.get_endpoint()),
            args.ip,
            args.port,
            args.protocol,
            '/build_info',
        )
        resp = client.session.get(build_info_url)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
        metric_bool('client_success', False, m_name='maas_heat')
    except Exception as e:
        # Any other exception presumably isn't an API error.
        metric_bool('client_success', False, m_name='maas_heat')
        status_err(str(e), m_name='maas_heat')
    else:
        is_up = True
        milliseconds = resp.elapsed.total_seconds() * 1000
        metric_bool('client_success', True, m_name='maas_heat')

    status_ok(m_name='maas_heat')
    metric_bool('heat_api_local_status', is_up, m_name='maas_heat')
    if not is_up:
        # Response time is only reported when the API is up.
        return
    metric('heat_api_local_response_time',
           'double',
           '%.3f' % milliseconds,
           'ms')
def check(auth_ref, args):
    """Report local Heat API availability and response time.

    :param auth_ref: Auth reference passed to ``get_keystone_client`` to
                     obtain the tenant id.
    :param args: Parsed arguments; when ``args.ip`` is set the client is
                 pointed at the local endpoint built from it.
    """
    tenant_id = get_keystone_client(auth_ref).tenant_id
    endpoint = 'http://{ip}:8004/v1/{tenant}'.format(ip=args.ip,
                                                     tenant=tenant_id)
    # Only override the endpoint when an IP was supplied.
    client_kwargs = {'endpoint': endpoint} if args.ip else {}

    try:
        heat = get_heat_client(**client_kwargs)
    except exc.HTTPException:
        is_up = False
    except Exception as e:
        # Any other exception presumably isn't an API error.
        status_err(str(e))
    else:
        is_up = True
        # Time an arbitrary request.
        began = time.time()
        heat.build_info.build_info()
        finished = time.time()
        milliseconds = (finished - began) * 1000

    status_ok()
    metric_bool('heat_api_local_status', is_up)
    if not is_up:
        # Response time is only reported when the API is up.
        return
    metric('heat_api_local_response_time',
           'double',
           '%.3f' % milliseconds,
           'ms')
def check(args):
    """Report Octavia API status and the count of errored load balancers.

    :param args: Parsed arguments providing ``ip``, ``port`` and
                 ``protocol`` for the local endpoint.
    """
    client = get_openstack_client('load_balancer')

    try:
        # NOTE(review): the endpoint is only built when args.ip is set;
        # without it the request below raises NameError, which is handled
        # by the generic handler.  Confirm args.ip is always supplied.
        if args.ip:
            octavia_local_endpoint = generate_local_endpoint(
                str(client.get_endpoint()),
                args.ip,
                args.port,
                args.protocol,
                '/lbaas/loadbalancers',
            )
        resp = client.session.get(octavia_local_endpoint, timeout=180)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
        metric_bool('client_success', False, m_name='maas_octavia')
    except Exception as e:
        # Any other exception presumably isn't an API error.
        metric_bool('client_success', False, m_name='maas_octavia')
        status_err(str(e), m_name='maas_octavia')
    else:
        is_up = resp.ok
        metric_bool('client_success', True, m_name='maas_octavia')

    status_ok(m_name='maas_octavia')
    metric_bool('octavia_api_local_status', is_up, m_name='maas_octavia')

    if not is_up:
        # The error count is only reported when the API is up.
        return

    errored = [
        lb for lb in resp.json()['loadbalancers']
        if lb['provisioning_status'] == 'ERROR'
    ]
    # NOTE(review): the unit 'ms' looks wrong for a count of load
    # balancers; left unchanged pending confirmation of what consumers
    # expect.
    metric('octavia_num_lb_in_error_status', 'uint32', len(errored), 'ms')
def check(args):
    """Report local Ironic API availability and response time.

    The API counts as up only when ``/nodes`` answers with HTTP 200.  A
    ``client_success`` metric is emitted on every path, including the
    connection-error path, which matches the sibling SDK-based checks.

    :param args: Parsed arguments providing ``ip``, ``port`` and
                 ``protocol`` for the local endpoint.
    """
    ironic = get_openstack_client('baremetal')

    try:
        ironic_local_endpoint = generate_local_endpoint(
            str(ironic.get_endpoint()),
            args.ip,
            args.port,
            args.protocol,
            '/nodes'
        )
        resp = ironic.session.get(ironic_local_endpoint)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
        # Report the client failure so the metric is never missing.
        metric_bool('client_success', False, m_name='maas_ironic')
    except Exception as e:
        # Any other exception presumably isn't an API error.
        metric_bool('client_success', False, m_name='maas_ironic')
        status_err(str(e), m_name='maas_ironic')
    else:
        is_up = resp.status_code == 200
        milliseconds = resp.elapsed.total_seconds() * 1000
        metric_bool('client_success', True, m_name='maas_ironic')

    status_ok(m_name='maas_ironic')
    metric_bool('ironic_api_local_status', is_up, m_name='maas_ironic')
    if is_up:
        # Response time is only meaningful when the API is up.
        metric('ironic_api_local_response_time',
               'double',
               '%.3f' % milliseconds,
               'ms')
def main():
    """Run the table checksum and report its outcome as the check status.

    Exactly two positional arguments (username and password) are
    required; ``-H/--host`` optionally selects a host other than
    localhost.

    :raises SystemExit: When the arguments are wrong or the checksum
                        reports a non-zero status.
    """
    parser = optparse.OptionParser(
        usage="Usage: %prog [-h] [-H] username password")
    parser.add_option(
        '-H', '--host',
        action='store',
        dest='host',
        default=None,
        help="Allow user to connect to something other than localhost"
    )
    options, positional = parser.parse_args()

    # The username and password are both needed to reach the database.
    if len(positional) != 2:
        parser.print_help()
        raise SystemExit(True)
    username, password = positional

    # According to
    # http://www.percona.com/doc/percona-toolkit/2.2/pt-table-checksum.html
    # an exit status of 0 means everything is okay; anything else is a
    # problem.  Stdout is discarded; stderr carries the problem details.
    status, _, err = table_checksum(username, password, options.host)
    if status != 0:
        status_err(err.strip())
        raise SystemExit(True)

    status_ok()
def check(auth_ref, args):
    """Report local Magnum API availability and response time.

    :param auth_ref: Not used by this function.
    :param args: Parsed arguments; when ``args.ip`` is set the client is
                 pointed at the local endpoint built from it.
    """
    endpoint = 'http://{ip}:9511/v1'.format(ip=args.ip)
    # Only override the endpoint when an IP was supplied.
    client_kwargs = {'endpoint': endpoint} if args.ip else {}

    try:
        magnum = get_magnum_client(**client_kwargs)
        api_is_up = True
    except exc.HttpError:
        api_is_up = False
        metric_bool('client_success', False, m_name='maas_magnum')
    except Exception as e:
        # Any other exception presumably isn't an API error.
        metric_bool('client_success', False, m_name='maas_magnum')
        status_err(str(e), m_name='maas_magnum')
    else:
        metric_bool('client_success', True, m_name='maas_magnum')
        # Time an arbitrary request.
        began = time.time()
        magnum.cluster_templates.list()
        finished = time.time()
        milliseconds = (finished - began) * 1000

    status_ok(m_name='maas_magnum')
    metric_bool('magnum_api_local_status', api_is_up, m_name='maas_magnum')
    if not api_is_up:
        # Response time is only reported when the API is up.
        return
    metric('magnum_api_local_response_time',
           'double',
           '%.3f' % milliseconds,
           'ms')
def check(args):
    """Count Octavia load balancers whose provisioning status is ERROR.

    Also emits the ``client_success`` and ``octavia_api_local_status``
    metrics describing whether the request itself worked.

    :param args: Parsed arguments providing ``ip``, ``port`` and
                 ``protocol`` for the local endpoint.
    """
    octavia = get_openstack_client('load_balancer')

    try:
        # NOTE(review): without args.ip no endpoint is built and the GET
        # below fails with NameError (reported by the generic handler).
        if args.ip:
            octavia_local_endpoint = generate_local_endpoint(
                str(octavia.get_endpoint()), args.ip, args.port,
                args.protocol, '/lbaas/loadbalancers')
        response = octavia.session.get(octavia_local_endpoint, timeout=180)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
        metric_bool('client_success', False, m_name='maas_octavia')
    except Exception as e:
        # Any other exception presumably isn't an API error.
        metric_bool('client_success', False, m_name='maas_octavia')
        status_err(str(e), m_name='maas_octavia')
    else:
        is_up = response.ok
        metric_bool('client_success', True, m_name='maas_octavia')

    status_ok(m_name='maas_octavia')
    metric_bool('octavia_api_local_status', is_up, m_name='maas_octavia')

    if is_up:
        # Only send the count when the API is up.
        num = 0
        for lb in response.json()['loadbalancers']:
            if lb['provisioning_status'] == 'ERROR':
                num += 1
        # NOTE(review): 'ms' is an odd unit for a count; kept as-is.
        metric('octavia_num_lb_in_error_status', 'uint32', num, 'ms')
def check(auth_ref, args):
    """Emit one boolean metric per Cinder service listed by os-services.

    A service counts as down only when its status is ``enabled`` and its
    state is not ``up``.

    :param auth_ref: Auth reference passed to ``get_keystone_client``.
    :param args: Parsed arguments; ``args.ip`` is used to build the endpoint.
    """
    keystone = get_keystone_client(auth_ref)
    token = keystone.auth_token
    endpoint = 'http://{ip}:8776/v1/{tenant}'.format(
        ip=args.ip, tenant=keystone.tenant_id)

    session = requests.Session()
    session.headers.update({
        'Content-type': 'application/json',
        'x-auth-token': token,
    })

    try:
        r = session.get('%s/os-services' % endpoint,
                        verify=False,
                        timeout=10)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout) as e:
        status_err(str(e))

    if not r.ok:
        status_err('could not get response from cinder api')

    status_ok()
    for service in r.json()['services']:
        enabled_but_down = (service['status'] == 'enabled' and
                            service['state'] != 'up')
        metric_bool('%s_on_host_%s' % (service['binary'], service['host']),
                    not enabled_but_down)
def check(args, tenant_id):
    """Report local Ceilometer API availability and response time.

    :param args: Parsed arguments; ``args.ip`` is used to build the endpoint.
    :param tenant_id: Not used by this function.
    """
    endpoint = 'http://{ip}:8777'.format(ip=args.ip)

    try:
        ceilometer = get_ceilometer_client(endpoint=endpoint)
    except exc.HTTPException:
        is_up = False
    except Exception as e:
        # Any other exception presumably isn't an API error.
        status_err(str(e))
    else:
        is_up = True
        # Time an arbitrary request.
        began = time()
        meters = ceilometer.meters.list()
        # Exceptions are only thrown when the meters are iterated, so walk
        # them and touch each meter_id.
        for meter in meters:
            meter.meter_id
        finished = time()
        milliseconds = (finished - began) * 1000

    status_ok()
    metric_bool('ceilometer_api_local_status', is_up)
    if not is_up:
        # Response time is only reported when the API is up.
        return
    metric('ceilometer_api_local_response_time',
           'double',
           '%.3f' % milliseconds,
           'ms')
def check(auth_ref, args):
    """Report local Glance registry availability and response time.

    :param auth_ref: Auth reference passed to ``get_keystone_client``.
    :param args: Parsed arguments; ``args.ip`` is used to build the endpoint.
    """
    # get_keystone_client is used because it has logic to fetch a new
    # token when the previous one is bad.
    token = get_keystone_client(auth_ref).auth_token
    registry_endpoint = 'http://{ip}:9191'.format(ip=args.ip)

    session = Session()
    session.headers.update({
        'Content-type': 'application/json',
        'x-auth-token': token,
    })

    try:
        # /images returns a list of public, non-deleted images.
        r = session.get('%s/images' % registry_endpoint,
                        verify=False,
                        timeout=10)
        is_up = r.ok
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
    except Exception as e:
        status_err(str(e))

    status_ok()
    metric_bool('glance_registry_local_status', is_up)
    if not is_up:
        # Response time is only reported when the API is up.
        return

    milliseconds = r.elapsed.total_seconds() * 1000
    metric('glance_registry_local_response_time',
           'double',
           '%.3f' % milliseconds,
           'ms')
def check(args):
    """Report local Keystone API status, latency and user/project counts.

    :param args: Parsed arguments providing ``protocol``, ``ip`` and
                 ``port`` for the local endpoint.
    """
    keystone = get_openstack_client('identity')
    major_version = keystone.get_api_major_version()[0]
    local_keystone_endpoint = "{}://{}:{}/v{}/services".format(
        args.protocol, args.ip, args.port, major_version)

    try:
        resp = keystone.session.get('%s' % local_keystone_endpoint,
                                    timeout=180)
        milliseconds = resp.elapsed.total_seconds() * 1000
        is_up = resp.ok
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
        metric_bool('client_success', False, m_name='maas_keystone')
    except Exception as e:
        # Any other exception presumably isn't an API error.
        metric_bool('client_success', False, m_name='maas_keystone')
        status_err(str(e), m_name='maas_keystone')
    else:
        metric_bool('client_success', True, m_name='maas_keystone')
        # Gather some vaguely interesting counts to return.
        project_count = len(list(keystone.projects()))
        user_count = len(list(keystone.users()))

    status_ok(m_name='maas_keystone')
    metric_bool('keystone_api_local_status', is_up, m_name='maas_keystone')
    if not is_up:
        # The remaining metrics are only sent when the API is up.
        return

    metric('keystone_api_local_response_time',
           'double',
           '%.3f' % milliseconds,
           'ms')
    metric('keystone_user_count', 'uint32', user_count, 'users')
    metric('keystone_tenant_count', 'uint32', project_count, 'tenants')
def check(args):
    """Emit one boolean metric per Neutron agent.

    An agent counts as down only when it is administratively up but not
    alive.  When ``args.host`` is set, only that host's agents are listed
    and the metric names omit the agent id and host.

    :param args: Parsed arguments exposing ``hostname`` and ``host``.
    """
    endpoint = "http://{hostname}:9696".format(hostname=args.hostname)

    try:
        neutron = get_neutron_client(endpoint_url=endpoint)
    except Exception as e:
        # No API status metric is gathered here, so catch everything.
        status_err(str(e))

    # Restrict the listing to one host when requested.
    filters = {"host": args.host} if args.host else {}
    agents = neutron.list_agents(**filters)["agents"]

    if len(agents) == 0:
        status_err("No host(s) found in the agents list")

    status_ok()
    for agent in agents:
        agent_is_up = not (agent["admin_state_up"] and not agent["alive"])

        if args.host:
            name = "%s_status" % agent["binary"]
        else:
            name = "%s_%s_on_host_%s" % (agent["binary"],
                                         agent["id"],
                                         agent["host"])

        metric_bool(name, agent_is_up)
def check(args):
    """Emit a ``neutron_service`` metric per Neutron agent.

    The value is ``'1'`` unless the agent is administratively up but not
    alive, in which case it is ``'0'``.

    :param args: Parsed arguments exposing ``hostname`` and ``host``.
    """
    endpoint = 'http://{hostname}:9696'.format(hostname=args.hostname)

    try:
        neutron = get_neutron_client(endpoint_url=endpoint)
    except Exception as e:
        # No API status metric is gathered here, so catch everything.
        status_err(str(e))

    if args.host:
        listing = neutron.list_agents(host=args.host)
    else:
        listing = neutron.list_agents()
    agents = listing['agents']

    if len(agents) == 0:
        status_err("No host(s) found in the agents list")

    for agent in agents:
        if agent['admin_state_up'] and not agent['alive']:
            flag = '0'
        else:
            flag = '1'

        if args.host:
            name = '%s_status' % agent['binary']
        else:
            name = '%s_%s_on_host_%s' % (agent['binary'],
                                         agent['id'],
                                         agent['host'])

        metric('neutron_service', name, flag)
def main():
    """Check table checksums and translate the result into a status.

    Expects ``username password`` as positional arguments and accepts an
    optional ``-H/--host``.

    :raises SystemExit: On bad arguments or a non-zero checksum status.
    """
    usage = "Usage: %prog [-h] [-H] username password"
    parser = optparse.OptionParser(usage=usage)
    parser.add_option(
        '-H', '--host', action='store', dest='host', default=None,
        help="Allow user to connect to something other than localhost")
    (options, args) = parser.parse_args()

    # Both the username and the password are required to connect.
    if len(args) != 2:
        parser.print_help()
        raise SystemExit(True)

    # According to
    # http://www.percona.com/doc/percona-toolkit/2.2/pt-table-checksum.html
    # a zero exit status means everything is okay and any other value
    # means a problem.  Stdout is not needed; stderr describes problems.
    result = table_checksum(args[0], args[1], options.host)
    exit_status, stderr_text = result[0], result[2]

    if exit_status == 0:
        status_ok()
        return

    status_err(stderr_text.strip())
    raise SystemExit(True)
def main():
    """Verify that a Holland backup from today or yesterday exists.

    Reads ``galera_hostname``, ``holland_binary`` and ``holland_backupset``
    from the module-level ``args``.  A backup counts as recent when
    today's or yesterday's ``YYYYMMDD`` stamp appears in its name (the
    first element of each backupset entry).  The size of the last listed
    backup is then printed, divided by 1024.
    """
    galera_hostname = args.galera_hostname
    holland_bin = args.holland_binary
    holland_bs = args.holland_backupset

    # Take "today" once so both stamps derive from the same date.
    current_date = datetime.date.today()
    today = current_date.strftime('%Y%m%d')
    yesterday = (current_date -
                 datetime.timedelta(days=1)).strftime('%Y%m%d')

    # Get completed Holland backup sets.
    backupsets = holland_lb_check(galera_hostname, holland_bin, holland_bs)

    # Each stamp needs its own membership test: a bare ``yesterday or ...``
    # is always true because ``yesterday`` is a non-empty string.
    has_recent_backup = any(
        yesterday in backup[0] or today in backup[0]
        for backup in backupsets
    )

    if has_recent_backup:
        status_ok(m_name='maas_holland')
        metric_bool('holland_backup_status', True, m_name='maas_holland')
    else:
        metric_bool('holland_backup_status', False, m_name='maas_holland')
        status_err('Could not find Holland backup from %s or %s'
                   % (yesterday, today), m_name='maas_holland')
        # NOTE(review): status_err presumably terminates the process; the
        # return also protects the index below when backupsets is empty.
        return

    # Print metric about the last backup.
    print_metrics('holland_backup_size',
                  "{0:.1f}".format(float(backupsets[-1][1]) / 1024))
def container_holland_lb_check(container, binary, backupset):
    """List a backupset's Holland backups inside an LXC container.

    Runs ``<binary> lb`` in the container through ``lxc-attach`` and, for
    every output token containing ``<backupset>/``, measures the backup
    directory with ``du -ks``.

    :param container: Name of the LXC container to attach to.
    :param binary: Holland executable to run inside the container.
    :param backupset: Backupset name to filter the listing by.
    :returns: List of ``[backupname, disksize]`` pairs; ``disksize`` is the
              first field of the ``du`` output, or ``0`` when ``du`` fails.
    """
    # Call holland directly inside the container.
    retcode, listing, err = run_command('lxc-attach -n %s -- %s lb' %
                                        (container, binary))
    if retcode > 0:
        status_err('Could not list holland backupsets: %s' % (err),
                   m_name='maas_holland')

    found = []
    marker = backupset + '/'
    for entry in listing.split():
        if marker not in entry:
            continue

        backupname = entry.split('/')[-1]

        # Determine the size of the backup.
        du_code, du_output, _ = run_command(
            'lxc-attach -n %s -- '
            'du -ks /var/backup/holland_backups/%s/%s' %
            (container, backupset, backupname))
        disksize = du_output.split()[0] if du_code == 0 else 0

        found.append([backupname, disksize])

    return found
def check(auth_ref, args):
    """Emit cloud-wide resource metrics from Nova hypervisor statistics.

    One ``cloud_resource_<name>`` metric is emitted for every entry of
    the module-level ``stats_mapping``.

    :param auth_ref: Auth reference passed to ``get_keystone_client`` to
                     obtain the tenant id.
    :param args: Parsed arguments; when ``args.ip`` is set the client is
                 pointed at the local endpoint built from it.
    """
    tenant_id = get_keystone_client(auth_ref).tenant_id
    endpoint = 'http://{ip}:8774/v2/{tenant_id}'.format(
        ip=args.ip, tenant_id=tenant_id)
    # Only bypass the catalog URL when an IP was supplied.
    client_kwargs = {'bypass_url': endpoint} if args.ip else {}

    try:
        nova = get_nova_client(**client_kwargs)
    except Exception as e:
        status_err(str(e))
    else:
        # Get some cloud stats.
        stats = nova.hypervisor_stats.statistics()
        cloud_stats = {}
        for metric_name, vals in stats_mapping.iteritems():
            cloud_stats[metric_name] = {
                'value': getattr(stats, vals['stat_name']),
                'unit': vals['unit'],
                'type': vals['type'],
            }

    status_ok()
    for metric_name, details in cloud_stats.iteritems():
        metric('cloud_resource_%s' % metric_name,
               details['type'],
               details['value'],
               details['unit'])
def check(auth_ref, args):
    """Emit ``heat_api`` metrics for local Heat API status and latency.

    :param auth_ref: Auth reference passed to ``get_keystone_client`` to
                     obtain the tenant id.
    :param args: Parsed arguments; when ``args.ip`` is set the client is
                 pointed at the local endpoint built from it.
    """
    tenant_id = get_keystone_client(auth_ref).tenant_id
    endpoint = 'http://{ip}:8004/v1/{tenant}'.format(ip=args.ip,
                                                     tenant=tenant_id)

    try:
        if args.ip:
            heat = get_heat_client(endpoint=endpoint)
        else:
            heat = get_heat_client()
    except exc.HTTPException:
        is_up = False
    except Exception as e:
        # Any other exception presumably isn't an API error.
        status_err(str(e))
    else:
        is_up = True
        # Time an arbitrary request.
        began = time.time()
        heat.build_info.build_info()
        milliseconds = (time.time() - began) * 1000

    metric('heat_api', 'heat_api_local_status', str(int(is_up)))
    if not is_up:
        # Response time is only reported when the API is up.
        return
    metric('heat_api',
           'heat_api_local_response_time',
           '%.3f' % milliseconds)
def main(args):
    """Report memcached availability and its statistics as metrics.

    :param args: Parsed arguments exposing ``ip`` and ``port``.
    """
    host = str(args.ip)

    try:
        stats = item_stats(host, args.port)
        current_version = stats['version']
    except (TypeError, IndexError):
        is_up = False
        metric_bool('client_success', False, m_name='maas_memcached')
    else:
        is_up = True
        metric_bool('client_success', True, m_name='maas_memcached')
        # Only versions listed in VERSIONS have been tested.
        if current_version not in VERSIONS:
            status_err('This plugin has only been tested with version %s '
                       'of memcached, and you are using version %s'
                       % (VERSIONS, current_version),
                       m_name='maas_memcached')

    status_ok(m_name='maas_memcached')
    metric_bool('memcache_api_local_status', is_up, m_name='maas_memcached')
    if not is_up:
        return

    for stat_name, unit in MEMCACHE_METRICS.iteritems():
        metric('memcache_%s' % stat_name, 'uint64', stats[stat_name], unit)
def main(args):
    """Validate the requested process names and run the process check.

    :param args: Parsed arguments exposing ``processes`` and ``container``.
    """
    requested = args.processes
    if not requested:
        # No process names were given on the command line.
        status_err('No executable names supplied')

    check_process_running(container_name=args.container,
                          process_names=requested)
def check(auth_ref, args):
    """Report how many Ironic nodes exist and how many are not in maintenance.

    A ``client_success`` metric is emitted on every path, including the
    ``ClientException`` path, which matches the sibling API checks.

    :param auth_ref: Not used by this function.
    :param args: Parsed arguments providing ``protocol``, ``ip`` and
                 ``port``; the local endpoint is used when ``args.ip`` is
                 set.
    """
    ironic_endpoint = '{protocol}://{ip}:{port}/v1'.format(
        ip=args.ip, protocol=args.protocol, port=args.port)

    try:
        if args.ip:
            ironic = get_ironic_client(endpoint=ironic_endpoint)
        else:
            ironic = get_ironic_client()
        is_up = True
    except exc.ClientException:
        is_up = False
        # Report the client failure so the metric is never missing.
        metric_bool('client_success', False, m_name='maas_ironic')
    except Exception as e:
        # Any other exception presumably isn't an API error.
        metric_bool('client_success', False, m_name='maas_ironic')
        status_err(str(e), m_name='maas_ironic')
        return
    else:
        metric_bool('client_success', True, m_name='maas_ironic')
        # limit=0 lists all nodes without pagination.
        all_nodes = ironic.node.list(limit=0)

    status_ok(m_name='maas_ironic')

    if is_up:
        maint_nodes_count = len(
            [node for node in all_nodes if node.maintenance])
        total_nodes = len(all_nodes)
        # "Up" here means "not flagged for maintenance".
        up_nodes = total_nodes - maint_nodes_count
        metric('ironic_up_nodes_count', 'uint32', up_nodes)
        metric('ironic_total_nodes_count', 'uint32', total_nodes)
def get_stats_from(args):
    """Dispatch to the swift-recon collector selected by ``args.recon``.

    :param args: Parsed arguments exposing ``recon``, ``deploy_osp``,
                 ``swift_recon_path`` and, for replication, ``ring``.
    :returns: Whatever the selected collector returns.
    :raises CommandNotRecognized: When ``args.recon`` names no collector.
    """
    deploy_osp = args.deploy_osp
    command = args.recon

    # Collectors that only need the recon path and the deploy flag.
    simple_collectors = {
        'async-pendings': swift_async,
        'md5': swift_md5,
        'quarantine': swift_quarantine,
        'time': swift_time,
    }

    if command in simple_collectors:
        collector = simple_collectors[command]
        return collector(swift_recon_path=args.swift_recon_path,
                         deploy_osp=deploy_osp)

    if command == 'replication':
        # Replication additionally needs a valid ring name.
        if args.ring not in {"account", "container", "object"}:
            maas_common.status_err('no ring provided to check',
                                   m_name='maas_swift')
        return swift_replication(args.ring,
                                 swift_recon_path=args.swift_recon_path,
                                 deploy_osp=deploy_osp)

    raise CommandNotRecognized('unrecognized command "{0}"'.format(
        args.recon))
def check(args):
    """Report local Neutron API availability and response time.

    The response time is formatted with three decimals, so it is declared
    as ``double`` like every other response-time metric in these checks.

    :param args: Parsed arguments; ``args.ip`` is used to build the endpoint.
    """
    NETWORK_ENDPOINT = 'http://{ip}:9696'.format(ip=args.ip)

    try:
        neutron = get_neutron_client(endpoint_url=NETWORK_ENDPOINT)
        is_up = True
    except exc.NeutronClientException:
        # The API is down: don't bother sending any other metric.
        is_up = False
    except Exception as e:
        # Any other exception presumably isn't an API error.
        status_err(str(e))
    else:
        # Time an arbitrary request.
        start = time()
        neutron.list_agents()
        end = time()
        milliseconds = (end - start) * 1000

    status_ok()
    metric_bool('neutron_api_local_status', is_up)
    # Only send the response time when the API is up.
    if is_up:
        metric('neutron_api_local_response_time',
               'double',
               '%.3f' % milliseconds,
               'ms')
def get_container_name(deploy_osp, for_ring):
    """Pick the swift container that will be used for monitoring.

    When ``deploy_osp`` is falsy the first running LXC container matching
    ``.*(swift_proxy|swift)`` is used; otherwise the running docker
    containers are searched for ``swift_proxy``.

    :param deploy_osp: Selects the docker lookup when truthy.
    :param for_ring: Ring name, used only in the error message.
    :returns: The container name, or ``None`` when the docker lookup finds
              no match.
    """
    if deploy_osp:
        listing = subprocess.check_output(
            "/usr/local/bin/docker ps -f status=running".split())
        match = getcontainer('swift_proxy', listing)
        # The last whitespace-separated field of the match is used.
        return match.split()[-1] if match else None

    container = None
    lxc_command = shlex.split(
        'lxc-ls -1 --running ".*(swift_proxy|swift)"')
    try:
        container = subprocess.check_output(lxc_command).splitlines()[0]
    except (IndexError, subprocess.CalledProcessError):
        status_err('no running swift %s or proxy containers found' %
                   for_ring, m_name='maas_swift')
    return container
def check(args):
    """Report local Keystone v3 API status, latency and user/project counts.

    Each metric is emitted exactly once.

    :param args: Parsed arguments; ``args.ip`` is used to build the endpoint.
    """
    IDENTITY_ENDPOINT = 'http://{ip}:35357/v3'.format(ip=args.ip)

    try:
        keystone = get_keystone_client(endpoint=IDENTITY_ENDPOINT)
        is_up = True
    except (exc.HttpServerError, exc.ClientException):
        is_up = False
    except Exception as e:
        # Any other exception presumably isn't an API error.
        status_err(str(e))
    else:
        # Time an arbitrary request.
        start = time.time()
        keystone.services.list()
        end = time.time()
        milliseconds = (end - start) * 1000

        # Gather some vaguely interesting metrics to return.
        project_count = len(keystone.projects.list())
        user_count = len(keystone.users.list(domain='Default'))

    status_ok()
    metric_bool('keystone_api_local_status', is_up)
    # Only send the other metrics when the API is up.
    if is_up:
        metric('keystone_api_local_response_time',
               'double',
               '%.3f' % milliseconds,
               'ms')
        metric('keystone_user_count', 'uint32', user_count, 'users')
        metric('keystone_tenant_count', 'uint32', project_count, 'tenants')
def main():
    """Collect HP hardware statuses, including power supplies, as metrics.

    ``/usr/sbin/ssacli`` is preferred; ``/usr/sbin/hpssacli`` is the
    fallback.  An error status is reported when neither can be stat'ed.
    """
    try:
        os.stat('/usr/sbin/ssacli')
    except Exception:
        try:
            os.stat('/usr/sbin/hpssacli')
        except Exception:
            maas_common.status_err(
                'Neither ssacli or hpssacli could be found',
                m_name='hp_monitoring')
        else:
            ssacli_bin = 'hpssacli'
    else:
        ssacli_bin = 'ssacli'

    # Chassis checks come first, then storage checks.
    processors_ok = get_chassis_status('hpasmcli', 'server')
    memory_ok = get_chassis_status('hpasmcli', 'dimm')
    powersupply_ok = get_powersupply_status('hpasmcli', 'powersupply')
    disk_ok = get_drive_status(ssacli_bin)
    controller_ok = get_controller_status(ssacli_bin)
    cache_ok = get_controller_cache_status(ssacli_bin)
    battery_ok = get_controller_battery_status(ssacli_bin)

    status = {}
    status['hardware_processors_status'] = processors_ok
    status['hardware_memory_status'] = memory_ok
    status['hardware_powersupply_status'] = powersupply_ok
    status['hardware_disk_status'] = disk_ok
    status['hardware_controller_status'] = controller_ok
    status['hardware_controller_cache_status'] = cache_ok
    status['hardware_controller_battery_status'] = battery_ok

    maas_common.status_ok(m_name='maas_hwvendor')
    for metric_name, healthy in status.viewitems():
        maas_common.metric_bool(metric_name, healthy, m_name='maas_hwvendor')
def check(args):
    """Report local Designate API availability and response time.

    :param args: Parsed arguments providing ``ip``, ``port`` and
                 ``protocol`` for the local endpoint.
    """
    client = get_openstack_client('dns')

    try:
        if args.ip:
            # Arbitrary call to /zones to ensure the local API is up.
            zones_url = generate_local_endpoint(
                str(client.get_endpoint()),
                args.ip,
                args.port,
                args.protocol,
                '/zones',
            )
            resp = client.session.get(zones_url, timeout=180)
            milliseconds = resp.elapsed.total_seconds() * 1000
        # NOTE(npawelek): At the time of converting to OpenStack SDK,
        # DNS is not yet fully integrated. Excluding integration with
        # the client directly until a later time.
        # NOTE(review): without args.ip ``resp`` is undefined here and the
        # NameError is reported by the generic handler below.
        api_is_up = resp.ok
    except (exc.HTTPError, exc.Timeout, exc.ConnectionError):
        api_is_up = False
        metric_bool('client_success', False, m_name='maas_designate')
    except Exception as e:
        # Any other exception presumably isn't an API error.
        metric_bool('client_success', False, m_name='maas_designate')
        status_err(str(e), m_name='maas_designate')
    else:
        metric_bool('client_success', True, m_name='maas_designate')

    status_ok(m_name='maas_designate')
    metric_bool('designate_api_local_status', api_is_up,
                m_name='maas_designate')
    if not api_is_up:
        # Response time is only reported when the API is up.
        return
    metric('designate_api_local_response_time',
           'double',
           '%.3f' % milliseconds,
           'ms')
def check(args):
    """Report local Glance registry availability and response time.

    The registry counts as up only when ``/images`` answers with HTTP 200.
    ``client_success`` is emitted exactly once: ``True`` only when the
    request completed without raising, ``False`` otherwise.

    :param args: Parsed arguments providing ``ip``, ``port`` and
                 ``protocol`` for the local endpoint.
    """
    glance = get_openstack_client('image')

    try:
        # Remove the version from the returned endpoint.
        glance_endpoint = str(glance.get_endpoint().rsplit('/', 2)[0])
        local_registry_url = generate_local_endpoint(
            glance_endpoint, args.ip, args.port, args.protocol, '/images'
        )
        resp = glance.session.get(local_registry_url, timeout=180)
        milliseconds = resp.elapsed.total_seconds() * 1000
        is_up = resp.status_code == 200
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
        metric_bool('client_success', False, m_name='maas_glance')
    except Exception as e:
        metric_bool('client_success', False, m_name='maas_glance')
        status_err(str(e), m_name='maas_glance')
    else:
        # Success is reported only here so that a failed request never
        # emits both a False and a True value for the same metric.
        metric_bool('client_success', True, m_name='maas_glance')

    status_ok(m_name='maas_glance')
    metric_bool('glance_registry_local_status', is_up, m_name='maas_glance')
    # Only send the remaining metrics when the API is up.
    if is_up:
        metric('glance_registry_local_response_time',
               'double',
               '%.3f' % milliseconds,
               'ms')
def check(args):
    """Report local Nova metadata API availability and response time.

    The API counts as up when ``GET /`` succeeds and ``1.0`` is one of the
    lines of the response body.

    :param args: Parsed arguments; ``args.ip`` is used to build the endpoint.
    """
    metadata_endpoint = 'http://{ip}:8775'.format(ip=args.ip)
    is_up = True

    session = requests.Session()
    try:
        # Only / (the ec2 versions) can be fetched without specifying an
        # instance ID and other headers.
        versions = session.get('%s/' % metadata_endpoint,
                               verify=False,
                               timeout=10)
        milliseconds = versions.elapsed.total_seconds() * 1000
        if not (versions.ok and '1.0' in versions.content.splitlines()):
            is_up = False
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
    except Exception as e:
        status_err(str(e))

    metric_values = dict()

    status_ok()
    metric_bool('nova_api_metadata_local_status', is_up)
    if not is_up:
        # Response time is only reported when the API is up.
        return

    elapsed_text = '%.3f' % milliseconds
    metric('nova_api_metadata_local_response_time',
           'double',
           elapsed_text,
           'ms')
    metric_values['nova_api_metadata_local_response_time'] = elapsed_text
    # NOTE(review): the influx write is placed on the "up" path because
    # that is the only place metric_values gets populated; confirm against
    # the original indentation.
    metric_influx(INFLUX_MEASUREMENT_NAME, metric_values)
def check(args):
    """Probe the local Keystone admin API and emit status and count metrics.

    Builds a client against port 35357 (v2.0) on ``args.ip``. When the client
    can be created, a ``services.list()`` call is timed and the number of
    tenants and users is collected.

    :param args: parsed CLI arguments providing ``ip``.
    """
    identity_url = 'http://{ip}:35357/v2.0'.format(ip=args.ip)

    try:
        keystone = get_keystone_client(endpoint=identity_url)
    except (exc.HttpServerError, exc.ClientException):
        is_up = False
    # Any other exception presumably isn't an API error
    except Exception as e:
        status_err(str(e))
    else:
        is_up = True
        # time something arbitrary
        started = time()
        keystone.services.list()
        finished = time()
        milliseconds = (finished - started) * 1000

        # gather some vaguely interesting metrics to return
        tenant_count = len(keystone.tenants.list())
        user_count = len(keystone.users.list())

    status_ok()
    metric_bool('keystone_api_local_status', is_up)

    if not is_up:
        return
    # only want to send other metrics if api is up
    metric('keystone_api_local_response_time',
           'double',
           '%.3f' % milliseconds,
           'ms')
    metric('keystone_user_count', 'uint32', user_count)
    metric('keystone_tenant_count', 'uint32', tenant_count)
def check(args):
    """Report the up/down state of Neutron agents as boolean metrics.

    Lists agents through a client bound to port 9696 on ``args.ip``,
    optionally filtered by ``args.host``. An agent is reported as down only
    when it is administratively up but not alive.

    :param args: parsed CLI arguments providing ``ip`` and ``host``.
    """
    network_url = 'http://{ip}:9696'.format(ip=args.ip)

    try:
        neutron = get_neutron_client(endpoint_url=network_url)
    # not gathering api status metric here so catch any exception
    except Exception as e:
        status_err(str(e))

    # gather neutron agent states
    if args.host:
        listing = neutron.list_agents(host=args.host)
    else:
        listing = neutron.list_agents()
    agents = listing['agents']

    if len(agents) == 0:
        status_err("No host(s) found in the agents list")

    # return all the things
    status_ok()
    for agent in agents:
        # Down means: should be running (admin up) yet not reporting alive.
        agent_is_up = not (agent['admin_state_up'] and not agent['alive'])

        if args.host:
            name = '%s_status' % agent['binary']
        else:
            name = '%s_%s_on_host_%s' % (agent['binary'],
                                         agent['id'],
                                         agent['host'])

        metric_bool(name, agent_is_up)
def check(auth_ref, args):
    """Report the up/down state of Cinder services as boolean metrics.

    Queries ``/os-services`` on the local volume API (port 8776, v1) using a
    token from the Keystone client. A service is reported as down only when
    it is enabled and its state is not ``up``.

    :param auth_ref: auth reference handed to ``get_keystone_client``.
    :param args: parsed CLI arguments providing ``ip``.
    """
    keystone = get_keystone_client(auth_ref)
    auth_token = keystone.auth_token

    volume_url = 'http://{ip}:8776/v1/{tenant}'.format(
        ip=args.ip, tenant=keystone.tenant_id)

    session = requests.Session()
    session.headers.update({
        'Content-type': 'application/json',
        'x-auth-token': auth_token,
    })

    try:
        response = session.get('%s/os-services' % volume_url,
                               verify=False,
                               timeout=10)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout) as e:
        status_err(str(e))

    if not response.ok:
        status_err('could not get response from cinder api')

    status_ok()
    for service in response.json()['services']:
        # Down means: enabled, yet not in the 'up' state.
        enabled = service['status'] == 'enabled'
        service_is_up = not (enabled and service['state'] != 'up')

        metric_name = '%s_on_host_%s' % (service['binary'], service['host'])
        metric_bool(metric_name, service_is_up)
def check(auth_ref, args):
    """Probe the local Glance registry and emit status/latency metrics.

    Issues an authenticated GET on ``/images`` against port 9191 of
    ``args.ip``; the registry counts as up when the response is OK.

    :param auth_ref: auth reference handed to ``get_keystone_client``.
    :param args: parsed CLI arguments providing ``ip``.
    """
    # We call get_keystone_client here as there is some logic within to get a
    # new token if previous one is bad.
    keystone = get_keystone_client(auth_ref)
    auth_token = keystone.auth_token
    registry_url = 'http://{ip}:9191'.format(ip=args.ip)

    session = Session()
    session.headers.update({
        'Content-type': 'application/json',
        'x-auth-token': auth_token,
    })

    try:
        # /images returns a list of public, non-deleted images
        r = session.get('%s/images' % registry_url, verify=False, timeout=10)
        is_up = r.ok
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
    # Any other exception presumably isn't an API error
    except Exception as e:
        status_err(str(e))

    status_ok()
    metric_bool('glance_registry_local_status', is_up)

    if not is_up:
        return
    # only want to send other metrics if api is up
    # NOTE(review): the value is a float but the metric type is 'uint32' --
    # confirm whether 'double' was intended.
    milliseconds = r.elapsed.total_seconds() * 1000
    metric('glance_registry_local_response_time', 'uint32', milliseconds)
def check(args):
    """Probe the local Designate mDNS service and emit status metrics.

    Sends a UDP ``A`` query for ``example.org`` to port 5354 on ``args.ip``
    with a five second timeout. Any reply whose rcode is at most 16 counts
    as the service being up; a timeout counts as down.

    :param args: parsed CLI arguments providing ``ip``.
    """
    plugin = 'maas_designate'

    try:
        # attempt a query to example.org
        # return good check on any valid response
        start = datetime.datetime.now()
        query = dns.message.make_query("example.org", "A")
        reply = dns.query.udp(query, timeout=5, where=args.ip, port=5354)
        end = datetime.datetime.now()

        # int of return code
        mdns_is_up = (reply.rcode() <= 16)
    except dns.exception.Timeout:
        mdns_is_up = False
        metric_bool('client_success', False, m_name=plugin)
    except Exception as e:
        metric_bool('client_success', False, m_name=plugin)
        status_err(str(e), m_name=plugin)
    else:
        metric_bool('client_success', True, m_name=plugin)
        elapsed = end - start
        milliseconds = (elapsed.microseconds +
                        elapsed.seconds * 10 ** 6) / 10 ** 3

    status_ok(m_name=plugin)
    metric_bool('designate_mdns_local_status', mdns_is_up, m_name=plugin)

    if not mdns_is_up:
        return
    # only want to send other metrics if api is up
    metric('designate_mdns_local_response_time',
           'double',
           '%.3f' % milliseconds,
           'ms')
def main(args):
    """Run an OpenManage hardware report and emit a boolean health metric.

    ``args.omc`` must hold exactly two items: the report type and the report
    request. The report output is checked with ``all_okay`` against the
    pattern stored in ``regex``: keyed by the request for ``pwrsupplies``,
    otherwise by the report type.

    :param args: parsed CLI arguments providing ``omc``.
    """
    if len(args.omc) != 2:
        # Use a dedicated name here; rebinding ``args`` to a string would
        # break every ``args.omc`` lookup below should status_err return.
        provided = ' '.join(args.omc)
        status_err(
            'Requires 2 arguments, arguments provided: "%s"' % provided,
            m_name='maas_hwvendor'
        )

    report_type = args.omc[0].lower()
    report_request = args.omc[1].lower()
    metric_name = 'hardware_%s_status' % report_request

    # If we're not using the correct version of OpenManage, error out
    check_openmanage_version()

    try:
        report = hardware_report(report_type, report_request)
    except (OSError, subprocess.CalledProcessError) as e:
        metric_bool(metric_name, False)
        status_err(str(e), m_name='maas_hwvendor')

    status_ok(m_name='maas_hwvendor')

    # Power supplies have their own pattern; everything else is matched by
    # the pattern registered for the report type.
    if report_request == 'pwrsupplies':
        regex_key = report_request
    else:
        regex_key = report_type
    metric_bool(metric_name, all_okay(report, regex[regex_key]))
def check(auth_ref, args):
    """Probe the Magnum API and emit status/latency metrics.

    Builds a client against port 9511 (v1) on ``args.ip`` when an IP is
    given, otherwise uses the default client. When the client can be
    created, a ``cluster_templates.list()`` call is timed.

    :param auth_ref: auth reference (not used by this function).
    :param args: parsed CLI arguments providing ``ip``.
    """
    plugin = 'maas_magnum'
    magnum_url = 'http://{ip}:9511/v1'.format(ip=args.ip)

    try:
        if args.ip:
            magnum = get_magnum_client(endpoint=magnum_url)
        else:
            magnum = get_magnum_client()
    except exc.HttpError:
        api_is_up = False
        metric_bool('client_success', False, m_name=plugin)
    # Any other exception presumably isn't an API error
    except Exception as e:
        metric_bool('client_success', False, m_name=plugin)
        status_err(str(e), m_name=plugin)
    else:
        api_is_up = True
        metric_bool('client_success', True, m_name=plugin)
        # time something arbitrary
        started = time.time()
        magnum.cluster_templates.list()
        finished = time.time()
        milliseconds = (finished - started) * 1000

    status_ok(m_name=plugin)
    metric_bool('magnum_api_local_status', api_is_up, m_name=plugin)

    if not api_is_up:
        return
    # only want to send other metrics if api is up
    metric('magnum_api_local_response_time',
           'double',
           '%.3f' % milliseconds,
           'ms')
def check(args):
    """Report the up/down state of Nova services as boolean metrics.

    Lists services through a client bound to port 8774 (v3) on ``args.ip``,
    optionally filtered by ``args.host``. A service is reported as down only
    when it is enabled and its state is ``down``.

    :param args: parsed CLI arguments providing ``ip`` and ``host``.
    """
    compute_url = 'http://{ip}:8774/v3'.format(ip=args.ip)

    try:
        nova = get_nova_client(bypass_url=compute_url)
    # not gathering api status metric here so catch any exception
    except Exception as e:
        status_err(str(e))

    # gather nova service states
    if args.host:
        services = nova.services.list(host=args.host)
    else:
        services = nova.services.list()

    if len(services) == 0:
        status_err("No host(s) found in the service list")

    # return all the things
    status_ok()
    for service in services:
        # Down means: enabled, yet explicitly in the 'down' state.
        enabled = service.status == 'enabled'
        service_is_up = not (enabled and service.state == 'down')

        if args.host:
            name = '%s_status' % service.binary
        else:
            name = '%s_on_host_%s_status' % (service.binary, service.host)

        metric_bool(name, service_is_up)
def check(auth_ref, args):
    """Probe the Octavia API and emit status/latency metrics.

    Requests ``/v1/loadbalancers?limit=1`` from either the local endpoint
    (port 9876 on ``args.ip``) or, when no IP is given, the internal
    ``load-balancer`` endpoint looked up from ``auth_ref``. The API counts as
    up only when the response status is 200.

    :param auth_ref: auth reference used to look up the service endpoint.
    :param args: parsed CLI arguments providing ``ip``.
    """
    # The version segment is appended when the request URL is built below;
    # including it here as well produced '/v1/v1/loadbalancers'.
    OCTAVIA_ENDPOINT = 'http://{ip}:9876'.format(ip=args.ip)

    try:
        if args.ip:
            endpoint = OCTAVIA_ENDPOINT
        else:
            endpoint = get_endpoint_url_for_service(
                'load-balancer', auth_ref, 'internal')
        # time something arbitrary
        start = datetime.datetime.now()
        r = requests.get(endpoint + "/v1/loadbalancers?limit=1")
        end = datetime.datetime.now()
        api_is_up = (r.status_code == 200)
    except (requests.HTTPError, requests.Timeout, requests.ConnectionError):
        api_is_up = False
        metric_bool('client_success', False, m_name='maas_octavia')
    # Any other exception presumably isn't an API error
    except Exception as e:
        metric_bool('client_success', False, m_name='maas_octavia')
        status_err(str(e), m_name='maas_octavia')
    else:
        metric_bool('client_success', True, m_name='maas_octavia')
        dt = (end - start)
        milliseconds = (dt.microseconds + dt.seconds * 10 ** 6) / 10 ** 3

    status_ok(m_name='maas_octavia')
    metric_bool('octavia_api_local_status', api_is_up, m_name='maas_octavia')
    if api_is_up:
        # only want to send other metrics if api is up
        metric('octavia_api_local_response_time',
               'double',
               '%.3f' % milliseconds,
               'ms')
def check(args):
    """Probe the local Heat API and emit status/latency metrics.

    Issues a GET against the local ``/build_info`` endpoint built from
    ``args.ip``, ``args.port`` and ``args.protocol``. The API counts as up
    whenever the request completes without raising.

    :param args: parsed CLI arguments providing ``ip``, ``port`` and
        ``protocol``.
    """
    plugin = 'maas_heat'
    heat = get_openstack_client('orchestration')

    try:
        build_info_url = generate_local_endpoint(
            str(heat.get_endpoint()),
            args.ip,
            args.port,
            args.protocol,
            '/build_info')
        resp = heat.session.get(build_info_url)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
        metric_bool('client_success', False, m_name=plugin)
    # Any other exception presumably isn't an API error
    except Exception as e:
        metric_bool('client_success', False, m_name=plugin)
        status_err(str(e), m_name=plugin)
    else:
        is_up = True
        milliseconds = resp.elapsed.total_seconds() * 1000
        metric_bool('client_success', True, m_name=plugin)

    status_ok(m_name=plugin)
    metric_bool('heat_api_local_status', is_up, m_name=plugin)

    if not is_up:
        return
    # only want to send other metrics if api is up
    metric('heat_api_local_response_time',
           'double',
           '%.3f' % milliseconds,
           'ms')
def check(auth_ref, args):
    """Probe the Ironic API and emit status/latency metrics.

    Builds a client against the v1 endpoint derived from ``args.protocol``,
    ``args.ip`` and ``args.port`` when an IP is given, otherwise uses the
    default client. When the client can be created, a ``node.list()`` call
    is timed.

    ``client_success`` is emitted on every path, so a client error shows up
    as an explicit ``False`` rather than a missing metric.

    :param auth_ref: auth reference (not used by this function).
    :param args: parsed CLI arguments providing ``ip``, ``port`` and
        ``protocol``.
    """
    ironic_endpoint = ('{protocol}://{ip}:{port}/v1'.format(
        ip=args.ip,
        protocol=args.protocol,
        port=args.port))

    try:
        if args.ip:
            ironic = get_ironic_client(endpoint=ironic_endpoint)
        else:
            ironic = get_ironic_client()

        is_up = True
    except exc.ClientException:
        is_up = False
        # Report the failure explicitly, matching the other API checks.
        metric_bool('client_success', False, m_name='maas_ironic')
    # Any other exception presumably isn't an API error
    except Exception as e:
        metric_bool('client_success', False, m_name='maas_ironic')
        status_err(str(e), m_name='maas_ironic')
    else:
        metric_bool('client_success', True, m_name='maas_ironic')
        # time something arbitrary
        start = time.time()
        ironic.node.list()
        end = time.time()
        milliseconds = (end - start) * 1000

    status_ok(m_name='maas_ironic')
    metric_bool('ironic_api_local_status', is_up, m_name='maas_ironic')
    if is_up:
        # only want to send other metrics if api is up
        metric('ironic_api_local_response_time',
               'double',
               '%.3f' % milliseconds,
               'ms')
def check(args):
    """Probe the local Nova metadata API and emit status/latency metrics.

    Requests ``/`` on port 8775 of ``args.ip`` (the EC2 version listing) and
    treats the API as up when the response is OK and lists version ``1.0``.

    :param args: parsed CLI arguments providing ``ip``.
    """
    metadata_endpoint = ('http://{ip}:8775'.format(ip=args.ip))
    is_up = True

    s = requests.Session()

    try:
        # looks like we can only get / (ec2 versions) without specifying
        # an instance ID and other headers
        versions = s.get('%s/' % metadata_endpoint,
                         verify=False,
                         timeout=10)
        milliseconds = versions.elapsed.total_seconds() * 1000
        # Compare against decoded text: ``content`` is bytes, so on
        # Python 3 a str would never match any of its lines.
        if not versions.ok or '1.0' not in versions.text.splitlines():
            is_up = False
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
    # Any other exception presumably isn't an API error
    except Exception as e:
        status_err(str(e))

    status_ok()
    metric_bool('nova_api_metadata_local_status', is_up)
    # only want to send other metrics if api is up
    if is_up:
        metric('nova_api_metadata_local_response_time',
               'double',
               '%.3f' % milliseconds,
               'ms')