def check(auth_ref, args):
    """Probe the local glance-registry endpoint and report its health.

    Sends an authenticated GET to ``/images`` on port 9191 of ``args.ip``
    with a 10 second timeout. The ``glance_registry_local_status`` boolean
    metric is always emitted; the response-time metric and the InfluxDB
    payload are only produced when the response was OK.

    :param auth_ref: auth reference handed to ``get_keystone_client``.
    :param args: parsed arguments; only ``args.ip`` is read.
    """
    # Go through get_keystone_client instead of reusing a stored token:
    # there is some logic within to get a new token if the previous one
    # is bad.
    token = get_keystone_client(auth_ref).auth_token
    base_url = 'http://{ip}:9191'.format(ip=args.ip)

    session = requests.Session()
    session.headers.update({
        'Content-type': 'application/json',
        'x-auth-token': token,
    })

    try:
        # /images returns a list of public, non-deleted images
        response = session.get('%s/images' % base_url,
                               verify=False, timeout=10)
        is_up = response.ok
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        # Transport-level failures count as "service down".
        is_up = False
    except Exception as e:
        # Any other exception presumably isn't an API error.
        status_err(str(e))

    metric_values = {}
    status_ok()
    metric_bool('glance_registry_local_status', is_up)

    # only want to send other metrics if api is up
    if not is_up:
        return

    elapsed_ms = '%.3f' % (response.elapsed.total_seconds() * 1000)
    metric('glance_registry_local_response_time', 'double', elapsed_ms, 'ms')
    metric_values['glance_registry_local_response_time'] = elapsed_ms
    metric_influx(INFLUX_MEASUREMENT_NAME, metric_values)
def check(args):
    """Probe the local nova metadata API and emit status/latency metrics.

    Sends a GET to ``/`` on port 8775 of ``args.ip`` with a 10 second
    timeout. The API counts as up only when the response is OK and one of
    the lines in the body is exactly ``1.0``. The
    ``nova_api_metadata_local_status`` boolean metric is always emitted;
    the response time and the InfluxDB payload only when the API is up.

    :param args: parsed arguments; only ``args.ip`` is read.
    """
    metadata_endpoint = 'http://{ip}:8775'.format(ip=args.ip)
    is_up = True

    session = requests.Session()
    try:
        # looks like we can only get / (ec2 versions) without specifying
        # an instance ID and other headers
        versions = session.get('%s/' % metadata_endpoint,
                               verify=False, timeout=10)
        milliseconds = versions.elapsed.total_seconds() * 1000
        # Use the decoded body: ``content`` is bytes on Python 3, so the
        # str '1.0' would never match any of its lines and the API would
        # always be reported as down.
        if not versions.ok or '1.0' not in versions.text.splitlines():
            is_up = False
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
    except Exception as e:
        # Any other exception presumably isn't an API error.
        status_err(str(e))

    metric_values = {}
    status_ok()
    metric_bool('nova_api_metadata_local_status', is_up)

    # only want to send other metrics if api is up
    if is_up:
        response_time = '%.3f' % milliseconds
        metric('nova_api_metadata_local_response_time',
               'double',
               response_time,
               'ms')
        metric_values['nova_api_metadata_local_response_time'] = response_time
        metric_influx(INFLUX_MEASUREMENT_NAME, metric_values)
def check(args):
    """Probe an arbitrary local service API and emit status/latency metrics.

    The URL is ``{scheme}://{ip}:{port}`` followed by ``args.path``, where
    the scheme is ``https`` when ``args.ssl`` is truthy. ``args.path`` is
    treated as a format template (see below). The
    ``{name}_api_local_status`` boolean metric is always emitted; the
    response time and the InfluxDB payload only when the request completed
    and the response was OK.

    :param args: parsed arguments; reads ``auth``, ``ssl``, ``ip``,
        ``port``, ``version``, ``path`` and ``name``.
    """
    headers = {'Content-type': 'application/json'}
    path_options = {}
    if args.auth:
        auth_ref = get_auth_ref()
        keystone = get_keystone_client(auth_ref)
        # NOTE(review): OpenStack APIs normally read the token from the
        # 'X-Auth-Token' header - confirm 'auth_token' is intended here.
        headers['auth_token'] = keystone.auth_token
        path_options['project_id'] = keystone.project_id

    scheme = 'https' if args.ssl else 'http'
    endpoint = '{scheme}://{ip}:{port}'.format(ip=args.ip, port=args.port,
                                               scheme=scheme)
    if args.version is not None:
        path_options['version'] = args.version

    # The options are passed both positionally (as before, so templates
    # such as '{0[version]}' keep working) and as keywords, so that plain
    # '{version}' / '{project_id}' placeholders resolve instead of raising
    # KeyError.
    path = args.path.format(path_options, **path_options)

    session = requests.Session()
    session.headers.update(headers)

    # Insert a separator only when the path does not already carry one.
    if path and not path.startswith('/'):
        url = '/'.join((endpoint, path))
    else:
        url = ''.join((endpoint, path))

    try:
        r = session.get(url, verify=False, timeout=10)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        up = False
    else:
        up = True

    # Previously this dict was never created, so the success path below
    # failed with a NameError before anything reached InfluxDB.
    metric_values = {}
    status_ok()
    metric_bool('{name}_api_local_status'.format(name=args.name), up)

    if up and r.ok:
        response_time = '%.3f' % (r.elapsed.total_seconds() * 1000)
        metric_name = '{name}_api_local_response_time'.format(name=args.name)
        metric(metric_name, 'double', response_time, 'ms')
        metric_values[metric_name] = response_time
        metric_influx(INFLUX_MEASUREMENT_NAME, metric_values)
def check(auth_ref, args):
    """Probe the local glance API and emit status, latency and image counts.

    A glance client is created against port 9292 (``/v1``) of ``args.ip``
    when an IP is given, otherwise with the client defaults. When the
    client is created successfully, an image listing call is timed and the
    images are tallied by status. ``glance_api_local_status`` is always
    emitted; the remaining metrics only when the API is up.

    :param auth_ref: accepted for interface compatibility; not read here.
    :param args: parsed arguments; only ``args.ip`` is read.
    """
    endpoint = 'http://{ip}:9292/v1'.format(ip=args.ip)

    try:
        glance = (get_glance_client(endpoint=endpoint) if args.ip
                  else get_glance_client())
        is_up = True
    except exc.HTTPException:
        is_up = False
    except Exception as e:
        # Any other exception presumably isn't an API error
        status_err(str(e))
    else:
        # time something arbitrary
        started = time.time()
        glance.images.list(search_opts={'all_tenants': 1})
        milliseconds = (time.time() - started) * 1000

        # gather some metrics
        images = glance.images.list(search_opts={'all_tenants': 1})
        status_count = collections.Counter(image.status for image in images)

    metric_values = {}
    status_ok()
    metric_bool('glance_api_local_status', is_up)

    # only want to send other metrics if api is up
    if not is_up:
        return

    response_time = '%.3f' % milliseconds
    metric('glance_api_local_response_time', 'double', response_time, 'ms')
    metric_values['glance_api_local_response_time'] = response_time

    for status in IMAGE_STATUSES:
        key = 'glance_%s_images' % status
        metric(key, 'uint32', status_count[status], 'images')
        metric_values[key] = status_count[status]

    metric_influx(INFLUX_MEASUREMENT_NAME, metric_values)
def check(auth_ref, args):
    """Collect cloud-wide hypervisor statistics and emit them as metrics.

    A nova client is created against port 8774 of ``args.ip`` when an IP is
    given, otherwise with the client defaults. Every entry of
    ``stats_mapping`` is read from the hypervisor statistics and emitted as
    ``cloud_resource_<name>``. ``total_vcpus`` is scaled by
    ``args.cpu_allocation_ratio`` and ``total_memory`` by
    ``args.mem_allocation_ratio``.

    :param auth_ref: auth reference handed to ``get_keystone_client``.
    :param args: parsed arguments; reads ``ip``, ``cpu_allocation_ratio``
        and ``mem_allocation_ratio``.
    """
    keystone = get_keystone_client(auth_ref)
    tenant_id = keystone.tenant_id

    COMPUTE_ENDPOINT = (
        'http://{ip}:8774/v2/{tenant_id}'.format(ip=args.ip,
                                                 tenant_id=tenant_id)
    )

    # Per-metric multipliers; every metric not listed is reported unscaled.
    multipliers = {
        'total_vcpus': args.cpu_allocation_ratio,
        'total_memory': args.mem_allocation_ratio,
    }

    try:
        if args.ip:
            nova = get_nova_client(bypass_url=COMPUTE_ENDPOINT)
        else:
            nova = get_nova_client()
    except Exception as e:
        # NOTE(review): status_err is presumably fatal; otherwise the code
        # below would run without cloud_stats - confirm.
        status_err(str(e))
    else:
        # get some cloud stats
        stats = nova.hypervisor_stats.statistics()
        cloud_stats = {}
        # .items() instead of .iteritems(): works on Python 2 and 3 alike.
        for metric_name, vals in stats_mapping.items():
            multiplier = multipliers.get(metric_name, 1)
            cloud_stats[metric_name] = {
                'value': getattr(stats, vals['stat_name']) * multiplier,
                'unit': vals['unit'],
                'type': vals['type'],
            }

    metric_values = {}
    status_ok()
    for metric_name, entry in cloud_stats.items():
        metric('cloud_resource_%s' % metric_name,
               entry['type'],
               entry['value'],
               entry['unit'])
        metric_values['cloud_resource_%s' % metric_name] = entry['value']

    metric_influx(INFLUX_MEASUREMENT_NAME, metric_values)
def check(auth_ref, args):
    """Probe the local heat API and emit status/latency metrics.

    A heat client is created against port 8004 of ``args.ip`` (scoped to
    the keystone tenant) when an IP is given, otherwise with the client
    defaults. When the client is created successfully, a ``build_info``
    call is timed. ``heat_api_local_status`` is always emitted; the
    response time and the InfluxDB payload only when the API is up.

    :param auth_ref: auth reference handed to ``get_keystone_client``.
    :param args: parsed arguments; only ``args.ip`` is read.
    """
    tenant = get_keystone_client(auth_ref).tenant_id
    endpoint = 'http://{ip}:8004/v1/{tenant}'.format(ip=args.ip,
                                                     tenant=tenant)

    try:
        heat = (get_heat_client(endpoint=endpoint) if args.ip
                else get_heat_client())
        is_up = True
    except exc.HTTPException:
        is_up = False
    except Exception as e:
        # Any other exception presumably isn't an API error
        status_err(str(e))
    else:
        # time something arbitrary
        started = time.time()
        heat.build_info.build_info()
        milliseconds = (time.time() - started) * 1000

    metric_values = {}
    status_ok()
    metric_bool('heat_api_local_status', is_up)

    # only want to send other metrics if api is up
    if not is_up:
        return

    response_time = '%.3f' % milliseconds
    metric('heat_api_local_response_time', 'double', response_time, 'ms')
    metric_values['heat_api_local_response_time'] = response_time
    metric_influx(INFLUX_MEASUREMENT_NAME, metric_values)
def check(args):
    """Probe the local neutron API and emit status, latency and inventory.

    A neutron client is created against port 9696 of ``args.ip`` when an IP
    is given, otherwise with the client defaults. When the client is
    created successfully, the agent listing is timed and networks, agents,
    routers, subnets and ports are counted (routers and ports also per
    status). ``neutron_api_local_status`` is always emitted; everything
    else only when the API is up.

    :param args: parsed arguments; only ``args.ip`` is read.
    """
    NETWORK_ENDPOINT = 'http://{ip}:9696'.format(ip=args.ip)

    def count_in_status(items, status):
        # Number of resources whose 'status' field equals ``status``.
        return sum(1 for item in items if item['status'] == status)

    try:
        if args.ip:
            neutron = get_neutron_client(endpoint_url=NETWORK_ENDPOINT)
        else:
            neutron = get_neutron_client()
        is_up = True
    # if we get a NeutronClientException don't bother sending any other
    # metric: the API is down
    except exc.NeutronClientException:
        is_up = False
    # Any other exception presumably isn't an API error
    except Exception as e:
        status_err(str(e))
    else:
        # time something arbitrary; the response is reused for the agent
        # count so the same listing is not requested twice
        start = time.time()
        agent_listing = neutron.list_agents()
        end = time.time()
        milliseconds = (end - start) * 1000

        # gather some metrics
        networks = len(neutron.list_networks()['networks'])
        agents = len(agent_listing['agents'])
        routers = neutron.list_routers()['routers']
        subnets = len(neutron.list_subnets()['subnets'])
        ports = neutron.list_ports()['ports']

    metric_values = {}
    status_ok()
    metric_bool('neutron_api_local_status', is_up)

    # only want to send other metrics if api is up
    if is_up:
        response_time = '%.3f' % milliseconds
        metric('neutron_api_local_response_time',
               'double',
               response_time,
               'ms')
        metric('neutron_networks', 'uint32', networks, 'networks')
        metric('neutron_agents', 'uint32', agents, 'agents')
        # Emit the router *count* (the raw list was passed before) and
        # label it with the matching unit.
        metric('neutron_routers', 'uint32', len(routers), 'routers')
        metric('neutron_subnets', 'uint32', subnets, 'subnets')

        metric_values['neutron_api_local_response_time'] = response_time
        metric_values['neutron_networks'] = networks
        metric_values['neutron_agents'] = agents
        metric_values['neutron_routers'] = len(routers)
        metric_values['neutron_routers_in_status_ACTIVE'] = \
            count_in_status(routers, 'ACTIVE')
        metric_values['neutron_routers_in_status_DOWN'] = \
            count_in_status(routers, 'DOWN')
        metric_values['neutron_subnets'] = subnets
        metric_values['neutron_ports'] = len(ports)
        metric_values['neutron_ports_in_status_ACTIVE'] = \
            count_in_status(ports, 'ACTIVE')
        metric_values['neutron_ports_in_status_BUILD'] = \
            count_in_status(ports, 'BUILD')
        metric_values['neutron_ports_in_status_DOWN'] = \
            count_in_status(ports, 'DOWN')
        metric_influx(INFLUX_MEASUREMENT_NAME, metric_values)
def check(args, auth_details):
    """Collect keystone meters from ceilometer and send them to InfluxDB.

    For every meter in ``KEYSTONE_METERS`` the ceilometer statistics are
    listed and each configured field is stored as
    ``keystone_<meter>_<field>``. The ``duration`` field is divided by the
    meter's recorded ``count`` and stored as ``avg_time`` instead. The
    collected values are printed and sent to InfluxDB.

    NOTE: direct timing of the keystone API and the status/boolean metrics
    are currently disabled in this check; only the ceilometer-derived
    values are produced.

    :param args: parsed arguments; currently not read.
    :param auth_details: auth settings; currently not read.
    """
    # Bound up front so the final step never hits an undefined name when
    # the client cannot be created.
    metric_values = {}
    is_up = False

    try:
        ceilometer = get_ceilometer_client()
        is_up = True
    except (exc.HttpServerError, exc.ClientException):
        is_up = False
    # Any other exception presumably isn't an API error
    except Exception as e:
        status_err(str(e))
    else:
        # .items() instead of .iteritems(): works on Python 2 and 3 alike.
        for meter_name, fields in KEYSTONE_METERS.items():
            # gather ceilometer stats
            stats = ceilometer.statistics.list(meter_name)

            # 'trim' the meter name a bit
            prefix = "keystone_" + meter_name.replace(".", "_") + "_"

            for stat in stats:
                for field in fields:
                    value = getattr(stat, field)
                    if field == "duration":
                        # The average needs a non-zero sample count that
                        # was already recorded for this meter; skip it
                        # rather than fail with KeyError/ZeroDivisionError.
                        count = metric_values.get(prefix + "count")
                        if not count:
                            continue
                        value = value / count
                        field = "avg_time"
                    metric_values[prefix + field] = value

        print(metric_values)

    # only want to send metrics if the client could be created
    if is_up:
        metric_influx(INFLUX_MEASUREMENT_NAME, metric_values)
def check(args):
    """Load the horizon splash page, log in, and emit status/timing metrics.

    Fetches ``https://<args.ip>:443``, checks that the page mentions
    "openstack dashboard", extracts the CSRF token and region from the
    login form and posts the credentials from ``get_auth_details()`` to
    ``/auth/login/``. The login counts as successful when the response is
    OK and mentions "overview". ``horizon_local_status`` is always emitted;
    status codes and timings only when the splash page was reachable.

    :param args: parsed arguments; only ``args.ip`` is read.
    """
    # disable warning for insecure cert on horizon
    if requests.__build__ >= 0x020400:
        requests.packages.urllib3.disable_warnings()

    splash_status_code = 0
    splash_milliseconds = 0.0
    login_status_code = 0
    login_milliseconds = 0.0

    is_up = True

    auth_details = get_auth_details()
    OS_USERNAME = auth_details['OS_USERNAME']
    OS_PASSWORD = auth_details['OS_PASSWORD']
    HORIZON_URL = 'https://{ip}'.format(ip=args.ip)
    HORIZON_PORT = '443'

    s = requests.Session()

    try:
        r = s.get('%s:%s' % (HORIZON_URL, HORIZON_PORT),
                  verify=False,
                  timeout=10)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
    else:
        # Search the decoded text: a str pattern against bytes ``content``
        # raises TypeError on Python 3.
        if not (r.ok and
                re.search('openstack dashboard', r.text, re.IGNORECASE)):
            status_err('could not load login page')

        splash_status_code = r.status_code
        splash_milliseconds = r.elapsed.total_seconds() * 1000

        # Parse the page once and pull both hidden form fields from it.
        login_page = html.fromstring(r.content)
        csrf_tokens = login_page.xpath(
            '//input[@name="csrfmiddlewaretoken"]/@value')
        regions = login_page.xpath('//input[@name="region"]/@value')
        if not (csrf_tokens and regions):
            # Report a clean check error instead of an IndexError below.
            status_err('could not find login form fields')
        csrf_token = csrf_tokens[0]
        region = regions[0]

        s.headers.update(
            {'Content-type': 'application/x-www-form-urlencoded',
             'Referer': HORIZON_URL})
        payload = {'username': OS_USERNAME,
                   'password': OS_PASSWORD,
                   'csrfmiddlewaretoken': csrf_token,
                   'region': region}
        try:
            # Same timeout as the splash request so a hung login cannot
            # block the check indefinitely.
            login = s.post(
                ('%s:%s/auth/login/') % (HORIZON_URL, HORIZON_PORT),
                data=payload,
                verify=False,
                timeout=10)
        except (exc.ConnectionError, exc.HTTPError, exc.Timeout) as e:
            status_err('While logging in: %s' % e)

        if not (login.ok and
                re.search('overview', login.text, re.IGNORECASE)):
            status_err('could not log in')

        login_status_code = login.status_code
        login_milliseconds = login.elapsed.total_seconds() * 1000

    metric_values = {}
    status_ok()
    metric_bool('horizon_local_status', is_up)

    if is_up:
        metric('splash_status_code', 'uint32', splash_status_code,
               'http_code')
        metric('splash_milliseconds', 'double', splash_milliseconds, 'ms')
        metric('login_status_code', 'uint32', login_status_code,
               'http_code')
        metric('login_milliseconds', 'double', login_milliseconds, 'ms')

        metric_values['splash_status_code'] = splash_status_code
        metric_values['splash_milliseconds'] = splash_milliseconds
        metric_values['login_status_code'] = login_status_code
        metric_values['login_milliseconds'] = login_milliseconds
        metric_influx(INFLUX_MEASUREMENT_NAME, metric_values)
def check(auth_ref, args):
    """Probe the local nova API and emit status, latency and instance data.

    A nova client is created against port 8774 of ``args.ip`` when an IP is
    given, otherwise with the client defaults; a ceilometer client is
    created as well. When both are created successfully, the service
    listing is timed, servers of all tenants are tallied by status, and the
    configured fields of every meter in ``NOVA_METERS`` are read from
    ceilometer. ``nova_api_local_status`` is always emitted; everything
    else only when the API is up.

    :param auth_ref: auth reference handed to ``get_keystone_client``.
    :param args: parsed arguments; only ``args.ip`` is read.
    """
    keystone = get_keystone_client(auth_ref)
    tenant_id = keystone.tenant_id

    COMPUTE_ENDPOINT = (
        'http://{ip}:8774/v2/{tenant_id}'.format(ip=args.ip,
                                                 tenant_id=tenant_id)
    )

    # Bound up front so it exists on every path, not only after a
    # successful client setup.
    metric_values = {}

    try:
        if args.ip:
            nova = get_nova_client(bypass_url=COMPUTE_ENDPOINT)
        else:
            nova = get_nova_client()
        # The ceilometer client is built the same way in either case.
        ceilometer = get_ceilometer_client()
        is_up = True
    except exc.ClientException:
        is_up = False
    # Any other exception presumably isn't an API error
    except Exception as e:
        status_err(str(e))
    else:
        # time something arbitrary
        start = time.time()
        nova.services.list()
        end = time.time()
        milliseconds = (end - start) * 1000

        # gather some metrics
        servers = nova.servers.list(search_opts={'all_tenants': 1})
        status_count = collections.Counter(
            server.status for server in servers)

        # .items() instead of .iteritems(): works on Python 2 and 3 alike.
        for meter_name, fields in NOVA_METERS.items():
            # gather ceilometer stats
            stats = ceilometer.statistics.list(meter_name)

            # 'trim' the meter name a bit; the plain "instance" meter
            # keeps the bare prefix
            metric_name = "nova_instances_"
            if "instance" != meter_name:
                metric_name = (metric_name +
                               meter_name.replace(".", "_") + "_")

            for stat in stats:
                for field in fields:
                    metric_values[metric_name + field] = getattr(stat, field)

    status_ok()
    metric_bool('nova_api_local_status', is_up)

    # only want to send other metrics if api is up
    if is_up:
        response_time = '%.3f' % milliseconds
        metric('nova_api_local_response_time',
               'double',
               response_time,
               'ms')
        metric_values['nova_api_local_response_time'] = response_time

        for status in SERVER_STATUSES:
            key = 'nova_instances_in_state_%s' % status
            metric(key, 'uint32', status_count[status], 'instances')
            metric_values[key] = status_count[status]

        metric_influx(INFLUX_MEASUREMENT_NAME, metric_values)