def check():
    """Probe the local Neutron API and publish the results to Graphite.

    A client is built against ``http://<CONFIGS['ip']>:9696`` when an IP is
    configured, otherwise with the client's defaults.  When the client can
    be created, one ``list_agents`` call is timed and the networks, agents,
    routers and subnets are counted.  ``neutron_api_local_status`` is always
    sent; the timing and the counts are only sent when the API is up.

    Anything raised inside the body (the bare ``except`` also catches
    ``SystemExit``) makes the status metric go out as ``False`` before the
    exception is re-raised.
    """
    def graphite():
        # Read on every use so a missing key surfaces at the same call site
        return {'graphite_host': CONFIGS['graphite_host'],
                'graphite_port': CONFIGS['graphite_port']}

    try:
        endpoint = 'http://{ip}:9696'.format(ip=CONFIGS['ip'])
        try:
            if not CONFIGS['ip']:
                neutron = get_neutron_client()
            else:
                neutron = get_neutron_client(endpoint_url=endpoint)
            is_up = True
        except exc.NeutronClientException:
            # The API is down: only the status flag will be reported
            is_up = False
        except Exception as e:
            # Not an API error; status_err presumably terminates the
            # plugin here -- TODO confirm
            status_err(str(e))
        else:
            # Use an arbitrary call as the response-time probe
            began = time.time()
            neutron.list_agents()
            finished = time.time()
            elapsed_ms = (finished - began) * 1000

            # Object counts, collected in the order they are reported
            network_count = len(neutron.list_networks()['networks'])
            agent_count = len(neutron.list_agents()['agents'])
            router_count = len(neutron.list_routers()['routers'])
            subnet_count = len(neutron.list_subnets()['subnets'])

        status_ok()
        metric_bool(PLUGIN, 'neutron_api_local_status', is_up, **graphite())

        if is_up:
            samples = (
                ('neutron_api_local_response_time', '%.3f' % elapsed_ms),
                ('neutron_networks', network_count),
                ('neutron_agents', agent_count),
                ('neutron_routers', router_count),
                ('neutron_subnets', subnet_count),
            )
            for label, value in samples:
                metric(PLUGIN, label, value, **graphite())
    except:
        metric_bool(PLUGIN, 'neutron_api_local_status', False, **graphite())
        raise
def check(args):
    """Check the local Neutron API and emit ``maas_neutron`` metrics.

    :param args: parsed options; ``ip``, ``protocol`` and ``port`` are read.
        When ``args.ip`` is falsy the client is built with its defaults
        instead of the explicit endpoint.

    ``client_success`` records whether the client could be created and
    ``neutron_api_local_status`` whether the API is considered up.  The
    response time of one ``list_agents`` call and the number of networks,
    agents, routers and subnets are emitted only when the API is up.
    """
    network_endpoint = '{protocol}://{ip}:{port}'.format(
        ip=args.ip,
        protocol=args.protocol,
        port=args.port
    )
    is_up = False
    try:
        if args.ip:
            neutron = get_neutron_client(endpoint_url=network_endpoint)
        else:
            neutron = get_neutron_client()
        is_up = True
    except exc.NeutronClientException:
        # The API is down: do not bother sending any other metric
        metric_bool('client_success', False, m_name='maas_neutron')
    except Exception as e:
        # Any other exception presumably isn't an API error.
        # NOTE(review): status_err is assumed to end the plugin -- confirm.
        metric_bool('client_success', False, m_name='maas_neutron')
        status_err(str(e), m_name='maas_neutron')
    else:
        metric_bool('client_success', True, m_name='maas_neutron')

        # time something arbitrary
        start = time.time()
        neutron.list_agents()
        end = time.time()
        milliseconds = (end - start) * 1000

        # gather some metrics
        networks = len(neutron.list_networks()['networks'])
        agents = len(neutron.list_agents()['agents'])
        routers = len(neutron.list_routers()['routers'])
        subnets = len(neutron.list_subnets()['subnets'])

    status_ok(m_name='maas_neutron')
    metric_bool('neutron_api_local_status', is_up, m_name='maas_neutron')

    # only want to send other metrics if api is up
    if is_up:
        metric('neutron_api_local_response_time',
               'double',
               '%.3f' % milliseconds,
               'ms')
        metric('neutron_networks', 'uint32', networks, 'networks')
        metric('neutron_agents', 'uint32', agents, 'agents')
        # The value is a router count, so label it as such (was 'agents')
        metric('neutron_routers', 'uint32', routers, 'routers')
        metric('neutron_subnets', 'uint32', subnets, 'subnets')
def check(args):
    """Emit one ``neutron_service`` metric per Neutron agent.

    :param args: parsed options; ``hostname`` selects the API endpoint
        (port 9696) and ``host``, when set, restricts the agent listing to
        that host.

    An agent counts as down only when it is administratively enabled but
    not alive; the value sent is ``'1'`` for up and ``'0'`` for down.
    """
    endpoint = 'http://{hostname}:9696'.format(hostname=args.hostname)
    try:
        neutron = get_neutron_client(endpoint_url=endpoint)
    except Exception as e:
        # No API status metric is gathered here, so any failure is reported
        # through status_err (presumably terminating -- TODO confirm)
        status_err(str(e))

    # Gather the neutron agent states, optionally for a single host
    listing_filter = {'host': args.host} if args.host else {}
    agents = neutron.list_agents(**listing_filter)['agents']

    if len(agents) == 0:
        status_err("No host(s) found in the agents list")

    for agent in agents:
        agent_is_up = not (agent['admin_state_up'] and not agent['alive'])

        # A single-host run needs only the binary to identify the agent
        name = ('%s_status' % agent['binary']
                if args.host
                else '%s_%s_on_host_%s' % (agent['binary'],
                                           agent['id'],
                                           agent['host']))

        metric('neutron_service', name, str(int(agent_is_up)))
def check(args):
    """Report a boolean up/down metric for every Neutron agent.

    :param args: parsed options; ``ip`` selects the API endpoint (port
        9696) and ``host``, when set, limits the listing to that host.

    ``status_ok`` is emitted once agents have been found, followed by one
    ``metric_bool`` per agent.  An agent is down only when its admin state
    is up while it is not alive.
    """
    api_url = 'http://{ip}:9696'.format(ip=args.ip)
    try:
        neutron = get_neutron_client(endpoint_url=api_url)
    except Exception as e:
        # Any failure to build the client is reported as a plugin error;
        # status_err presumably does not return -- TODO confirm
        status_err(str(e))

    # Fetch the neutron agent states
    response = (neutron.list_agents(host=args.host)
                if args.host
                else neutron.list_agents())
    agents = response['agents']

    if len(agents) == 0:
        status_err("No host(s) found in the agents list")

    status_ok()
    for agent in agents:
        enabled_but_dead = agent['admin_state_up'] and not agent['alive']
        agent_is_up = False if enabled_but_dead else True

        if not args.host:
            name = '%s_%s_on_host_%s' % (agent['binary'],
                                         agent['id'],
                                         agent['host'])
        else:
            name = '%s_status' % agent['binary']

        metric_bool(name, agent_is_up)
def check(args):
    """Check that the local Neutron API answers and time one request.

    :param args: parsed options; ``ip`` selects the API endpoint on port
        9696.

    ``neutron_api_local_status`` is always emitted.  When the API is up the
    duration of one ``list_agents`` call is emitted in milliseconds.
    """
    NETWORK_ENDPOINT = 'http://{ip}:9696'.format(ip=args.ip)
    try:
        neutron = get_neutron_client(endpoint_url=NETWORK_ENDPOINT)
        is_up = True
    except exc.NeutronClientException:
        # The API is down: do not bother sending any other metric
        is_up = False
    except Exception as e:
        # Any other exception presumably isn't an API error.
        # NOTE(review): status_err is assumed to end the plugin -- confirm.
        status_err(str(e))
    else:
        # time something arbitrary
        start = time()
        neutron.list_agents()
        end = time()
        milliseconds = (end - start) * 1000

    status_ok()
    metric_bool('neutron_api_local_status', is_up)

    # only want to send other metrics if api is up
    if is_up:
        # The value is formatted with three decimals, so it must be declared
        # as a floating-point metric rather than 'uint32'
        metric('neutron_api_local_response_time',
               'double',
               '%.3f' % milliseconds,
               'ms')
def check(args):
    """Report a boolean up/down metric for every Neutron agent.

    :param args: parsed options; ``hostname`` selects the API endpoint
        (port 9696) and ``host``, when set, limits the listing to that
        host.

    ``status_ok`` is emitted once agents have been found, then one
    ``metric_bool`` per agent.  An agent is considered down only when it is
    administratively up but not alive.
    """
    url = "http://{hostname}:9696".format(hostname=args.hostname)
    try:
        neutron = get_neutron_client(endpoint_url=url)
    except Exception as e:
        # No API status metric here, so every failure goes to status_err
        # (presumably terminating the plugin -- TODO confirm)
        status_err(str(e))

    # Collect the neutron agent states
    single_host = args.host
    if single_host:
        agents = neutron.list_agents(host=args.host)["agents"]
    else:
        agents = neutron.list_agents()["agents"]

    if len(agents) == 0:
        status_err("No host(s) found in the agents list")

    status_ok()
    for entry in agents:
        agent_is_up = not (entry["admin_state_up"] and not entry["alive"])

        if args.host:
            name = "%s_status" % entry["binary"]
        else:
            identity = (entry["binary"], entry["id"], entry["host"])
            name = "%s_%s_on_host_%s" % identity

        metric_bool(name, agent_is_up)
def check(args):
    """Check the metadata proxy of every DHCP-enabled network.

    :param args: parsed options; ``neutron_host``, ``protocol`` and
        ``port`` form the Neutron API endpoint.

    The first container returned by ``FIND_CONTAINER`` is used to run
    ``SERVICE_CHECK`` inside each ``qdhcp-<network id>`` namespace via
    ``lxc-attach``.  A command that fails without ``404 Not Found`` in its
    output marks the network as failed.  Emits ``agents_found``,
    ``client_success`` and ``neutron-metadata-agent-proxy_status``, then an
    OK or error status.
    """
    def as_text(raw):
        # check_output returns bytes on Python 3; native str passes through
        # untouched so Python 2 behaviour is unchanged
        if isinstance(raw, str):
            return raw
        return raw.decode('utf-8', 'replace')

    # identify the container we will use for monitoring
    try:
        containers_list = subprocess.check_output(FIND_CONTAINER)
        container = as_text(containers_list.splitlines()[0])
    except (IndexError, subprocess.CalledProcessError):
        metric_bool('agents_found', False, m_name='maas_neutron')
        # NOTE(review): status_err is assumed to end the plugin -- confirm
        status_err('no running neutron agents containers found',
                   m_name='maas_neutron')
    else:
        metric_bool('agents_found', True, m_name='maas_neutron')

    network_endpoint = '{protocol}://{host}:{port}'.format(
        host=args.neutron_host,
        protocol=args.protocol,
        port=args.port
    )
    try:
        neutron = get_neutron_client(endpoint_url=network_endpoint)
    except Exception as e:
        # not gathering api status metric here so catch any exception
        metric_bool('client_success', False, m_name='maas_neutron')
        status_err(str(e), m_name='maas_neutron')
    else:
        metric_bool('client_success', True, m_name='maas_neutron')

    # only check networks which have a port with DHCP enabled
    ports = neutron.list_ports(device_owner='network:dhcp')['ports']
    nets = {p['network_id'] for p in ports}

    # perform checks for each identified network
    failures = []
    for net_id in nets:
        namespace = 'qdhcp-%s' % net_id
        service_check_cmd = SERVICE_CHECK % namespace
        command = shlex.split('lxc-attach -n %s -- %s' % (container,
                                                          service_check_cmd))
        try:
            subprocess.check_output(command, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            # HTTP 404 response indicates the service is responsive.
            # this is the expected response because the maas testing host IP
            # is used to look up metadata and no metadata exists for this IP
            if '404 Not Found' not in as_text(e.output):
                failures.append(net_id)

    is_ok = len(failures) == 0
    metric_bool('neutron-metadata-agent-proxy_status', is_ok,
                m_name='maas_neutron')

    if is_ok:
        status_ok(m_name='maas_neutron')
    else:
        status_err('neutron metadata agent proxies fail on host %s '
                   'net_ids: %s' % (container, ','.join(failures)),
                   m_name='maas_neutron')
def check(args):
    """Check the metadata proxy of every DHCP-enabled network.

    :param args: parsed options; ``neutron_host``, ``protocol`` and
        ``port`` form the Neutron API endpoint.

    The first container returned by ``FIND_CONTAINER`` is used to run
    ``SERVICE_CHECK`` inside each ``qdhcp-<network id>`` namespace via
    ``lxc-attach``.  A command that fails without ``404 Not Found`` in its
    output marks the network as failed.  Emits ``agents_found``,
    ``client_success`` and ``neutron-metadata-agent-proxy_status``, then an
    OK or error status.
    """
    def as_text(raw):
        # check_output returns bytes on Python 3; native str passes through
        # untouched so Python 2 behaviour is unchanged
        if isinstance(raw, str):
            return raw
        return raw.decode('utf-8', 'replace')

    # identify the container we will use for monitoring
    try:
        containers_list = subprocess.check_output(FIND_CONTAINER)
        container = as_text(containers_list.splitlines()[0])
    except (IndexError, subprocess.CalledProcessError):
        metric_bool('agents_found', False, m_name='maas_neutron')
        # NOTE(review): status_err is assumed to end the plugin -- confirm
        status_err('no running neutron agents containers found',
                   m_name='maas_neutron')
    else:
        metric_bool('agents_found', True, m_name='maas_neutron')

    network_endpoint = '{protocol}://{host}:{port}'.format(
        host=args.neutron_host,
        protocol=args.protocol,
        port=args.port)
    try:
        neutron = get_neutron_client(endpoint_url=network_endpoint)
    except Exception as e:
        # not gathering api status metric here so catch any exception
        metric_bool('client_success', False, m_name='maas_neutron')
        status_err(str(e), m_name='maas_neutron')
    else:
        metric_bool('client_success', True, m_name='maas_neutron')

    # only check networks which have a port with DHCP enabled
    ports = neutron.list_ports(device_owner='network:dhcp')['ports']
    nets = {p['network_id'] for p in ports}

    # perform checks for each identified network
    failures = []
    for net_id in nets:
        namespace = 'qdhcp-%s' % net_id
        service_check_cmd = SERVICE_CHECK % namespace
        command = shlex.split('lxc-attach -n %s -- %s' % (container,
                                                          service_check_cmd))
        try:
            subprocess.check_output(command, stderr=subprocess.STDOUT)
        except subprocess.CalledProcessError as e:
            # HTTP 404 response indicates the service is responsive.
            # this is the expected response because the maas testing host IP
            # is used to look up metadata and no metadata exists for this IP
            if '404 Not Found' not in as_text(e.output):
                failures.append(net_id)

    is_ok = len(failures) == 0
    metric_bool('neutron-metadata-agent-proxy_status', is_ok,
                m_name='maas_neutron')

    if is_ok:
        status_ok(m_name='maas_neutron')
    else:
        status_err('neutron metadata agent proxies fail on host %s '
                   'net_ids: %s' % (container, ','.join(failures)),
                   m_name='maas_neutron')
def check():
    """Publish per-agent state and per-binary error counts to Graphite.

    The API endpoint is ``http://<CONFIGS['ip']>:9696``; when
    ``CONFIGS['host']`` is set the agent listing is limited to that host.
    For every agent a boolean metric is sent, and for every agent binary a
    ``<binary>_error_num`` metric holds how many of its agents are down.  A
    ``<PLUGIN>_status`` flag reports whether the whole check completed.

    Anything raised inside the body (the bare ``except`` also catches
    ``SystemExit``) makes ``<PLUGIN>_status`` go out as ``False`` before
    the exception is re-raised.
    """
    try:
        # binary name -> number of agents that are down.  A plain dict is
        # enough because every binary is seeded with 0 below, which also
        # makes healthy binaries report an explicit zero.
        error_num = {}
        NETWORK_ENDPOINT = 'http://{hostname}:9696'\
            .format(hostname=CONFIGS['ip'])
        try:
            neutron = get_neutron_client(endpoint_url=NETWORK_ENDPOINT)
        except Exception as e:
            # not gathering api status metric here so catch any exception.
            # NOTE(review): status_err is assumed to end the plugin --
            # confirm.
            status_err(str(e))

        # gather neutron agent states
        if CONFIGS['host']:
            agents = neutron.list_agents(host=CONFIGS['host'])['agents']
        else:
            agents = neutron.list_agents()['agents']

        if len(agents) == 0:
            status_err("No host(s) found in the agents list")

        # return all the things
        status_ok()
        for agent in agents:
            # Down means: administratively enabled but not alive
            agent_is_up = not (agent['admin_state_up'] and
                               not agent['alive'])

            if CONFIGS['host']:
                name = '%s_status' % agent['binary']
            else:
                name = '%s.%s_%s' % (agent['binary'],
                                     agent['host'],
                                     agent['id'])

            binary = agent['binary']
            error_num.setdefault(binary, 0)
            if not agent_is_up:
                error_num[binary] += 1

            metric_bool(PLUGIN, name, agent_is_up,
                        graphite_host=CONFIGS['graphite_host'],
                        graphite_port=CONFIGS['graphite_port'])

        for k, v in error_num.items():
            metric(PLUGIN, "{}_error_num".format(k), v,
                   graphite_host=CONFIGS['graphite_host'],
                   graphite_port=CONFIGS['graphite_port'])

        metric_bool(PLUGIN, "{}_status".format(PLUGIN), True,
                    graphite_host=CONFIGS['graphite_host'],
                    graphite_port=CONFIGS['graphite_port'])
    except:
        metric_bool(PLUGIN, "{}_status".format(PLUGIN), False,
                    graphite_host=CONFIGS['graphite_host'],
                    graphite_port=CONFIGS['graphite_port'])
        raise
def check(args):
    """Check the local Neutron API and emit ``neutron_api`` metrics.

    :param args: parsed options; ``ip`` selects the API endpoint on port
        9696.  When it is falsy the client is built with its defaults.

    ``neutron_api_local_status`` is always sent (as ``'1'`` or ``'0'``).
    When the API is up, the duration of one ``list_agents`` call and the
    number of networks, agents, routers and subnets follow.
    """
    endpoint = 'http://{ip}:9696'.format(ip=args.ip)
    try:
        if not args.ip:
            neutron = get_neutron_client()
        else:
            neutron = get_neutron_client(endpoint_url=endpoint)
        is_up = True
    except exc.NeutronClientException:
        # The API is down: nothing but the status flag will be sent
        is_up = False
    except Exception as e:
        # Not an API error; status_err presumably terminates the plugin
        # here -- TODO confirm
        status_err(str(e))
    else:
        # Use an arbitrary call as the response-time probe
        began = time.time()
        neutron.list_agents()
        finished = time.time()
        elapsed_ms = (finished - began) * 1000

        # Object counts, gathered in the order they are reported
        network_count = len(neutron.list_networks()['networks'])
        agent_count = len(neutron.list_agents()['agents'])
        router_count = len(neutron.list_routers()['routers'])
        subnet_count = len(neutron.list_subnets()['subnets'])

    metric('neutron_api', 'neutron_api_local_status', str(int(is_up)))

    if not is_up:
        return

    samples = (
        ('neutron_api_local_response_time', '%.3f' % elapsed_ms),
        ('neutron_networks', network_count),
        ('neutron_agents', agent_count),
        ('neutron_routers_agents', router_count),
        ('neutron_subnets', subnet_count),
    )
    for label, value in samples:
        metric('neutron_api', label, value)
def check(args):
    """Report the state of Neutron agents as ``maas_neutron`` metrics.

    :param args: parsed options; ``protocol`` and ``hostname`` form the API
        endpoint (port 9696).  ``host`` or, failing that, ``fqdn`` limits
        the agent listing to a single host.

    If the client cannot be created, every type in
    ``NEUTRON_AGENT_TYPE_LIST`` is reported as unable to reach the API and
    the function returns.  Otherwise each agent yields a string metric that
    is ``"No"`` when the agent is administratively up but not alive and
    ``"Yes"`` in every other case.
    """
    NETWORK_ENDPOINT = '{protocol}://{hostname}:9696'.format(
        protocol=args.protocol, hostname=args.hostname)
    try:
        neutron = get_neutron_client(endpoint_url=NETWORK_ENDPOINT)
    except Exception as e:
        # not gathering api status metric here so catch any exception
        metric_bool('client_success', False, m_name='maas_neutron')
        for neutron_agent_type in NEUTRON_AGENT_TYPE_LIST:
            metric('%s_status' % neutron_agent_type,
                   'string',
                   '%s cannot reach API' % neutron_agent_type,
                   m_name='maas_neutron')
        status_err_no_exit(str(e), m_name='maas_neutron')
        return
    else:
        metric_bool('client_success', True, m_name='maas_neutron')

    # gather neutron service states; --host takes precedence over --fqdn
    single_host = args.host or args.fqdn
    if single_host:
        agents = neutron.list_agents(host=single_host)['agents']
    else:
        agents = neutron.list_agents()['agents']

    if len(agents) == 0:
        metric_bool('agents_found', False, m_name='maas_neutron')
        # NOTE(review): status_err is assumed to end the plugin -- confirm
        status_err("No host(s) found in the agents list",
                   m_name='maas_neutron')
    else:
        metric_bool('agents_found', True, m_name='maas_neutron')

    # return all the things
    status_ok(m_name='maas_neutron')
    for agent in agents:
        agent_is_up = "Yes"
        if agent['admin_state_up'] and not agent['alive']:
            agent_is_up = "No"

        if single_host:
            # Both single-host modes use the same name.  The fqdn branch
            # previously used the invalid conversion '%z', which raises
            # ValueError at format time.
            name = '%s_status' % agent['binary']
        else:
            name = '%s_%s_on_host_%s' % (agent['binary'],
                                         agent['id'],
                                         agent['host'])

        metric(name, 'string', agent_is_up, m_name='maas_neutron')
def check(args):
    """Report the state of Neutron agents as ``maas_neutron`` metrics.

    :param args: parsed options; ``protocol`` and ``hostname`` form the API
        endpoint (port 9696).  ``host`` or, failing that, ``fqdn`` limits
        the agent listing to a single host.

    If the client cannot be created, every type in
    ``NEUTRON_AGENT_TYPE_LIST`` is reported as unable to reach the API and
    the function returns.  Otherwise each agent yields a string metric that
    is ``"No"`` when the agent is administratively up but not alive and
    ``"Yes"`` in every other case.
    """
    NETWORK_ENDPOINT = '{protocol}://{hostname}:9696'.format(
        protocol=args.protocol, hostname=args.hostname)
    try:
        neutron = get_neutron_client(endpoint_url=NETWORK_ENDPOINT)
    except Exception as e:
        # not gathering api status metric here so catch any exception
        metric_bool('client_success', False, m_name='maas_neutron')
        for neutron_agent_type in NEUTRON_AGENT_TYPE_LIST:
            metric('%s_status' % neutron_agent_type,
                   'string',
                   '%s cannot reach API' % neutron_agent_type,
                   m_name='maas_neutron')
        status_err_no_exit(str(e), m_name='maas_neutron')
        return
    else:
        metric_bool('client_success', True, m_name='maas_neutron')

    # gather neutron service states; --host takes precedence over --fqdn
    single_host = args.host or args.fqdn
    if single_host:
        agents = neutron.list_agents(host=single_host)['agents']
    else:
        agents = neutron.list_agents()['agents']

    if len(agents) == 0:
        metric_bool('agents_found', False, m_name='maas_neutron')
        # NOTE(review): status_err is assumed to end the plugin -- confirm
        status_err("No host(s) found in the agents list",
                   m_name='maas_neutron')
    else:
        metric_bool('agents_found', True, m_name='maas_neutron')

    # return all the things
    status_ok(m_name='maas_neutron')
    for agent in agents:
        agent_is_up = "Yes"
        if agent['admin_state_up'] and not agent['alive']:
            agent_is_up = "No"

        if single_host:
            # Both single-host modes use the same name.  The fqdn branch
            # previously used the invalid conversion '%z', which raises
            # ValueError at format time.
            name = '%s_status' % agent['binary']
        else:
            name = '%s_%s_on_host_%s' % (agent['binary'],
                                         agent['id'],
                                         agent['host'])

        metric(name, 'string', agent_is_up, m_name='maas_neutron')
def check(args):
    """Check the local Neutron API and emit object-count metrics.

    :param args: parsed options; ``ip`` selects the API endpoint on port
        9696.

    ``neutron_api_local_status`` is always emitted.  When the API is up,
    the duration of one ``list_agents`` call and the number of networks,
    agents, routers, subnets, ports and floating IPs follow.
    """
    NETWORK_ENDPOINT = 'http://{ip}:9696'.format(ip=args.ip)
    try:
        neutron = get_neutron_client(endpoint_url=NETWORK_ENDPOINT)
        is_up = True
    except exc.NeutronClientException:
        # The API is down: do not bother sending any other metric
        is_up = False
    except Exception as e:
        # Any other exception presumably isn't an API error.
        # NOTE(review): status_err is assumed to end the plugin -- confirm.
        status_err(str(e))
    else:
        # time something arbitrary
        start = time()
        neutron.list_agents()
        end = time()
        milliseconds = (end - start) * 1000

        # gather some metrics
        networks = len(neutron.list_networks()['networks'])
        agents = len(neutron.list_agents()['agents'])
        routers = len(neutron.list_routers()['routers'])
        subnets = len(neutron.list_subnets()['subnets'])
        ports = len(neutron.list_ports()['ports'])
        floatingips = len(neutron.list_floatingips()['floatingips'])

    status_ok()
    metric_bool('neutron_api_local_status', is_up)

    # only want to send other metrics if api is up
    if is_up:
        metric('neutron_api_local_response_time',
               'double',
               '%.3f' % milliseconds,
               'ms')
        metric('neutron_networks', 'uint32', networks, 'networks')
        metric('neutron_agents', 'uint32', agents, 'agents')
        # The value is a router count, so label it as such (was 'agents')
        metric('neutron_routers', 'uint32', routers, 'routers')
        metric('neutron_subnets', 'uint32', subnets, 'subnets')
        metric('neutron_ports', 'uint32', ports, 'ports')
        metric('neutron_floatingips', 'uint32', floatingips, 'floatingips')
def check(args):
    """Report a boolean up/down metric for every Neutron agent.

    :param args: parsed options; ``ip`` selects the API endpoint on port
        9696.

    ``status_ok`` is emitted first, then one ``metric_bool`` per agent named
    after its binary, id and host.  An agent is down only when it is
    administratively up but not alive.
    """
    api_url = 'http://{ip}:9696'.format(ip=args.ip)
    try:
        neutron = get_neutron_client(endpoint_url=api_url)
    except Exception as e:
        # No API status metric here, so every failure goes to status_err
        # (presumably terminating the plugin -- TODO confirm)
        status_err(str(e))

    # Collect the neutron agent states
    agents = neutron.list_agents()['agents']

    status_ok()
    for entry in agents:
        agent_is_up = not (entry['admin_state_up'] and not entry['alive'])
        label = '%s_%s_on_host_%s' % (entry['binary'],
                                      entry['id'],
                                      entry['host'])
        metric_bool(label, agent_is_up)
def check(args):
    """Check the local Neutron API; emit MaaS metrics and an Influx point.

    :param args: parsed options; ``ip`` selects the API endpoint on port
        9696.  When it is falsy the client is built with its defaults.

    ``neutron_api_local_status`` is always emitted.  When the API is up,
    the response time of one ``list_agents`` call and the network, agent,
    router and subnet counts are emitted as MaaS metrics.  The same values
    plus router and port counts broken down by status are sent through
    ``metric_influx`` under ``INFLUX_MEASUREMENT_NAME``.
    """
    NETWORK_ENDPOINT = 'http://{ip}:9696'.format(ip=args.ip)
    try:
        if args.ip:
            neutron = get_neutron_client(endpoint_url=NETWORK_ENDPOINT)
        else:
            neutron = get_neutron_client()
        is_up = True
    except exc.NeutronClientException:
        # The API is down: do not bother sending any other metric
        is_up = False
    except Exception as e:
        # Any other exception presumably isn't an API error.
        # NOTE(review): status_err is assumed to end the plugin -- confirm.
        status_err(str(e))
    else:
        # time something arbitrary
        start = time.time()
        neutron.list_agents()
        end = time.time()
        milliseconds = (end - start) * 1000

        # gather some metrics
        networks = len(neutron.list_networks()['networks'])
        agents = len(neutron.list_agents()['agents'])

        # more metrics : router info
        routers = neutron.list_routers()['routers']
        routers_active = [router for router in routers
                          if router['status'] == 'ACTIVE']
        routers_down = [router for router in routers
                        if router['status'] == 'DOWN']

        subnets = len(neutron.list_subnets()['subnets'])

        # more metrics : port information
        ports = neutron.list_ports()['ports']
        ports_active = [port for port in ports if port['status'] == 'ACTIVE']
        ports_build = [port for port in ports if port['status'] == 'BUILD']
        ports_down = [port for port in ports if port['status'] == 'DOWN']

    status_ok()
    metric_bool('neutron_api_local_status', is_up)

    # only want to send other metrics if api is up
    if is_up:
        response_time = '%.3f' % milliseconds
        metric('neutron_api_local_response_time',
               'double',
               response_time,
               'ms')
        metric('neutron_networks', 'uint32', networks, 'networks')
        metric('neutron_agents', 'uint32', agents, 'agents')
        # `routers` is the list itself here, so send its length; the unit
        # is corrected from the copy-pasted 'agents' as well
        metric('neutron_routers', 'uint32', len(routers), 'routers')
        metric('neutron_subnets', 'uint32', subnets, 'subnets')

        metric_values = {
            'neutron_api_local_response_time': response_time,
            'neutron_networks': networks,
            'neutron_agents': agents,
            'neutron_routers': len(routers),
            'neutron_routers_in_status_ACTIVE': len(routers_active),
            'neutron_routers_in_status_DOWN': len(routers_down),
            'neutron_subnets': subnets,
            'neutron_ports': len(ports),
            'neutron_ports_in_status_ACTIVE': len(ports_active),
            'neutron_ports_in_status_BUILD': len(ports_build),
            'neutron_ports_in_status_DOWN': len(ports_down),
        }

        # NOTE(review): sent only while the API is up, since every value is
        # gathered on that path -- confirm this matches the intended nesting
        metric_influx(INFLUX_MEASUREMENT_NAME, metric_values)
def check(auth_ref):
    """Check the Neutron API and ping the gateway of each active router.

    :param auth_ref: auth reference; ``auth_ref['token']['id']`` is used as
        the ``X-Auth-Token`` header and ``auth_ref['serviceCatalog']`` to
        look up the internal ``network`` endpoint.

    ``neutron_api_status`` reports whether ``GET <endpoint>/v2.0/``
    succeeded.  For every router that has external gateway information and
    is ``ACTIVE``, a ``neutron_router_<name>_status`` flag is emitted and
    the first external fixed IP is pinged once; the result is emitted as
    ``neutron_router_<name>_ping`` with ``SUCCESS`` or ``FAILURE``.
    """
    # Imported locally so the block does not depend on module-level names
    from subprocess import CalledProcessError, check_call

    auth_token = auth_ref['token']['id']

    # Use internalURL as this is a local plugin
    endpoint = get_endpoint_url_for_service('network',
                                            auth_ref['serviceCatalog'],
                                            url_type='internalURL')

    # TODO: take the API version from the arguments to future-proof this
    version = 'v2.0'
    api_endpoint = '{endpoint}/{version}'.format(endpoint=endpoint,
                                                 version=version)

    # The client object is not used afterwards; it is built because
    # get_neutron_client can check for a stale token and get a new one
    get_neutron_client(endpoint_url=endpoint)

    s = Session()
    s.headers.update(
        {'Content-type': 'application/json',
         'X-Auth-Token': auth_token})

    # Stays empty when the API cannot be reached, so the loop below is
    # skipped instead of failing on an unbound name
    routers = []
    try:
        # Check for successful response from API endpoint
        r = s.get('%s/' % api_endpoint, verify=False, timeout=10)
        is_active = r.ok
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_active = False
    except Exception as e:
        # NOTE(review): status_err is assumed to end the plugin -- confirm
        status_err(str(e))
    else:
        # Gather some metrics to report
        try:
            r = s.get('%s/routers' % api_endpoint, verify=False, timeout=10)
        except Exception as e:
            status_err(str(e))
        else:
            routers = r.json()['routers']

    status_ok()
    metric_bool('neutron_api_status', is_active)

    for router in routers:
        if router['external_gateway_info'] is None:
            continue

        router_status = router['status']
        router_name = router['name'].replace(" ", "").lower()
        if router_status != 'ACTIVE':
            continue

        metric_bool('neutron_router_' + router_name + '_status', 1)

        # IP address is the WAN interface of the router
        gateway = router['external_gateway_info']
        ip_address = gateway['external_fixed_ips'][0]['ip_address']

        # check_call raises on a non-zero exit, so the outcome is decided
        # by the exception rather than by a return code that would be
        # missing (or left over from the previous router) after a failure
        try:
            check_call(['ping', '-c1', '-W3', ip_address])
        except CalledProcessError:
            ping_result = 'FAILURE'
        else:
            ping_result = 'SUCCESS'

        metric('neutron_router_' + router_name + '_ping',
               'string',
               ping_result)