Exemple #1
0
def check(args):
    """Probe the Kubernetes API through ``kubectl`` and emit MaaS metrics.

    Runs ``kubectl get pods`` in the ``RACKSPACE_SYSTEM_NS`` namespace using
    the kubeconfig named by ``args.kubeconfig``. The API counts as up when
    the command succeeds and its output spans more than one line.

    Emits ``client_success``, ``managed_k8_api_local_status`` and, only when
    the API is up, ``managed_k8_api_local_response_time`` in milliseconds.

    :param args: parsed arguments; only ``args.kubeconfig`` is read here.
    """
    try:
        start = datetime.datetime.now()
        # this is lame but the k8 python client did not work
        ret = subprocess.check_output([
            'kubectl',
            "--kubeconfig=%s" % args.kubeconfig,
            "--namespace=%s" % RACKSPACE_SYSTEM_NS, 'get', 'pods'
        ])
        end = datetime.datetime.now()
        # if rack system is empty something is terribly wrong
        # check_output returns bytes; splitting on a bytes newline behaves
        # the same on Python 2 and does not raise on Python 3.
        api_is_up = len(ret.split(b'\n')) > 1
    except subprocess.CalledProcessError:
        # kubectl returned a non-zero exit status
        api_is_up = False
        metric_bool('client_success', False, m_name='maas_managed_k8')
    # Any other exception presumably isn't an API error
    except Exception as e:
        metric_bool('client_success', False, m_name='maas_managed_k8')
        status_err(str(e), m_name='maas_managed_k8')
    else:
        metric_bool('client_success', True, m_name='maas_managed_k8')
        # total_seconds() keeps the sub-millisecond fraction and accounts for
        # the days component; the former integer arithmetic truncated to
        # whole milliseconds under Python 2 division.
        milliseconds = (end - start).total_seconds() * 1000

    status_ok(m_name='maas_managed_k8')
    metric_bool('managed_k8_api_local_status',
                api_is_up,
                m_name='maas_managed_k8')
    if api_is_up:
        # only want to send other metrics if api is up
        metric('managed_k8_api_local_response_time', 'double',
               '%.3f' % milliseconds, 'ms')
Exemple #2
0
def main():
    """Collect RabbitMQ metrics and emit them under the ``maas_rabbitmq`` check.

    Connection settings come from the module-level ``options`` object
    (username, password, https, host, port, name). Each ``_get_*_metrics``
    helper is handed the shared session and the same ``metrics`` dict.

    Boolean values are emitted inverted as ``rabbitmq_<key>_status``; every
    other value is emitted as an ``int64`` metric named ``rabbitmq_<key>``.
    """
    collected = {}
    # A single session keeps the auth credentials for every request.
    http = requests.Session()
    http.auth = (options.username, options.password)

    if options.https:
        scheme = 'https'
    else:
        scheme = 'http'

    _get_connection_metrics(http, collected, scheme,
                            options.host, options.port)
    _get_overview_metrics(http, collected, scheme,
                          options.host, options.port)
    _get_node_metrics(http, collected, scheme,
                      options.host, options.port, options.name)
    _get_queue_metrics(http, collected, scheme,
                       options.host, options.port)
    _get_consumer_metrics(http, collected, scheme,
                          options.host, options.port)

    status_ok(m_name='maas_rabbitmq')

    for key, entry in collected.items():
        reading = entry['value']
        if isinstance(reading, bool):
            # Boolean readings are reported negated.
            metric_bool('rabbitmq_%s_status' % key, not reading)
            continue
        metric('rabbitmq_%s' % key, 'int64', reading, entry['unit'])
Exemple #3
0
def check(args):
    """Emit ``cloud_resource_*`` metrics built from Nova hypervisor details.

    Every hypervisor returned by the compute client is fetched, tagged with
    a running ``count`` attribute, and read through ``stats_mapping``. The
    ``total_vcpus`` and ``total_memory`` entries are multiplied by
    ``args.cpu_allocation_ratio`` and ``args.mem_allocation_ratio``.

    :param args: parsed arguments providing the two allocation ratios.
    """
    try:
        compute = get_openstack_client('compute')
    except Exception as err:
        metric_bool('client_success', False, m_name='maas_nova')
        status_err(str(err), m_name='maas_nova')
    else:
        metric_bool('client_success', True, m_name='maas_nova')
        # get some cloud stats
        hypervisors = [compute.get_hypervisor(item.id)
                       for item in compute.hypervisors()]
        cloud_stats = collections.defaultdict(dict)
        for position, hypervisor in enumerate(hypervisors, 1):
            hypervisor.count = position
            for metric_name, spec in stats_mapping.iteritems():
                if metric_name == 'total_vcpus':
                    factor = args.cpu_allocation_ratio
                elif metric_name == 'total_memory':
                    factor = args.mem_allocation_ratio
                else:
                    factor = 1
                # NOTE(review): each hypervisor overwrites the previous
                # value, so only the last one is reported - confirm whether
                # a sum across hypervisors was intended.
                reading = getattr(hypervisor, spec['stat_name']) * factor
                entry = cloud_stats[metric_name]
                entry['value'] = reading
                entry['unit'] = spec['unit']
                entry['type'] = spec['type']

    status_ok(m_name='maas_nova')
    for metric_name, entry in cloud_stats.iteritems():
        metric('cloud_resource_%s' % metric_name,
               entry['type'], entry['value'], entry['unit'])
Exemple #4
0
def check(auth_ref, args):
    """Check the Designate API with a plain GET and emit MaaS metrics.

    When ``args.ip`` is set the request goes to port 9001 on that address
    using ``args.protocol``; otherwise the internal ``dns`` endpoint is
    looked up through ``auth_ref``. The API counts as up on HTTP 200.

    Emits ``client_success``, ``designate_api_local_status`` and, only when
    the API is up, ``designate_api_local_response_time`` in milliseconds.

    :param auth_ref: auth reference handed to the endpoint lookup.
    :param args: parsed arguments providing ``ip`` and ``protocol``.
    """
    DESIGNATE_ENDPOINT = '{protocol}://{ip}:9001/'.format(
        protocol=args.protocol, ip=args.ip)

    try:
        if args.ip:
            endpoint = DESIGNATE_ENDPOINT
        else:
            endpoint = get_endpoint_url_for_service('dns', auth_ref,
                                                    'internal')
        # time something arbitrary
        start = datetime.datetime.now()
        r = requests.get(endpoint)
        end = datetime.datetime.now()
        api_is_up = (r.status_code == 200)
    except (requests.HTTPError, requests.Timeout, requests.ConnectionError):
        api_is_up = False
        metric_bool('client_success', False, m_name='maas_designate')
    # Any other exception presumably isn't an API error
    except Exception as e:
        metric_bool('client_success', False, m_name='maas_designate')
        status_err(str(e), m_name='maas_designate')
    else:
        metric_bool('client_success', True, m_name='maas_designate')
        # total_seconds() keeps the sub-millisecond fraction and accounts for
        # the days component; the former integer arithmetic truncated to
        # whole milliseconds under Python 2 division.
        milliseconds = (end - start).total_seconds() * 1000

    status_ok(m_name='maas_designate')
    metric_bool('designate_api_local_status',
                api_is_up,
                m_name='maas_designate')
    if api_is_up:
        # only want to send other metrics if api is up
        metric('designate_api_local_response_time', 'double',
               '%.3f' % milliseconds, 'ms')
Exemple #5
0
def check(auth_ref, args):
    """Check the local Cinder API by listing volumes.

    Sends an authenticated GET for ``/volumes`` to port 8776 on ``args.ip``
    and emits ``cinder_api_local_status`` plus, when the response is OK,
    ``cinder_api_local_response_time`` in milliseconds. Connection, HTTP
    and timeout errors are reported through ``status_err``.

    :param auth_ref: auth reference used to build the keystone client.
    :param args: parsed arguments providing ``ip``.
    """
    keystone = get_keystone_client(auth_ref)
    token = keystone.auth_token
    volume_url = 'http://{ip}:8776/v1/{tenant}'.format(
        ip=args.ip, tenant=keystone.tenant_id)

    request_headers = {
        'Content-type': 'application/json',
        'x-auth-token': token
    }
    session = requests.Session()
    session.headers.update(request_headers)

    try:
        response = session.get('%s/volumes' % volume_url,
                               verify=False, timeout=10)
        is_up = response.ok
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout) as err:
        status_err(str(err))
    else:
        status_ok()
        metric_bool('cinder_api_local_status', is_up)
        # Response time is only reported while the API is up.
        if not is_up:
            return
        elapsed_ms = response.elapsed.total_seconds() * 1000
        # NOTE(review): declared as uint32 but formatted with three
        # decimals - confirm the consumer accepts this.
        metric('cinder_api_local_response_time', 'uint32',
               '%.3f' % elapsed_ms, 'ms')
Exemple #6
0
def main(args):
    """Check a memcached instance and emit its statistics.

    Fetches stats through ``item_stats`` for ``args.ip`` / ``args.port``.
    A ``TypeError`` or ``IndexError`` marks the service as down. A version
    that is not listed in ``VERSIONS`` is reported through ``status_err``.

    Emits ``client_success``, ``memcache_api_local_status`` and, when up,
    one ``memcache_<name>`` metric per entry of ``MEMCACHE_METRICS``.

    :param args: parsed arguments providing ``ip`` and ``port``.
    """
    host = str(args.ip)
    memcached_port = args.port
    # Stays True unless the stats lookup below fails.
    is_up = True

    try:
        snapshot = item_stats(host, memcached_port)
        running_version = snapshot['version']
    except (TypeError, IndexError):
        is_up = False
        metric_bool('client_success', False, m_name='maas_memcached')
    else:
        metric_bool('client_success', True, m_name='maas_memcached')
        if running_version not in VERSIONS:
            status_err('This plugin has only been tested with version %s '
                       'of memcached, and you are using version %s'
                       % (VERSIONS, running_version), m_name='maas_memcached')

    status_ok(m_name='maas_memcached')
    metric_bool('memcache_api_local_status', is_up, m_name='maas_memcached')
    if not is_up:
        return
    for stat_name, unit in MEMCACHE_METRICS.iteritems():
        metric('memcache_%s' % stat_name, 'uint64', snapshot[stat_name], unit)
Exemple #7
0
def check(auth_ref, args):
    """Check the local Glance API and emit status and response time.

    Sends an authenticated GET for ``/schemas/image`` to port 9292 on
    ``args.ip``. Connection, HTTP and timeout errors mark the API as down;
    any other exception is reported through ``status_err``.

    :param auth_ref: auth reference used to build the keystone client.
    :param args: parsed arguments providing ``ip``.
    """
    # get_keystone_client is called here because it has logic to fetch a
    # new token when the previous one is bad.
    token = get_keystone_client(auth_ref).auth_token
    base_url = 'http://{ip}:9292/v2'.format(ip=args.ip)

    session = Session()
    session.headers.update({
        'Content-type': 'application/json',
        'x-auth-token': token,
    })

    try:
        # This path does not query the glance-registry, which is covered
        # by separate checks.
        response = session.get('%s/schemas/image' % base_url,
                               verify=False, timeout=10)
        is_up = response.ok
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
    except Exception as err:
        status_err(str(err))

    status_ok()
    metric_bool('glance_api_local_status', is_up)

    # Response time is only reported while the API is up.
    if not is_up:
        return
    elapsed_ms = response.elapsed.total_seconds() * 1000
    metric('glance_api_local_response_time', 'uint32',
           '%.3f' % elapsed_ms, 'ms')
Exemple #8
0
def check(auth_ref, args):
    """Emit ``cloud_resource_*`` metrics from Nova hypervisor statistics.

    Builds a nova client (pointed at port 8774 on ``args.ip`` when an ip is
    given), reads ``hypervisor_stats.statistics()`` and reports every entry
    of ``stats_mapping`` with its configured type and unit.

    :param auth_ref: auth reference used to look up the tenant id.
    :param args: parsed arguments providing ``ip``.
    """
    tenant = get_keystone_client(auth_ref).tenant_id
    compute_url = 'http://{ip}:8774/v2/{tenant_id}'.format(
        ip=args.ip, tenant_id=tenant)

    try:
        nova = (get_nova_client(bypass_url=compute_url)
                if args.ip else get_nova_client())
    except Exception as err:
        status_err(str(err))
    else:
        # get some cloud stats
        totals = nova.hypervisor_stats.statistics()
        cloud_stats = collections.defaultdict(dict)
        for metric_name, spec in stats_mapping.iteritems():
            cloud_stats[metric_name] = {
                'value': getattr(totals, spec['stat_name']),
                'unit': spec['unit'],
                'type': spec['type'],
            }

    status_ok()
    for metric_name, entry in cloud_stats.iteritems():
        metric('cloud_resource_%s' % metric_name,
               entry['type'], entry['value'], entry['unit'])
def check(auth_ref, args):
    """Check the local Glance registry and emit status and response time.

    Sends an authenticated GET for ``/images`` to port 9191 on ``args.ip``.
    Connection, HTTP and timeout errors mark the registry as down; any
    other exception is reported through ``status_err``.

    :param auth_ref: auth reference used to build the keystone client.
    :param args: parsed arguments providing ``ip``.
    """
    # get_keystone_client is called here because it has logic to fetch a
    # new token when the previous one is bad.
    token = get_keystone_client(auth_ref).auth_token
    registry_url = 'http://{ip}:9191'.format(ip=args.ip)

    http = Session()
    http.headers.update({
        'Content-type': 'application/json',
        'x-auth-token': token,
    })

    try:
        # /images returns a list of public, non-deleted images
        reply = http.get('%s/images' % registry_url, verify=False, timeout=10)
        is_up = reply.ok
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
    except Exception as err:
        status_err(str(err))

    status_ok()
    metric_bool('glance_registry_local_status', is_up)
    # Response time is only reported while the registry is up.
    if not is_up:
        return
    elapsed_ms = reply.elapsed.total_seconds() * 1000
    metric('glance_registry_local_response_time', 'double',
           '%.3f' % elapsed_ms, 'ms')
def check(args):
    """Check the local Nova metadata API and emit status and response time.

    Requests ``/`` on port 8775 of ``args.ip``. The API counts as up when
    the response is OK and one line of the body is exactly ``1.0``.
    Connection, HTTP and timeout errors mark it as down; any other
    exception is reported through ``status_err``.

    :param args: parsed arguments providing ``ip``.
    """
    base_url = 'http://{ip}:8775'.format(ip=args.ip)
    session = requests.Session()
    is_up = True

    try:
        # Only / (the ec2 versions list) appears reachable without an
        # instance ID and other headers.
        reply = session.get('%s/' % base_url, verify=False, timeout=10)
        milliseconds = reply.elapsed.total_seconds() * 1000
        if not (reply.ok and '1.0' in reply.content.splitlines()):
            is_up = False
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
    except Exception as err:
        status_err(str(err))

    status_ok()
    metric_bool('nova_api_metadata_local_status', is_up)
    # Response time is only reported while the API is up.
    if not is_up:
        return
    metric('nova_api_metadata_local_response_time', 'double',
           '%.3f' % milliseconds, 'ms')
Exemple #11
0
def check_process_running(process_names, container_name=None):
    """Report whether each named process appears in the running process list.

    The list is read through ``get_processes`` from the cgroup ``procs``
    file of the host or, when ``container_name`` is given, of that LXC
    container. One ``process_check`` metric is emitted per process name,
    with value ``'1'`` when the name is in the list and ``'0'`` otherwise.

    :param process_names: names of the processes to look for.
    :param container_name: optional container whose processes are checked.
    """
    if not process_names:
        # Nothing to check; report this as an error.
        status_err('No process names provided')

    if container_name is None:
        procs_path = '/sys/fs/cgroup/cpu/cgroup.procs'
    else:
        # Look inside the container rather than on the parent host.
        procs_path = os.path.join('/sys/fs/cgroup/cpu/lxc', container_name,
                                  'cgroup.procs')

    running = get_processes(procs_path)
    if not running:
        # No process list could be obtained for the container or host.
        status_err('Could not get a list of running processes')

    for wanted in process_names:
        present = wanted in running
        metric('process_check', '%s_process_status' % wanted,
               str(int(present)))
Exemple #12
0
def check(args):
    """Check the local Ironic API by requesting ``/nodes``.

    The local endpoint is derived from the client endpoint through
    ``generate_local_endpoint`` using ``args.ip``, ``args.port`` and
    ``args.protocol``. The API counts as up on HTTP 200.

    Emits ``ironic_api_local_status`` and, when up,
    ``ironic_api_local_response_time`` in milliseconds.

    :param args: parsed arguments providing ``ip``, ``port``, ``protocol``.
    """
    ironic = get_openstack_client('baremetal')

    try:
        nodes_url = generate_local_endpoint(
            str(ironic.get_endpoint()), args.ip, args.port, args.protocol,
            '/nodes')
        reply = ironic.session.get(nodes_url)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        # NOTE(review): this branch emits no client_success metric, unlike
        # the generic handler below - confirm that is intended.
        is_up = False
    # Any other exception presumably isn't an API error
    except Exception as err:
        metric_bool('client_success', False, m_name='maas_ironic')
        status_err(str(err), m_name='maas_ironic')
    else:
        is_up = reply.status_code == 200
        milliseconds = reply.elapsed.total_seconds() * 1000
        metric_bool('client_success', True, m_name='maas_ironic')

    status_ok(m_name='maas_ironic')
    metric_bool('ironic_api_local_status', is_up, m_name='maas_ironic')
    if not is_up:
        return
    metric('ironic_api_local_response_time', 'double',
           '%.3f' % milliseconds, 'ms')
def check(args):
    """Check the local Heat API by requesting ``/build_info``.

    The local endpoint is derived from the client endpoint through
    ``generate_local_endpoint`` using ``args.ip``, ``args.port`` and
    ``args.protocol``. Any response that does not raise counts as up.

    Emits ``client_success``, ``heat_api_local_status`` and, when up,
    ``heat_api_local_response_time`` in milliseconds.

    :param args: parsed arguments providing ``ip``, ``port``, ``protocol``.
    """
    heat = get_openstack_client('orchestration')

    try:
        build_info_url = generate_local_endpoint(
            str(heat.get_endpoint()), args.ip, args.port, args.protocol,
            '/build_info')
        reply = heat.session.get(build_info_url)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
        metric_bool('client_success', False, m_name='maas_heat')
    # Any other exception presumably isn't an API error
    except Exception as err:
        metric_bool('client_success', False, m_name='maas_heat')
        status_err(str(err), m_name='maas_heat')
    else:
        is_up = True
        milliseconds = reply.elapsed.total_seconds() * 1000
        metric_bool('client_success', True, m_name='maas_heat')

    status_ok(m_name='maas_heat')
    metric_bool('heat_api_local_status', is_up, m_name='maas_heat')
    # Response time is only reported while the API is up.
    if not is_up:
        return
    metric('heat_api_local_response_time', 'double',
           '%.3f' % milliseconds, 'ms')
def check(auth_ref, args):
    """Check the local Ironic API through the ironic client.

    Builds a client (pointed at port 6385 on ``args.ip`` when an ip is
    given) and times ``node.list()``. A ``ClientException`` while building
    the client marks the API as down.

    Emits ``ironic_api_local_status`` and, when up,
    ``ironic_api_local_response_time`` in milliseconds.

    :param auth_ref: accepted but not read in this function.
    :param args: parsed arguments providing ``ip``.
    """
    ironic_url = 'http://{ip}:6385/v1'.format(ip=args.ip)

    try:
        ironic = (get_ironic_client(endpoint=ironic_url)
                  if args.ip else get_ironic_client())
        is_up = True
    except exc.ClientException:
        is_up = False
    # Any other exception presumably isn't an API error
    except Exception as err:
        metric_bool('client_success', False, m_name='maas_ironic')
        status_err(str(err), m_name='maas_ironic')
    else:
        metric_bool('client_success', True, m_name='maas_ironic')
        # time something arbitrary
        began = time.time()
        ironic.node.list()
        milliseconds = (time.time() - began) * 1000

    status_ok(m_name='maas_ironic')
    metric_bool('ironic_api_local_status', is_up, m_name='maas_ironic')
    # Response time is only reported while the API is up.
    if not is_up:
        return
    metric('ironic_api_local_response_time', 'double',
           '%.3f' % milliseconds, 'ms')
def check(auth_ref, args):
    """Check the local Heat API through the heat client.

    Builds a client (pointed at port 8004 on ``args.ip`` when an ip is
    given) and times ``build_info.build_info()``. An ``HTTPException``
    while building the client marks the API as down.

    Emits ``heat_api_local_status`` and, when up,
    ``heat_api_local_response_time`` in milliseconds.

    :param auth_ref: auth reference used to look up the tenant id.
    :param args: parsed arguments providing ``ip``.
    """
    tenant = get_keystone_client(auth_ref).tenant_id
    heat_url = 'http://{ip}:8004/v1/{tenant}'.format(
        ip=args.ip, tenant=tenant)

    try:
        heat = (get_heat_client(endpoint=heat_url)
                if args.ip else get_heat_client())
        is_up = True
    except exc.HTTPException:
        is_up = False
    # Any other exception presumably isn't an API error
    except Exception as err:
        status_err(str(err))
    else:
        # time something arbitrary
        began = time.time()
        heat.build_info.build_info()
        milliseconds = (time.time() - began) * 1000

    status_ok()
    metric_bool('heat_api_local_status', is_up)
    # Response time is only reported while the API is up.
    if not is_up:
        return
    metric('heat_api_local_response_time', 'double',
           '%.3f' % milliseconds, 'ms')
def check(args):
    """Count Octavia load balancers whose provisioning status is ``ERROR``.

    Requests ``/lbaas/loadbalancers`` from the local endpoint derived from
    ``args.ip``, ``args.port`` and ``args.protocol``. The API counts as up
    when the response is OK.

    Emits ``client_success``, ``octavia_api_local_status`` and, when up,
    ``octavia_num_lb_in_error_status``.

    :param args: parsed arguments providing ``ip``, ``port``, ``protocol``.
    """
    octavia = get_openstack_client('load_balancer')

    if not args.ip:
        # No request is made without an IP; previously this path reached
        # ``resp.ok`` with ``resp`` unbound and crashed with a traceback.
        metric_bool('client_success', False, m_name='maas_octavia')
        status_err('No IP address provided for the local Octavia API check',
                   m_name='maas_octavia')
        return

    try:
        octavia_local_endpoint = generate_local_endpoint(
            str(octavia.get_endpoint()), args.ip, args.port,
            args.protocol,
            '/lbaas/loadbalancers'
        )
        resp = octavia.session.get(octavia_local_endpoint, timeout=180)

    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
        metric_bool('client_success', False, m_name='maas_octavia')
    # Any other exception presumably isn't an API error
    except Exception as e:
        metric_bool('client_success', False, m_name='maas_octavia')
        status_err(str(e), m_name='maas_octavia')
    else:
        is_up = resp.ok
        metric_bool('client_success', True, m_name='maas_octavia')

    status_ok(m_name='maas_octavia')
    metric_bool('octavia_api_local_status', is_up, m_name='maas_octavia')
    if is_up:
        # only want to send other metrics if api is up
        loadbalancers = resp.json()['loadbalancers']
        num = len([lb for lb in loadbalancers
                   if lb['provisioning_status'] == 'ERROR'])
        # NOTE(review): the unit is 'ms' although the value is a count -
        # confirm before changing, consumers may depend on it.
        metric('octavia_num_lb_in_error_status',
               'uint32',
               num,
               'ms')
def check(auth_ref, args):
    """Check the local Heat API through the heat client.

    Builds a client (pointed at ``args.protocol``://``args.ip``:``args.port``
    when an ip is given) and times ``build_info.build_info()``. An
    ``HTTPException`` while building the client marks the API as down.

    Emits ``client_success``, ``heat_api_local_status`` and, when up,
    ``heat_api_local_response_time`` in milliseconds.

    :param auth_ref: auth reference used to look up the tenant id.
    :param args: parsed arguments providing ``ip``, ``port``, ``protocol``.
    """
    tenant = get_keystone_client(auth_ref).tenant_id
    local_url = '{protocol}://{ip}:{port}/v1/{tenant}'.format(
        ip=args.ip, tenant=tenant, protocol=args.protocol, port=args.port)

    try:
        heat = (get_heat_client(endpoint=local_url)
                if args.ip else get_heat_client())
        is_up = True
    except exc.HTTPException:
        is_up = False
        metric_bool('client_success', False, m_name='maas_heat')
    # Any other exception presumably isn't an API error
    except Exception as err:
        metric_bool('client_success', False, m_name='maas_heat')
        status_err(str(err), m_name='maas_heat')
    else:
        metric_bool('client_success', True, m_name='maas_heat')
        # time something arbitrary
        began = time.time()
        heat.build_info.build_info()
        milliseconds = (time.time() - began) * 1000

    status_ok(m_name='maas_heat')
    metric_bool('heat_api_local_status', is_up, m_name='maas_heat')
    # Response time is only reported while the API is up.
    if not is_up:
        return
    metric('heat_api_local_response_time', 'double',
           '%.3f' % milliseconds, 'ms')
def check(args):
    """Check the local Octavia API and emit status and response time.

    Requests ``/lbaas/loadbalancers?limit=1`` from the local endpoint
    derived from ``args.ip``, ``args.port`` and ``args.protocol``. The API
    counts as up when the response is OK.

    Emits ``client_success``, ``octavia_api_local_status`` and, when up,
    ``octavia_api_local_response_time`` in milliseconds.

    :param args: parsed arguments providing ``ip``, ``port``, ``protocol``.
    """
    octavia = get_openstack_client('load_balancer')

    if not args.ip:
        # No request is made without an IP; previously this path reached
        # ``resp.ok`` with ``resp`` unbound and crashed with a traceback.
        metric_bool('client_success', False, m_name='maas_octavia')
        status_err('No IP address provided for the local Octavia API check',
                   m_name='maas_octavia')
        return

    try:
        octavia_local_endpoint = generate_local_endpoint(
            str(octavia.get_endpoint()), args.ip, args.port, args.protocol,
            '/lbaas/loadbalancers?limit=1')
        resp = octavia.session.get(octavia_local_endpoint, timeout=180)

    except (exc.HTTPError, exc.Timeout, exc.ConnectionError):
        is_up = False
        metric_bool('client_success', False, m_name='maas_octavia')
    # Any other exception presumably isn't an API error
    except Exception as e:
        metric_bool('client_success', False, m_name='maas_octavia')
        status_err(str(e), m_name='maas_octavia')
    else:
        is_up = resp.ok
        metric_bool('client_success', True, m_name='maas_octavia')
        milliseconds = resp.elapsed.total_seconds() * 1000

    status_ok(m_name='maas_octavia')
    metric_bool('octavia_api_local_status', is_up, m_name='maas_octavia')
    if is_up:
        # only want to send other metrics if api is up
        metric('octavia_api_local_response_time', 'double',
               '%.3f' % milliseconds, 'ms')
def check(args):
    """Check the local Glance registry and emit status and response time.

    Strips the last two path components from the client endpoint, derives
    the local ``/images`` URL from ``args.ip``, ``args.port`` and
    ``args.protocol``, and requests it. The registry counts as up on
    HTTP 200.

    Emits ``client_success``, ``glance_registry_local_status`` and, when
    up, ``glance_registry_local_response_time`` in milliseconds.

    :param args: parsed arguments providing ``ip``, ``port``, ``protocol``.
    """
    glance = get_openstack_client('image')

    # Stays True unless the request below fails with a connection error.
    client_ok = True
    try:
        # Remove version from returned endpoint
        glance_endpoint = str(glance.get_endpoint().rsplit('/', 2)[0])
        local_registry_url = generate_local_endpoint(
            glance_endpoint, args.ip, args.port, args.protocol,
            '/images'
        )
        resp = glance.session.get(local_registry_url, timeout=180)
        milliseconds = resp.elapsed.total_seconds() * 1000

        is_up = resp.status_code == 200
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
        client_ok = False
    except Exception as e:
        metric_bool('client_success', False, m_name='maas_glance')
        status_err(str(e), m_name='maas_glance')

    status_ok(m_name='maas_glance')
    # Emit client_success exactly once. Previously a connection error
    # emitted False and was then followed by an unconditional True.
    metric_bool('client_success', client_ok, m_name='maas_glance')
    metric_bool('glance_registry_local_status', is_up, m_name='maas_glance')
    # Only send remaining metrics if the API is up
    if is_up:
        metric('glance_registry_local_response_time', 'double',
               '%.3f' % milliseconds, 'ms')
def check(args):
    """Check the local Neutron API through the neutron client.

    Builds a client pointed at port 9696 on ``args.ip`` and times
    ``list_agents()``. A ``NeutronClientException`` while building the
    client marks the API as down.

    Emits ``neutron_api_local_status`` and, when up,
    ``neutron_api_local_response_time`` in milliseconds.

    :param args: parsed arguments providing ``ip``.
    """
    network_url = 'http://{ip}:9696'.format(ip=args.ip)

    try:
        client = get_neutron_client(endpoint_url=network_url)
        is_up = True
    except exc.NeutronClientException:
        # The API is down, so no further metric is sent.
        is_up = False
    # Any other exception presumably isn't an API error
    except Exception as err:
        status_err(str(err))
    else:
        # time something arbitrary
        began = time()
        client.list_agents()
        milliseconds = (time() - began) * 1000

    status_ok()
    metric_bool('neutron_api_local_status', is_up)
    # Response time is only reported while the API is up.
    if not is_up:
        return
    metric('neutron_api_local_response_time', 'uint32',
           '%.3f' % milliseconds, 'ms')
Exemple #21
0
def check(args):
    """Check the local Keystone API through the keystone client.

    Builds a client pointed at port 35357 on ``args.ip`` and times
    ``services.list()``. An ``HttpServerError`` or ``ClientException``
    while building the client marks the API as down.

    Emits ``keystone_api_local_status`` and, when up,
    ``keystone_api_local_response_time`` in milliseconds.

    :param args: parsed arguments providing ``ip``.
    """
    identity_url = 'http://{ip}:35357/v2.0'.format(ip=args.ip)

    try:
        client = get_keystone_client(endpoint=identity_url)
        is_up = True
    except (exc.HttpServerError, exc.ClientException):
        is_up = False
    # Any other exception presumably isn't an API error
    except Exception as err:
        status_err(str(err))
    else:
        # time something arbitrary
        began = time()
        client.services.list()
        milliseconds = (time() - began) * 1000

    status_ok()
    metric_bool('keystone_api_local_status', is_up)
    # Response time is only reported while the API is up.
    if not is_up:
        return
    metric('keystone_api_local_response_time', 'uint32',
           '%.3f' % milliseconds, 'ms')
def check(auth_ref, args):
    """Check the local Glance registry and emit status and response time.

    Sends an authenticated GET for ``/images`` to port 9191 on ``args.ip``.
    Connection, HTTP and timeout errors mark the registry as down; any
    other exception is reported through ``status_err``.

    :param auth_ref: auth reference used to build the keystone client.
    :param args: parsed arguments providing ``ip``.
    """
    # get_keystone_client is called here because it has logic to fetch a
    # new token when the previous one is bad.
    token = get_keystone_client(auth_ref).auth_token
    registry_url = 'http://{ip}:9191'.format(ip=args.ip)

    request_headers = {
        'Content-type': 'application/json',
        'x-auth-token': token
    }
    http = requests.Session()
    http.headers.update(request_headers)

    try:
        # /images returns a list of public, non-deleted images
        reply = http.get('%s/images' % registry_url, verify=False, timeout=10)
        is_up = reply.ok
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
    except Exception as err:
        status_err(str(err))

    status_ok()
    metric_bool('glance_registry_local_status', is_up)
    # Response time is only reported while the registry is up.
    if not is_up:
        return
    elapsed_ms = reply.elapsed.total_seconds() * 1000
    metric('glance_registry_local_response_time', 'double',
           '%.3f' % elapsed_ms, 'ms')
Exemple #23
0
def check(args):
    """Count Octavia load balancers whose provisioning status is ``ERROR``.

    Requests ``/lbaas/loadbalancers`` from the local endpoint derived from
    ``args.ip``, ``args.port`` and ``args.protocol``. The API counts as up
    when the response is OK.

    Emits ``client_success``, ``octavia_api_local_status`` and, when up,
    ``octavia_num_lb_in_error_status``.

    :param args: parsed arguments providing ``ip``, ``port``, ``protocol``.
    """
    octavia = get_openstack_client('load_balancer')

    if not args.ip:
        # No request is made without an IP; previously this path reached
        # ``resp.ok`` with ``resp`` unbound and crashed with a traceback.
        metric_bool('client_success', False, m_name='maas_octavia')
        status_err('No IP address provided for the local Octavia API check',
                   m_name='maas_octavia')
        return

    try:
        octavia_local_endpoint = generate_local_endpoint(
            str(octavia.get_endpoint()), args.ip, args.port, args.protocol,
            '/lbaas/loadbalancers')
        resp = octavia.session.get(octavia_local_endpoint, timeout=180)

    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
        metric_bool('client_success', False, m_name='maas_octavia')
    # Any other exception presumably isn't an API error
    except Exception as e:
        metric_bool('client_success', False, m_name='maas_octavia')
        status_err(str(e), m_name='maas_octavia')
    else:
        is_up = resp.ok
        metric_bool('client_success', True, m_name='maas_octavia')

    status_ok(m_name='maas_octavia')
    metric_bool('octavia_api_local_status', is_up, m_name='maas_octavia')
    if is_up:
        # only want to send other metrics if api is up
        loadbalancers = resp.json()['loadbalancers']
        num = len([
            lb for lb in loadbalancers if lb['provisioning_status'] == 'ERROR'
        ])
        # NOTE(review): the unit is 'ms' although the value is a count -
        # confirm before changing, consumers may depend on it.
        metric('octavia_num_lb_in_error_status', 'uint32', num, 'ms')
def check(args, tenant_id):
    """Check the local Heat API through the heat client.

    Builds a client pointed at port 8004 on ``args.ip`` for ``tenant_id``
    and times ``build_info.build_info()``. An ``HTTPException`` while
    building the client marks the API as down.

    Emits ``heat_api_local_status`` and, when up,
    ``heat_api_local_response_time`` in milliseconds.

    :param args: parsed arguments providing ``ip``.
    :param tenant_id: tenant identifier placed in the endpoint URL.
    """
    heat_url = 'http://{ip}:8004/v1/{tenant}'.format(
        ip=args.ip, tenant=tenant_id)

    try:
        client = get_heat_client(endpoint=heat_url)
        is_up = True
    except exc.HTTPException:
        is_up = False
    # Any other exception presumably isn't an API error
    except Exception as err:
        status_err(str(err))
    else:
        # time something arbitrary
        began = time()
        client.build_info.build_info()
        milliseconds = (time() - began) * 1000

    status_ok()
    metric_bool('heat_api_local_status', is_up)
    # Response time is only reported while the API is up.
    if not is_up:
        return
    metric('heat_api_local_response_time', 'uint32',
           '%.3f' % milliseconds, 'ms')
Exemple #25
0
def main(args):
    """Check a memcached instance and emit its statistics.

    Fetches stats through ``item_stats`` for ``args.ip`` / ``args.port``.
    A ``TypeError`` or ``IndexError`` marks the service as down. A version
    that is not listed in ``VERSIONS`` is reported through ``status_err``.

    Emits ``client_success``, ``memcache_api_local_status`` and, when up,
    one ``memcache_<name>`` metric per entry of ``MEMCACHE_METRICS``.

    :param args: parsed arguments providing ``ip`` and ``port``.
    """
    address = str(args.ip)
    tcp_port = args.port
    # Stays True unless the stats lookup below fails.
    is_up = True

    try:
        stats = item_stats(address, tcp_port)
        found_version = stats['version']
    except (TypeError, IndexError):
        is_up = False
        metric_bool('client_success', False, m_name='maas_memcached')
    else:
        metric_bool('client_success', True, m_name='maas_memcached')
        if found_version not in VERSIONS:
            status_err('This plugin has only been tested with version %s '
                       'of memcached, and you are using version %s' %
                       (VERSIONS, found_version),
                       m_name='maas_memcached')

    status_ok(m_name='maas_memcached')
    metric_bool('memcache_api_local_status', is_up, m_name='maas_memcached')
    if not is_up:
        return
    for name, unit in MEMCACHE_METRICS.iteritems():
        metric('memcache_%s' % name, 'uint64', stats[name], unit)
def check(auth_ref, args):
    """Check the local Ironic API through the ironic client.

    Builds a client (pointed at ``args.protocol``://``args.ip``:``args.port``
    when an ip is given) and times ``node.list()``. A ``ClientException``
    while building the client marks the API as down.

    Emits ``ironic_api_local_status`` and, when up,
    ``ironic_api_local_response_time`` in milliseconds.

    :param auth_ref: accepted but not read in this function.
    :param args: parsed arguments providing ``ip``, ``port``, ``protocol``.
    """
    local_url = '{protocol}://{ip}:{port}/v1'.format(
        ip=args.ip, protocol=args.protocol, port=args.port)

    try:
        ironic = (get_ironic_client(endpoint=local_url)
                  if args.ip else get_ironic_client())
        is_up = True
    except exc.ClientException:
        is_up = False
    # Any other exception presumably isn't an API error
    except Exception as err:
        metric_bool('client_success', False, m_name='maas_ironic')
        status_err(str(err), m_name='maas_ironic')
    else:
        metric_bool('client_success', True, m_name='maas_ironic')
        # time something arbitrary
        began = time.time()
        ironic.node.list()
        milliseconds = (time.time() - began) * 1000

    status_ok(m_name='maas_ironic')
    metric_bool('ironic_api_local_status', is_up, m_name='maas_ironic')
    # Response time is only reported while the API is up.
    if not is_up:
        return
    metric('ironic_api_local_response_time', 'double',
           '%.3f' % milliseconds, 'ms')
Exemple #27
0
def check(auth_ref, args):
    """Report the state of Cinder services as ``cinder_service`` metrics.

    Fetches ``/os-services`` from port 8776 on ``args.hostname``. When
    ``args.host`` is set, only services on that host (or on
    ``<host>@<backend>``) are reported, named ``<binary>_status`` or
    ``<binary>-<backend>_status``. Otherwise every service is reported as
    ``<binary>_on_host_<host>``. A service counts as down only when it is
    enabled and its state is not ``up``.

    :param auth_ref: auth reference used to build the keystone client.
    :param args: parsed arguments providing ``hostname`` and ``host``.
    """
    keystone = get_keystone_client(auth_ref)
    token = keystone.auth_token

    volume_url = 'http://{hostname}:8776/v1/{tenant}'.format(
        hostname=args.hostname, tenant=keystone.tenant_id)

    request_headers = {
        'Content-type': 'application/json',
        'x-auth-token': token
    }
    session = requests.Session()
    session.headers.update(request_headers)

    try:
        # /os-services?host=X cannot be used because cinder reports the
        # cinder-volume binary with a hostname of X@lvm.
        r = session.get('%s/os-services' % volume_url,
                        verify=False, timeout=10)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout) as err:
        status_err(str(err))

    if not r.ok:
        status_err('Could not get response from Cinder API')

    services = r.json()['services']

    # Match both a host of X and X@lvm (or whatever backend).
    if args.host:
        prefix = args.host + '@'
        services = [
            svc for svc in services
            if svc['host'].startswith(prefix) or svc['host'] == args.host
        ]

    if len(services) == 0:
        status_err('No host(s) found in the service list')

    for svc in services:
        if args.host:
            label = '%s_status' % svc['binary']
            healthy = not (svc['status'] == 'enabled'
                           and svc['state'] != 'up')
            if '@' in svc['host']:
                # Exactly one '@' is expected; anything else raises here.
                _, backend_name = svc['host'].split('@')
                label = '%s-%s_status' % (svc['binary'], backend_name)
        else:
            healthy = not (svc['status'] == 'enabled'
                           and svc['state'] != 'up')
            label = '%s_on_host_%s' % (svc['binary'], svc['host'])
        metric('cinder_service', label, str(int(healthy)))
def check(args):
    """Check the local Nova API through the nova client.

    Builds a client pointed at port 8774 on ``args.ip`` and times
    ``services.list()``. A ``ClientException`` while building the client
    marks the API as down.

    Emits ``nova_api_local_status`` and, when up,
    ``nova_api_local_response_time`` in milliseconds.

    :param args: parsed arguments providing ``ip``.
    """
    compute_url = 'http://{ip}:8774/v3'.format(ip=args.ip)

    try:
        client = get_nova_client(bypass_url=compute_url)
        is_up = True
    except exc.ClientException:
        is_up = False
    # Any other exception presumably isn't an API error
    except Exception as err:
        status_err(str(err))
    else:
        # time something arbitrary
        began = time()
        client.services.list()
        milliseconds = (time() - began) * 1000

    status_ok()
    metric_bool('nova_api_local_status', is_up)
    # Response time is only reported while the API is up.
    if not is_up:
        return
    metric('nova_api_local_response_time', 'uint32',
           '%.3f' % milliseconds, 'ms')
Exemple #29
0
def get_rgw_checkup(client, keyring=None, rgw_address=None,
                    container_name=None):
    """Emit the ``rgw_up`` metric for a Ceph RADOS gateway.

    The value comes from ``get_ceph_rgw_hostcheck`` for ``rgw_address``,
    optionally scoped to ``container_name``. ``client`` and ``keyring``
    are accepted but not read in this function.
    """
    maas_common.metric(
        'rgw_up', 'uint32',
        get_ceph_rgw_hostcheck(rgw_address, container_name=container_name))
def check(auth_ref, args):
    """Report whether the magnum API answers and how long a listing takes.

    A client is built against ``http://<args.ip>:9511/v1`` when ``args.ip``
    is set, otherwise with the factory defaults. Emits ``client_success``,
    ``magnum_api_local_status`` and, when the client was created,
    ``magnum_api_local_response_time`` in milliseconds.

    :param auth_ref: accepted but not used by this function.
    :param args: parsed arguments; only ``args.ip`` is read here.
    """
    m_name = 'maas_magnum'
    local_endpoint = 'http://{ip}:9511/v1'.format(ip=args.ip)

    try:
        magnum = (get_magnum_client(endpoint=local_endpoint)
                  if args.ip else get_magnum_client())
        api_is_up = True
    except exc.HttpError:
        api_is_up = False
        metric_bool('client_success', False, m_name=m_name)
    except Exception as err:
        # Anything that is not an HTTP error is treated as a plugin failure.
        metric_bool('client_success', False, m_name=m_name)
        status_err(str(err), m_name=m_name)
    else:
        metric_bool('client_success', True, m_name=m_name)
        # Time an arbitrary, cheap API call.
        began = time.time()
        magnum.cluster_templates.list()
        milliseconds = (time.time() - began) * 1000

    status_ok(m_name=m_name)
    metric_bool('magnum_api_local_status', api_is_up, m_name=m_name)
    if not api_is_up:
        # Latency is only meaningful when the API is up.
        return
    metric('magnum_api_local_response_time', 'double',
           '%.3f' % milliseconds, 'ms')
def check(args):
    """Probe the local nova API and report latency and server state counts.

    Builds a client against ``http://<args.ip>:8774/v3``, times one
    ``services.list()`` call and tallies ``nova.servers.list()`` by status.
    Emits ``nova_api_local_status`` and, when the client was created, the
    response time plus one ``nova_servers_in_state_<status>`` metric for
    every entry of ``SERVER_STATUSES``.

    :param args: parsed arguments; only ``args.ip`` is read here.
    """
    endpoint = 'http://{ip}:8774/v3'.format(ip=args.ip)

    try:
        nova = get_nova_client(bypass_url=endpoint)
        is_up = True
    except exc.ClientException:
        is_up = False
    except Exception as err:
        # Anything else presumably isn't an API error.
        status_err(str(err))
    else:
        # Time an arbitrary API call.
        began = time()
        nova.services.list()
        milliseconds = (time() - began) * 1000

        # Tally servers per status.
        status_count = collections.Counter(
            server.status for server in nova.servers.list())

    status_ok()
    metric_bool('nova_api_local_status', is_up)
    if not is_up:
        # The remaining metrics are only sent when the API is up.
        return

    metric('nova_api_local_response_time', 'double', '%.3f' % milliseconds,
           'ms')
    for state in SERVER_STATUSES:
        metric('nova_servers_in_state_%s' % state, 'uint32',
               status_count[state])
def check(args):
    """Probe the local ironic API by requesting its ``/nodes`` resource.

    The URL comes from ``generate_local_endpoint`` using the client's
    endpoint together with ``args.ip``, ``args.port`` and ``args.protocol``.
    Emits ``ironic_api_local_status`` (true only for HTTP 200) and, when up,
    ``ironic_api_local_response_time`` in milliseconds.

    :param args: parsed arguments providing ``ip``, ``port``, ``protocol``.
    """
    m_name = 'maas_ironic'
    ironic = get_openstack_client('baremetal')

    try:
        local_url = generate_local_endpoint(
            str(ironic.get_endpoint()), args.ip, args.port,
            args.protocol, '/nodes')
        resp = ironic.session.get(local_url)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        # Transport-level failures mean the API is down.
        is_up = False
    except Exception as err:
        # Anything else presumably isn't an API error.
        metric_bool('client_success', False, m_name=m_name)
        status_err(str(err), m_name=m_name)
    else:
        is_up = resp.status_code == 200
        milliseconds = resp.elapsed.total_seconds() * 1000
        metric_bool('client_success', True, m_name=m_name)

    status_ok(m_name=m_name)
    metric_bool('ironic_api_local_status', is_up, m_name=m_name)
    if not is_up:
        return
    metric('ironic_api_local_response_time', 'double',
           '%.3f' % milliseconds, 'ms')
Exemple #33
0
def check(args):
    """Emit ``cloud_resource_<name>`` metrics from nova hypervisor details.

    Every hypervisor is fetched individually and, for each entry of
    ``stats_mapping``, the mapped attribute is read, multiplied by the
    matching allocation ratio for ``total_vcpus`` / ``total_memory``, and
    stored together with its unit and type.

    :param args: parsed arguments providing ``cpu_allocation_ratio`` and
        ``mem_allocation_ratio``.
    """
    try:
        nova = get_openstack_client('compute')
    except Exception as err:
        metric_bool('client_success', False, m_name='maas_nova')
        status_err(str(err), m_name='maas_nova')
    else:
        metric_bool('client_success', True, m_name='maas_nova')
        # Collect per-hypervisor details.
        hypervisors = [nova.get_hypervisor(h.id) for h in nova.hypervisors()]
        cloud_stats = collections.defaultdict(dict)
        # NOTE(review): each hypervisor overwrites the previous value, so the
        # emitted numbers come from the last hypervisor only (with 'count'
        # being its 1-based position) -- confirm this is intended.
        for position, hypervisor in enumerate(hypervisors, 1):
            hypervisor.count = position
            for metric_name, vals in stats_mapping.items():
                if metric_name == 'total_vcpus':
                    multiplier = args.cpu_allocation_ratio
                elif metric_name == 'total_memory':
                    multiplier = args.mem_allocation_ratio
                else:
                    multiplier = 1
                value = getattr(hypervisor, vals['stat_name']) * multiplier
                cloud_stats[metric_name].update(
                    value=value, unit=vals['unit'], type=vals['type'])

    status_ok(m_name='maas_nova')
    for metric_name, entry in cloud_stats.items():
        metric('cloud_resource_%s' % metric_name,
               entry['type'], entry['value'], entry['unit'])
def check(args):
    """Probe the nova metadata service root and report status and latency.

    Requests ``<protocol>://<ip>:<port>/`` (certificate verification off,
    180 s timeout). The service counts as up when the response is OK and
    one of its lines is exactly ``1.0``.

    :param args: parsed arguments providing ``ip``, ``protocol``, ``port``.
    """
    m_name = 'maas_nova'
    endpoint = '{protocol}://{ip}:{port}'.format(
        ip=args.ip, protocol=args.protocol, port=args.port)
    is_up = True

    session = requests.Session()

    try:
        # Only / (the ec2 version list) can be fetched without an instance
        # ID and extra headers.
        versions = session.get('%s/' % endpoint, verify=False, timeout=180)
        milliseconds = versions.elapsed.total_seconds() * 1000
        listed = versions.ok and (
            '1.0' in versions.content.decode().splitlines())
        if not listed:
            is_up = False
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
        metric_bool('client_success', False, m_name=m_name)
    except Exception as err:
        metric_bool('client_success', False, m_name=m_name)
        status_err(str(err), m_name=m_name)
    else:
        metric_bool('client_success', True, m_name=m_name)

    status_ok(m_name=m_name)
    metric_bool('nova_api_metadata_local_status', is_up, m_name=m_name)
    if not is_up:
        # Latency is only sent when the service is up.
        return
    metric('nova_api_metadata_local_response_time', 'double',
           '%.3f' % milliseconds, 'ms')
def check(auth_ref, args):
    """Check the magnum API and emit its status and response time.

    When ``args.ip`` is set the client targets ``http://<ip>:9511/v1``;
    otherwise ``get_magnum_client()`` is called without an endpoint. An
    ``exc.HttpError`` while creating the client marks the API as down.

    :param auth_ref: accepted but not used by this function.
    :param args: parsed arguments; only ``args.ip`` is read here.
    """
    endpoint = 'http://{ip}:9511/v1'.format(ip=args.ip)
    client_kwargs = {'endpoint': endpoint} if args.ip else {}

    def report_client(success):
        # Single place that emits the client_success flag.
        metric_bool('client_success', success, m_name='maas_magnum')

    try:
        magnum = get_magnum_client(**client_kwargs)
        api_is_up = True
    except exc.HttpError:
        api_is_up = False
        report_client(False)
    except Exception as err:
        # Any other exception presumably isn't an API error.
        report_client(False)
        status_err(str(err), m_name='maas_magnum')
    else:
        report_client(True)
        # Time something arbitrary.
        t0 = time.time()
        magnum.cluster_templates.list()
        t1 = time.time()
        milliseconds = (t1 - t0) * 1000

    status_ok(m_name='maas_magnum')
    metric_bool('magnum_api_local_status', api_is_up, m_name='maas_magnum')
    if api_is_up:
        # Response time is only sent when the API is up.
        metric('magnum_api_local_response_time', 'double',
               '%.3f' % milliseconds, 'ms')
def check(auth_ref, args):
    """Emit up/total ironic node counts.

    A client is built against ``<protocol>://<ip>:<port>/v1`` when
    ``args.ip`` is set, otherwise with the factory defaults. All nodes are
    listed and those flagged ``maintenance`` are subtracted to obtain the
    "up" count. When client creation raises ``exc.ClientException`` the
    function emits nothing at all.

    :param auth_ref: accepted but not used by this function.
    :param args: parsed arguments providing ``ip``, ``protocol``, ``port``.
    """
    m_name = 'maas_ironic'
    endpoint = '{protocol}://{ip}:{port}/v1'.format(
        ip=args.ip, protocol=args.protocol, port=args.port)

    try:
        ironic = (get_ironic_client(endpoint=endpoint)
                  if args.ip else get_ironic_client())
        is_up = True
    except exc.ClientException:
        is_up = False
    except Exception as err:
        # Any other exception presumably isn't an API error.
        metric_bool('client_success', False, m_name=m_name)
        status_err(str(err), m_name=m_name)
        return
    else:
        metric_bool('client_success', True, m_name=m_name)
        # limit=0 lists every node without pagination.
        all_nodes = ironic.node.list(limit=0)
        status_ok(m_name=m_name)

    if not is_up:
        return

    total_nodes = len(all_nodes)
    in_maintenance = sum(1 for node in all_nodes if node.maintenance)
    metric('ironic_up_nodes_count', 'uint32', total_nodes - in_maintenance)
    metric('ironic_total_nodes_count', 'uint32', total_nodes)
def check(auth_ref, args):
    """Probe the octavia API with a one-item load balancer listing.

    Uses ``http://<args.ip>:9876/v1`` when ``args.ip`` is set, otherwise the
    'internal' 'load-balancer' endpoint looked up through ``auth_ref``.
    Emits ``client_success``, ``octavia_api_local_status`` (true only for
    HTTP 200) and, when up, ``octavia_api_local_response_time`` in ms.

    :param auth_ref: passed to ``get_endpoint_url_for_service``.
    :param args: parsed arguments; only ``args.ip`` is read here.
    """
    # NOTE(review): this base already ends in '/v1' and the request below
    # appends '/v1/...' again -- confirm the resulting URL is intended.
    OCTAVIA_ENDPOINT = 'http://{ip}:9876/v1'.format(ip=args.ip,)

    try:
        if args.ip:
            endpoint = OCTAVIA_ENDPOINT
        else:
            endpoint = get_endpoint_url_for_service(
                'load-balancer', auth_ref, 'internal')
        # time something arbitrary
        start = datetime.datetime.now()
        r = requests.get(endpoint + "/v1/loadbalancers?limit=1")
        end = datetime.datetime.now()
        api_is_up = (r.status_code == 200)
    except (requests.HTTPError, requests.Timeout, requests.ConnectionError):
        api_is_up = False
        metric_bool('client_success', False, m_name='maas_octavia')
    # Any other exception presumably isn't an API error
    except Exception as e:
        metric_bool('client_success', False, m_name='maas_octavia')
        status_err(str(e), m_name='maas_octavia')
    else:
        metric_bool('client_success', True, m_name='maas_octavia')
        # total_seconds() keeps sub-millisecond precision on every Python
        # version; the previous integer arithmetic truncated to whole
        # milliseconds under Python 2 and ignored the timedelta's days.
        milliseconds = (end - start).total_seconds() * 1000

    status_ok(m_name='maas_octavia')
    metric_bool('octavia_api_local_status', api_is_up, m_name='maas_octavia')
    if api_is_up:
        # only want to send other metrics if api is up
        metric('octavia_api_local_response_time',
               'double',
               '%.3f' % milliseconds,
               'ms')
def check(args, tenant_id):
    """Probe the local ceilometer API and report availability and latency.

    Builds a client against ``http://<args.ip>:8777``, lists the meters and
    iterates over them, timing the whole listing. Emits
    ``ceilometer_api_local_status`` and, when up,
    ``ceilometer_api_local_response_time`` in milliseconds.

    :param args: parsed arguments; only ``args.ip`` is read here.
    :param tenant_id: accepted but not used by this function.
    """
    CEILOMETER_ENDPOINT = 'http://{ip}:8777'.format(ip=args.ip)

    try:
        ceilometer = get_ceilometer_client(endpoint=CEILOMETER_ENDPOINT)
        # time something arbitrary
        start = time()
        meters = ceilometer.meters.list()
        # Exceptions are only thrown when we iterate over meter, so the
        # iteration has to happen inside the try block for the handlers
        # below to see API failures.
        [i.meter_id for i in meters]
        end = time()
    except exc.HTTPException:
        is_up = False
    # Any other exception presumably isn't an API error
    except Exception as e:
        status_err(str(e))
    else:
        is_up = True
        milliseconds = (end - start) * 1000

    status_ok()
    metric_bool('ceilometer_api_local_status', is_up)
    if is_up:
        # only want to send other metrics if api is up
        metric('ceilometer_api_local_response_time',
               'double',
               '%.3f' % milliseconds,
               'ms')
def check(auth_ref, args):
    """Emit ``cloud_resource_<name>`` metrics from nova hypervisor statistics.

    A client is built against ``http://<args.ip>:8774/v2/<tenant_id>`` when
    ``args.ip`` is set, otherwise with the factory defaults. For each entry
    of ``stats_mapping`` the mapped attribute of the aggregated hypervisor
    statistics is emitted with the configured type and unit.

    :param auth_ref: passed to ``get_keystone_client`` to obtain the tenant.
    :param args: parsed arguments; only ``args.ip`` is read here.
    """
    keystone = get_keystone_client(auth_ref)
    tenant_id = keystone.tenant_id

    COMPUTE_ENDPOINT = (
        'http://{ip}:8774/v2/{tenant_id}'.format(ip=args.ip,
                                                 tenant_id=tenant_id)
    )

    try:
        if args.ip:
            nova = get_nova_client(bypass_url=COMPUTE_ENDPOINT)
        else:
            nova = get_nova_client()

    except Exception as e:
        status_err(str(e))
    else:
        # get some cloud stats
        stats = nova.hypervisor_stats.statistics()
        cloud_stats = collections.defaultdict(dict)
        # .items() exists on Python 2 and 3; .iteritems() is Python 2 only.
        for metric_name, vals in stats_mapping.items():
            cloud_stats[metric_name]['value'] = \
                getattr(stats, vals['stat_name'])
            cloud_stats[metric_name]['unit'] = \
                vals['unit']
            cloud_stats[metric_name]['type'] = \
                vals['type']

    status_ok()
    # Iterating the dict yields its keys on every Python version
    # (.iterkeys() is Python 2 only).
    for metric_name in cloud_stats:
        metric('cloud_resource_%s' % metric_name,
               cloud_stats[metric_name]['type'],
               cloud_stats[metric_name]['value'],
               cloud_stats[metric_name]['unit'])
Exemple #40
0
def get_health_checks(client=None,
                      keyring=None,
                      section=None,
                      container_name=None,
                      deploy_osp=False):
    """Emit one ``uint32`` metric per detailed ceph health check.

    For every check name in ``DETAILED_CHECKS[section]`` the value is
    ``STATUSES[<severity>]`` when the check appears in the ceph status
    health report, and ``STATUSES['HEALTH_OK']`` otherwise. All values are
    computed first and submitted afterwards.
    """
    ceph_status = get_ceph_status(client=client,
                                  keyring=keyring,
                                  container_name=container_name,
                                  deploy_osp=deploy_osp)

    gathered = []
    for check_name in DETAILED_CHECKS[section]:
        reported = ceph_status['health']['checks']
        if check_name in reported:
            # The check is currently raised: map its severity.
            level = STATUSES[reported[check_name]['severity']]
        else:
            # Checks that are not raised are reported as healthy.
            level = STATUSES['HEALTH_OK']
        gathered.append(
            {'name': check_name, 'type': 'uint32', 'value': level})

    # Submit gathered metrics
    for entry in gathered:
        metric(entry['name'], entry['type'], entry['value'])
Exemple #41
0
def main():
    """Collect the RabbitMQ metrics and emit them.

    Runs each ``_get_*_metrics`` collector against one authenticated
    session, all filling the same ``metrics`` dict. Boolean values are
    emitted negated as ``rabbitmq_<key>_status``; all others as ``int64``
    ``rabbitmq_<key>`` metrics with their unit.
    """
    metrics = {}
    # One session carries the credentials for every request.
    session = requests.Session()
    session.auth = (options.username, options.password)

    if options.https:
        protocol = 'https'
    else:
        protocol = 'http'

    _get_connection_metrics(session, metrics, protocol,
                            options.host, options.port)
    _get_overview_metrics(session, metrics, protocol,
                          options.host, options.port)
    _get_node_metrics(session, metrics, protocol, options.host,
                      options.port, options.name)
    _get_queue_metrics(session, metrics, protocol, options.host,
                       options.port)
    _get_consumer_metrics(session, metrics, protocol, options.host,
                          options.port)

    status_ok(m_name='maas_rabbitmq')

    for key, data in metrics.items():
        value = data['value']
        if isinstance(value, bool):
            metric_bool('rabbitmq_%s_status' % key, not value)
        else:
            metric('rabbitmq_%s' % key, 'int64', value, data['unit'])
Exemple #42
0
def check(args):
    """Probe designate mdns with a UDP DNS query and report its latency.

    Sends an ``A`` query for ``example.org`` to ``args.ip`` on port 5354
    with a 5 second timeout. Any answer with a return code of at most 16
    counts as up; a DNS timeout counts as down.

    :param args: parsed arguments; only ``args.ip`` is read here.
    """
    try:
        # attempt a query to example.org
        # return good check on any valid response
        start = datetime.datetime.now()
        message = dns.message.make_query("example.org", "A")
        answer = dns.query.udp(message, timeout=5, where=args.ip, port=5354)
        end = datetime.datetime.now()
        # int of return code
        mdns_is_up = (answer.rcode() <= 16)
    except (dns.exception.Timeout):
        mdns_is_up = False
        metric_bool('client_success', False, m_name='maas_designate')
    except Exception as e:
        metric_bool('client_success', False, m_name='maas_designate')
        status_err(str(e), m_name='maas_designate')
    else:
        metric_bool('client_success', True, m_name='maas_designate')
        # total_seconds() keeps sub-millisecond precision on every Python
        # version; the previous integer arithmetic truncated to whole
        # milliseconds under Python 2.
        milliseconds = (end - start).total_seconds() * 1000

    status_ok(m_name='maas_designate')
    metric_bool('designate_mdns_local_status',
                mdns_is_up, m_name='maas_designate')
    if mdns_is_up:
        # only want to send other metrics if api is up
        metric('designate_mdns_local_response_time',
               'double',
               '%.3f' % milliseconds,
               'ms')
Exemple #43
0
def check(auth_ref, args):
    """Report heat API availability and the latency of a build-info call.

    A client is built against ``http://<args.ip>:8004/v1/<tenant>`` when
    ``args.ip`` is set, otherwise with the factory defaults. Both metrics
    are emitted under the ``heat_api`` name; no status line is emitted by
    this function.

    :param auth_ref: passed to ``get_keystone_client`` to obtain the tenant.
    :param args: parsed arguments; only ``args.ip`` is read here.
    """
    tenant_id = get_keystone_client(auth_ref).tenant_id
    endpoint = 'http://{ip}:8004/v1/{tenant}'.format(ip=args.ip,
                                                     tenant=tenant_id)

    try:
        heat = (get_heat_client(endpoint=endpoint)
                if args.ip else get_heat_client())
        is_up = True
    except exc.HTTPException:
        is_up = False
    except Exception as err:
        # Any other exception presumably isn't an API error.
        status_err(str(err))
    else:
        # Time something arbitrary.
        began = time.time()
        heat.build_info.build_info()
        milliseconds = (time.time() - began) * 1000

    metric('heat_api', 'heat_api_local_status', str(int(is_up)))
    if not is_up:
        # Latency is only sent when the API is up.
        return
    metric('heat_api', 'heat_api_local_response_time',
           '%.3f' % milliseconds)
def check(auth_ref, args):
    """Report glance API availability, latency and image counts by status.

    A client is built against ``http://<args.ip>:9292/v1`` when ``args.ip``
    is set, otherwise with the factory defaults. One image listing is timed
    and a second one is tallied by status; one ``glance_<status>_images``
    metric is emitted per entry of ``IMAGE_STATUSES``.

    :param auth_ref: accepted but not used by this function.
    :param args: parsed arguments; only ``args.ip`` is read here.
    """
    endpoint = 'http://{ip}:9292/v1'.format(ip=args.ip)

    try:
        glance = (get_glance_client(endpoint=endpoint)
                  if args.ip else get_glance_client())
        is_up = True
    except exc.HTTPException:
        is_up = False
    except Exception as err:
        # Any other exception presumably isn't an API error.
        status_err(str(err))
    else:
        # Time something arbitrary.
        began = time.time()
        glance.images.list(search_opts={'all_tenants': 1})
        milliseconds = (time.time() - began) * 1000
        # Tally images per status from a fresh listing.
        status_count = collections.Counter(
            image.status
            for image in glance.images.list(search_opts={'all_tenants': 1}))

    status_ok()
    metric_bool('glance_api_local_status', is_up)

    if not is_up:
        # The remaining metrics are only sent when the API is up.
        return

    metric('glance_api_local_response_time', 'double',
           '%.3f' % milliseconds, 'ms')
    for state in IMAGE_STATUSES:
        metric('glance_%s_images' % state, 'uint32', status_count[state],
               'images')
def check(args):
    """Probe the local octavia API with a one-item load balancer listing.

    The URL comes from ``generate_local_endpoint`` using the client's
    endpoint together with ``args.ip``, ``args.port`` and ``args.protocol``;
    the request uses a 180 second timeout. Emits ``client_success``,
    ``octavia_api_local_status`` and, when up, the response time in ms.

    :param args: parsed arguments providing ``ip``, ``port``, ``protocol``.
    """
    m_name = 'maas_octavia'
    octavia = get_openstack_client('load_balancer')

    try:
        # NOTE(review): resp is only bound when args.ip is truthy; without
        # it the else branch below fails on the unbound name -- confirm
        # that args.ip is always supplied.
        if args.ip:
            local_url = generate_local_endpoint(
                str(octavia.get_endpoint()), args.ip, args.port,
                args.protocol, '/lbaas/loadbalancers?limit=1')
            resp = octavia.session.get(local_url, timeout=180)
    except (exc.HTTPError, exc.Timeout, exc.ConnectionError):
        is_up = False
        metric_bool('client_success', False, m_name=m_name)
    except Exception as err:
        # Any other exception presumably isn't an API error.
        metric_bool('client_success', False, m_name=m_name)
        status_err(str(err), m_name=m_name)
    else:
        is_up = resp.ok
        metric_bool('client_success', True, m_name=m_name)
        milliseconds = resp.elapsed.total_seconds() * 1000

    status_ok(m_name=m_name)
    metric_bool('octavia_api_local_status', is_up, m_name=m_name)
    if not is_up:
        # Latency is only sent when the API is up.
        return
    metric('octavia_api_local_response_time', 'double',
           '%.3f' % milliseconds, 'ms')
def check(args, tenant_id):
    """Report heat API availability, latency and the number of stacks.

    Builds a client against ``http://<args.ip>:8004/v1/<tenant_id>``, times
    a build-info call and counts the entries of ``heat.stacks.list()``.

    :param args: parsed arguments; only ``args.ip`` is read here.
    :param tenant_id: tenant used in the endpoint URL.
    """
    endpoint = 'http://{ip}:8004/v1/{tenant}'.format(ip=args.ip,
                                                     tenant=tenant_id)

    try:
        heat = get_heat_client(endpoint=endpoint)
        is_up = True
    except exc.HTTPException:
        is_up = False
    except Exception as err:
        # Any other exception presumably isn't an API error.
        status_err(str(err))
    else:
        # Time something arbitrary.
        began = time()
        heat.build_info.build_info()
        milliseconds = (time() - began) * 1000

        # Materialise the listing to count the stacks.
        stack_count = len(list(heat.stacks.list()))

    status_ok()
    metric_bool('heat_api_local_status', is_up)
    if not is_up:
        # The remaining metrics are only sent when the API is up.
        return
    metric('heat_api_local_response_time', 'double',
           '%.3f' % milliseconds, 'ms')
    metric('heat_stack_count', 'uint32', stack_count, 'stacks')
Exemple #47
0
def get_cluster_statistics(client=None, keyring=None, container_name=None):
    """Emit cluster-wide ceph metrics derived from the ceph status report.

    Covers overall health, monmap/osdmap epochs, OSD counts (total/up/in),
    cluster size and utilisation in KiB, and PG counts (total and
    ``active+clean``). All values are gathered first and submitted last.

    :param client: forwarded to ``get_ceph_status``.
    :param keyring: forwarded to ``get_ceph_status``.
    :param container_name: forwarded to ``get_ceph_status``.
    """
    ceph_status = get_ceph_status(client=client,
                                  keyring=keyring,
                                  container_name=container_name)
    health = ceph_status['health']
    osdmap = ceph_status['osdmap']['osdmap']
    pgmap = ceph_status['pgmap']

    # Get overall cluster health
    # For luminous+ this is the ceph_status.health.status
    # For < Luminous this is the ceph_status.health.overall_status
    # Only fall back to 'overall_status' when 'status' is absent, so a
    # report that carries just 'status' no longer raises KeyError.
    if 'status' in health:
        ceph_health_status = health['status']
    else:
        ceph_health_status = health['overall_status']

    metrics = [
        {'name': 'cluster_health',
         'type': 'uint32',
         'value': STATUSES[ceph_health_status]},
        # Collect epochs for the mon and osd maps
        {'name': "monmap_epoch",
         'type': 'uint32',
         'value': ceph_status['monmap']['epoch']},
        {'name': "osdmap_epoch",
         'type': 'uint32',
         'value': osdmap['epoch']},
    ]

    # Collect OSDs per state (fixed order keeps the output deterministic)
    for state, key in (('total', 'num_osds'),
                       ('up', 'num_up_osds'),
                       ('in', 'num_in_osds')):
        metrics.append({'name': 'osds_%s' % state,
                        'type': 'uint32',
                        'value': osdmap[key]})

    # Collect cluster size & utilisation. Floor division keeps the values
    # integral for the 'uint64' type on Python 3 as well as Python 2.
    for name, key in (('osds_kb_used', 'bytes_used'),
                      ('osds_kb_avail', 'bytes_avail'),
                      ('osds_kb', 'bytes_total')):
        metrics.append({'name': name,
                        'type': 'uint64',
                        'value': pgmap[key] // 1024})

    # Collect num PGs and num healthy PGs
    active_clean = 0
    for state in pgmap['pgs_by_state']:
        if state['state_name'] == 'active+clean':
            active_clean = state['count']
            break
    metrics.append({'name': 'pgs_total',
                    'type': 'uint32',
                    'value': pgmap['num_pgs']})
    metrics.append({'name': 'pgs_active_clean',
                    'type': 'uint32',
                    'value': active_clean})

    # Submit gathered metrics
    for m in metrics:
        maas_common.metric(m['name'], m['type'], m['value'])
def check(auth_ref, args):
    """Probe the local cinder API and report volume/snapshot statistics.

    Requests ``/volumes/detail`` and ``/snapshots/detail`` below
    ``http://<args.ip>:8776/v1/<tenant>`` (certificate verification off,
    5 s timeout each). The API counts as up when both responses are OK.
    When up, emits the volume-listing response time, totals, and one count
    per entry of ``VOLUME_STATUSES`` for both volumes and snapshots.

    :param auth_ref: passed to ``get_keystone_client`` for token and tenant.
    :param args: parsed arguments; only ``args.ip`` is read here.
    """
    keystone = get_keystone_client(auth_ref)
    auth_token = keystone.auth_token

    VOLUME_ENDPOINT = ('http://{ip}:8776/v1/{tenant}'.format
                       (ip=args.ip, tenant=keystone.tenant_id))

    s = requests.Session()

    s.headers.update(
        {'Content-type': 'application/json',
         'x-auth-token': auth_token})

    try:
        vol = s.get('%s/volumes/detail' % VOLUME_ENDPOINT,
                    verify=False,
                    timeout=5)
        milliseconds = vol.elapsed.total_seconds() * 1000
        snap = s.get('%s/snapshots/detail' % VOLUME_ENDPOINT,
                     verify=False,
                     timeout=5)
        is_up = vol.ok and snap.ok
    except (exc.ConnectionError,
            exc.HTTPError,
            exc.Timeout):
        is_up = False
        metric_bool('client_success', False, m_name='maas_cinder')
    except Exception as e:
        metric_bool('client_success', False, m_name='maas_cinder')
        status_err(str(e), m_name='maas_cinder')
    else:
        metric_bool('client_success', True, m_name='maas_cinder')
        # Only parse the bodies of successful responses: an error reply is
        # not expected to carry the 'volumes' / 'snapshots' payload, and the
        # statistics are only emitted when the API is up anyway.
        if is_up:
            # gather some metrics (each body is decoded once)
            volumes = vol.json()['volumes']
            vol_status_count = collections.Counter(
                v['status'] for v in volumes)
            total_vols = len(volumes)

            snapshots = snap.json()['snapshots']
            snap_status_count = collections.Counter(
                v['status'] for v in snapshots)
            total_snaps = len(snapshots)

    status_ok(m_name='maas_cinder')
    metric_bool('cinder_api_local_status', is_up, m_name='maas_cinder')
    # only want to send other metrics if api is up
    if is_up:
        metric('cinder_api_local_response_time',
               'double',
               '%.3f' % milliseconds,
               'ms')
        metric('total_cinder_volumes', 'uint32', total_vols, 'volumes')
        for status in VOLUME_STATUSES:
            metric('cinder_%s_volumes' % status,
                   'uint32',
                   vol_status_count[status], 'volumes')
        metric('total_cinder_snapshots', 'uint32', total_snaps, 'snapshots')
        for status in VOLUME_STATUSES:
            metric('cinder_%s_snaps' % status,
                   'uint32',
                   snap_status_count[status], 'snapshots')
def main():
    """Collect metrics via ``get_metrics`` and emit each as ``uint32``.

    A ``maas_common.MaaSException`` raised while collecting is reported
    through ``maas_common.status_err``; otherwise an OK status is emitted
    followed by one metric per entry, using its ``'value'`` field.
    """
    try:
        metrics = get_metrics()
    except maas_common.MaaSException as e:
        maas_common.status_err(str(e))
    else:
        maas_common.status_ok()
        # .items() exists on Python 2 and 3; .viewitems() is Python 2 only.
        for name, data in metrics.items():
            maas_common.metric(name, 'uint32', data['value'])
Exemple #50
0
def main():
    """Run the configured query and emit its hit count as ``HITS``."""
    options = parse_args()
    configure(options)

    # Build the query from the options and count its matches.
    num_hits = get_count_for_querystring(build_query(options))

    status_ok()
    metric('HITS', 'uint32', num_hits)
Exemple #51
0
def main():
    """Emit the number of ERROR and WARN* entries in the most recent index.

    Reads the module-level ``options`` for configuration.
    """
    configure(options)

    newest_index = most_recent_index()
    totals = (
        ('NUMBER_OF_LOG_ERRORS', get_number_of('ERROR', newest_index)),
        ('NUMBER_OF_LOG_WARNINGS', get_number_of('WARN*', newest_index)),
    )

    status_ok(m_name='maas_galera')
    for metric_name, total in totals:
        metric(metric_name, 'uint32', total)
def get_poller_fd_details():
    """Emit the poller's open file descriptor count and its hard limit.

    Nothing is emitted when ``_get_poller_proc`` returns ``None``.
    """
    poller = _get_poller_proc()
    if poller is not None:
        metric("maas_poller_fd_count", "uint32", poller.num_fds())

        # rlimit yields (soft, hard); only the hard limit is reported.
        _soft_limit, hard_limit = poller.rlimit(psutil.RLIMIT_NOFILE)
        metric("maas_poller_fd_max", "uint32", hard_limit)
def get_mon_statistics(report=None, host=None):
    """Emit quorum membership and health for the monitor named ``host``.

    ``mon_in_quorum`` is true when the rank of the first monitor in the
    monmap whose name equals ``host`` is listed in ``report['quorum']``.
    ``mon_health`` is ``STATUSES[...]`` of the first matching entry in the
    health services list, or 0 when no entry matches.
    """
    matching = [entry for entry in report['monmap']['mons']
                if entry['name'] == host]
    in_quorum = matching[0]['rank'] in report['quorum']
    maas_common.metric_bool('mon_in_quorum', in_quorum)

    mons_health = report['health']['health']['health_services'][0]['mons']
    # Take the first entry for this host; default to 0 when none exists.
    health_status = next(
        (STATUSES[entry['health']]
         for entry in mons_health if entry['name'] == host),
        0)
    maas_common.metric('mon_health', 'uint32', health_status)
def main():
    """Emit the number of ERROR and WARN* entries in the most recent index.

    Options come from ``parse_args`` (second element of its result is
    ignored) and are handed to ``configure`` before querying.
    """
    options, _ = parse_args()
    configure(options)

    newest_index = most_recent_index()
    error_total = get_number_of('ERROR', newest_index)
    warning_total = get_number_of('WARN*', newest_index)

    status_ok()
    for metric_name, total in (('NUMBER_OF_LOG_ERRORS', error_total),
                               ('NUMBER_OF_LOG_WARNINGS', warning_total)):
        metric(metric_name, 'uint32', total)
def get_mon_statistics(client=None, keyring=None, host=None):
    """Emit quorum membership and health for the monitor named ``host``.

    The ceph status is fetched with ``client`` / ``keyring``.
    ``mon_in_quorum`` is true when the rank of the first monmap entry named
    ``host`` is listed in the status' ``quorum``; ``mon_health`` is
    ``STATUSES[...]`` of the first matching health entry, or 0 if none.
    """
    status = get_ceph_status(client=client, keyring=keyring)

    candidates = []
    for entry in status['monmap']['mons']:
        if entry['name'] == host:
            candidates.append(entry)
    rank = candidates[0]['rank']
    maas_common.metric_bool('mon_in_quorum', rank in status['quorum'])

    health_status = 0
    mons_health = status['health']['health']['health_services'][0]['mons']
    for entry in mons_health:
        if entry['name'] != host:
            continue
        # First match wins.
        health_status = STATUSES[entry['health']]
        break
    maas_common.metric('mon_health', 'uint32', health_status)
Exemple #56
0
def check_for_failed_actions():
    """Emit ``pacemaker_failed_actions`` based on the ``pcs status`` output.

    The output is searched case-insensitively for any of a fixed list of
    problem keywords; the metric is a string describing the outcome.
    """
    output = check_command('pcs', 'status')
    problem_words = re.compile(
        "Failed|Stopped|Notice|Fail|Error|Warning|Faulty", flags=re.IGNORECASE)

    if problem_words.search(output):
        summary = 'Errors in pacemaker cluster'
    else:
        summary = 'Pacemaker cluster is OK'
    metric('pacemaker_failed_actions', 'string', summary)