Ejemplo n.º 1
0
def check(auth_ref, args):
    """Emit one cloud_resource_* metric per entry in stats_mapping.

    A nova client is created (against the local compute endpoint when
    args.ip is set), hypervisor statistics are fetched once, and each
    statistic named in stats_mapping is reported with its type and unit.

    :param auth_ref: auth reference handed to get_keystone_client.
    :param args: parsed arguments; only args.ip is read here.
    """
    keystone = get_keystone_client(auth_ref)
    tenant_id = keystone.tenant_id

    try:
        if args.ip:
            # Only build the bypass URL when an IP was actually supplied.
            compute_endpoint = 'http://{ip}:8774/v2/{tenant_id}'.format(
                ip=args.ip, tenant_id=tenant_id)
            nova = get_nova_client(bypass_url=compute_endpoint)
        else:
            nova = get_nova_client()
    except Exception as e:
        status_err(str(e))
        # Without a client nothing below can run, even if status_err
        # hands control back to us.
        return

    # get some cloud stats
    stats = nova.hypervisor_stats.statistics()
    cloud_stats = collections.defaultdict(dict)
    # .items() exists on both Python 2 and 3, unlike .iteritems().
    for metric_name, vals in stats_mapping.items():
        cloud_stats[metric_name]['value'] = getattr(stats, vals['stat_name'])
        cloud_stats[metric_name]['unit'] = vals['unit']
        cloud_stats[metric_name]['type'] = vals['type']

    status_ok()
    for metric_name, stat in cloud_stats.items():
        metric('cloud_resource_%s' % metric_name,
               stat['type'],
               stat['value'],
               stat['unit'])
Ejemplo n.º 2
0
def check(args, tenant_id):
    """Report local heat API status, response time and stack count.

    :param args: parsed arguments; args.ip selects the local endpoint.
    :param tenant_id: tenant identifier placed in the endpoint URL.
    """
    endpoint = 'http://{ip}:8004/v1/{tenant}'.format(ip=args.ip,
                                                     tenant=tenant_id)

    try:
        heat = get_heat_client(endpoint=endpoint)
        is_up = True
    except exc.HTTPException:
        # An HTTP-level failure is treated as "the API is down".
        is_up = False
    except Exception as err:
        # Anything else is reported as a check error, not an API outage.
        status_err(str(err))
    else:
        # Time an arbitrary, cheap API call.
        started = time()
        heat.build_info.build_info()
        milliseconds = (time() - started) * 1000

        # Extra metric: how many stacks exist.
        stack_count = len(list(heat.stacks.list()))

    status_ok()
    metric_bool('heat_api_local_status', is_up)
    if not is_up:
        # The remaining metrics only make sense when the API answered.
        return

    metric('heat_api_local_response_time', 'double',
           '%.3f' % milliseconds, 'ms')
    metric('heat_stack_count', 'uint32', stack_count, 'stacks')
Ejemplo n.º 3
0
def check_process_running(process_names, container_name=None):
    """Emit a boolean metric per process saying whether it is running.

    The running-process list is read via get_processes from the cgroup
    procs file of the named LXC container, or of the host when no
    container name is given.

    :param process_names: iterable of process names to look for.
    :param container_name: optional LXC container whose cgroup is read.
    """
    if not process_names:
        # Nothing was requested, so there is nothing to check.
        status_err('No process names provided')

    if container_name is None:
        procs_path = '/sys/fs/cgroup/cpu/cgroup.procs'
    else:
        # Look inside the container's cgroup rather than the host's.
        procs_path = os.path.join('/sys/fs/cgroup/cpu/lxc',
                                  container_name,
                                  'cgroup.procs')
    running = get_processes(procs_path)

    if not running:
        # No process list could be obtained for the host/container.
        status_err('Could not get a list of running processes')

    # A process list was obtained, so the check itself succeeded.
    status_ok()

    for name in process_names:
        is_running = name in running
        metric_bool('%s_process_status' % name, is_running)
Ejemplo n.º 4
0
def check(args):
    """Emit an up/down metric for every neutron agent.

    An agent counts as down only when it is administratively up but not
    alive. When args.host is set, only that host's agents are listed and
    metric names omit the agent id and host.

    :param args: parsed arguments; args.ip and args.host are read.
    """
    endpoint = 'http://{ip}:9696'.format(ip=args.ip)
    try:
        neutron = get_neutron_client(endpoint_url=endpoint)
    except Exception as err:
        # No API status metric is produced here, so any failure is an error.
        status_err(str(err))

    # Fetch the neutron agent list, optionally restricted to one host.
    if args.host:
        listing = neutron.list_agents(host=args.host)
    else:
        listing = neutron.list_agents()
    agents = listing['agents']

    if not len(agents):
        status_err("No host(s) found in the agents list")

    status_ok()
    for agent in agents:
        agent_is_up = not (agent['admin_state_up'] and not agent['alive'])

        if args.host:
            name = '%s_status' % agent['binary']
        else:
            name = '%s_%s_on_host_%s' % (
                agent['binary'], agent['id'], agent['host'])

        metric_bool(name, agent_is_up)
Ejemplo n.º 5
0
def main(args):
    """Run one OpenManage hardware report and emit its overall status.

    args.omc must hold exactly two items: the report type and the report
    request. The report output is matched against an entry of the
    module-level regex table and reported as hardware_<request>_status.

    :param args: parsed arguments; only args.omc is read.
    """
    if len(args.omc) != 2:
        # Keep `args` intact; use a separate name for the joined text.
        provided = ' '.join(args.omc)
        status_err(
            'Requires 2 arguments, arguments provided: "%s"' % provided,
            m_name='maas_hwvendor'
        )
        return

    report_type = args.omc[0].lower()
    report_request = args.omc[1].lower()

    # If we're not using the correct version of OpenManage, error out
    check_openmanage_version()

    try:
        report = hardware_report(report_type, report_request)
    except (OSError, subprocess.CalledProcessError) as e:
        metric_bool('hardware_%s_status' % report_request, False)
        status_err(str(e), m_name='maas_hwvendor')
        # There is no report to evaluate past this point.
        return

    status_ok(m_name='maas_hwvendor')
    # Power supplies are keyed by the request; everything else by type.
    if report_request == 'pwrsupplies':
        regex_key = report_request
    else:
        regex_key = report_type
    metric_bool('hardware_%s_status' % report_request,
                all_okay(report, regex[regex_key]))
def check(args):
    """Report local neutron API status and response time.

    :param args: parsed arguments; args.ip selects the local endpoint.
    """
    NETWORK_ENDPOINT = 'http://{ip}:9696'.format(ip=args.ip)

    try:
        neutron = get_neutron_client(endpoint_url=NETWORK_ENDPOINT)
        is_up = True
    # A NeutronClientException means the API is down; no further metrics
    # are sent in that case.
    except exc.NeutronClientException:
        is_up = False
    # Any other exception presumably isn't an API error
    except Exception as e:
        status_err(str(e))
    else:
        # time something arbitrary
        start = time()
        neutron.list_agents()
        end = time()
        milliseconds = (end - start) * 1000

    status_ok()
    metric_bool('neutron_api_local_status', is_up)
    # only want to send other metrics if api is up
    if is_up:
        # The value is a formatted fractional number, so it is declared
        # as a double rather than an unsigned integer.
        metric('neutron_api_local_response_time',
               'double',
               '%.3f' % milliseconds,
               'ms')
Ejemplo n.º 7
0
def check(args):
    """Report local designate API status and response time.

    A GET on the local /zones endpoint is used as the probe.

    :param args: parsed arguments; args.ip, args.port and args.protocol
        are used to build the local endpoint.
    """
    designate = get_openstack_client('dns')

    if not args.ip:
        # No request can be made without an IP. Fail with a clear message
        # instead of tripping over an unbound `resp` further down.
        metric_bool('client_success', False, m_name='maas_designate')
        status_err('No IP address supplied for the local designate '
                   'API check', m_name='maas_designate')
        return

    try:
        # Arbitrary call to /zones to ensure the local API is up
        designate_local_endpoint = generate_local_endpoint(
            str(designate.get_endpoint()), args.ip, args.port,
            args.protocol, '/zones')
        resp = designate.session.get(designate_local_endpoint, timeout=180)
        milliseconds = resp.elapsed.total_seconds() * 1000
        # NOTE(npawelek): At the time of converting to OpenStack SDK,
        # DNS is not yet fully integrated. Excluding integration with
        # the client directly until a later time.

        api_is_up = resp.ok
    except (exc.HTTPError, exc.Timeout, exc.ConnectionError):
        api_is_up = False
        metric_bool('client_success', False, m_name='maas_designate')
    # Any other exception presumably isn't an API error
    except Exception as e:
        metric_bool('client_success', False, m_name='maas_designate')
        status_err(str(e), m_name='maas_designate')
    else:
        metric_bool('client_success', True, m_name='maas_designate')

    status_ok(m_name='maas_designate')
    metric_bool('designate_api_local_status',
                api_is_up,
                m_name='maas_designate')
    if api_is_up:
        # only want to send other metrics if api is up
        metric('designate_api_local_response_time', 'double',
               '%.3f' % milliseconds, 'ms')
def check(args):
    """Report local octavia API status and response time.

    A GET on the local /lbaas/loadbalancers?limit=1 endpoint is used as
    the probe.

    :param args: parsed arguments; args.ip, args.port and args.protocol
        are used to build the local endpoint.
    """
    octavia = get_openstack_client('load_balancer')

    if not args.ip:
        # Without an IP no request is made, so there would be no response
        # to inspect below; report that explicitly.
        metric_bool('client_success', False, m_name='maas_octavia')
        status_err('No IP address supplied for the local octavia '
                   'API check', m_name='maas_octavia')
        return

    try:
        octavia_local_endpoint = generate_local_endpoint(
            str(octavia.get_endpoint()), args.ip, args.port, args.protocol,
            '/lbaas/loadbalancers?limit=1')
        resp = octavia.session.get(octavia_local_endpoint, timeout=180)

    except (exc.HTTPError, exc.Timeout, exc.ConnectionError):
        is_up = False
        metric_bool('client_success', False, m_name='maas_octavia')
    # Any other exception presumably isn't an API error
    except Exception as e:
        metric_bool('client_success', False, m_name='maas_octavia')
        status_err(str(e), m_name='maas_octavia')
    else:
        is_up = resp.ok
        metric_bool('client_success', True, m_name='maas_octavia')
        milliseconds = resp.elapsed.total_seconds() * 1000

    status_ok(m_name='maas_octavia')
    metric_bool('octavia_api_local_status', is_up, m_name='maas_octavia')
    if is_up:
        # only want to send other metrics if api is up
        metric('octavia_api_local_response_time', 'double',
               '%.3f' % milliseconds, 'ms')
Ejemplo n.º 9
0
def main():
    """Verify that a Holland backup dated today or yesterday exists.

    Backup sets are obtained from container_holland_lb_check. Element 0 of
    each set is searched for the date stamp, and element 1 of the last set
    is divided by 1024 and reported as holland_backup_size.
    """
    args = parse_args()
    galera_container = args.galera_container_name
    holland_bin = args.holland_binary
    holland_bs = args.holland_backupset

    today = datetime.date.today().strftime('%Y%m%d')
    yesterday = (datetime.date.today() -
                 datetime.timedelta(days=1)).strftime('%Y%m%d')

    # Get completed Holland backup set
    backupsets = \
        container_holland_lb_check(galera_container, holland_bin, holland_bs)

    # Each date has to be tested on its own: `yesterday or today in x`
    # is always true because a non-empty string is truthy.
    has_recent_backup = any(
        yesterday in backup[0] or today in backup[0]
        for backup in backupsets)

    if not has_recent_backup:
        # Emit the metric before the error status so it is not lost.
        metric_bool('holland_backup_status', False)
        status_err('Could not find Holland backup from %s or %s' %
                   (yesterday, today))
        # backupsets may be empty here; do not index into it.
        return

    status_ok()
    metric_bool('holland_backup_status', True)

    # Print metric about last backup
    print_metrics('holland_backup_size', float(backupsets[-1][1]) / 1024)
Ejemplo n.º 10
0
def check(auth_ref, args):
    """Report local ironic API status and response time.

    :param auth_ref: accepted for interface compatibility; not read here.
    :param args: parsed arguments; args.ip selects the local endpoint.
    """
    endpoint = 'http://{ip}:6385/v1'.format(ip=args.ip)

    try:
        # Fall back to the default client when no IP was supplied.
        ironic = (get_ironic_client(endpoint=endpoint) if args.ip
                  else get_ironic_client())
        is_up = True
    except exc.ClientException:
        is_up = False
    except Exception as err:
        # Not a client exception, so report it as a check failure.
        metric_bool('client_success', False, m_name='maas_ironic')
        status_err(str(err), m_name='maas_ironic')
    else:
        metric_bool('client_success', True, m_name='maas_ironic')
        # Time an arbitrary API call.
        began = time.time()
        ironic.node.list()
        milliseconds = (time.time() - began) * 1000

    status_ok(m_name='maas_ironic')
    metric_bool('ironic_api_local_status', is_up, m_name='maas_ironic')
    if not is_up:
        return

    # Response time is only meaningful when the API answered.
    metric('ironic_api_local_response_time', 'double',
           '%.3f' % milliseconds, 'ms')
Ejemplo n.º 11
0
def check(args):
    """Report local ironic API status and response time.

    A GET on the local /nodes endpoint is the probe; the API counts as
    up only when the response status code is 200.

    :param args: parsed arguments; args.ip, args.port and args.protocol
        are used to build the local endpoint.
    """
    ironic = get_openstack_client('baremetal')

    try:
        base_url = str(ironic.get_endpoint())
        nodes_url = generate_local_endpoint(
            base_url, args.ip, args.port, args.protocol, '/nodes')
        response = ironic.session.get(nodes_url)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
    except Exception as err:
        # Anything else is treated as a check failure, not an outage.
        metric_bool('client_success', False, m_name='maas_ironic')
        status_err(str(err), m_name='maas_ironic')
    else:
        is_up = response.status_code == 200
        milliseconds = response.elapsed.total_seconds() * 1000
        metric_bool('client_success', True, m_name='maas_ironic')

    status_ok(m_name='maas_ironic')
    metric_bool('ironic_api_local_status', is_up, m_name='maas_ironic')
    if not is_up:
        return

    metric('ironic_api_local_response_time', 'double',
           '%.3f' % milliseconds, 'ms')
Ejemplo n.º 12
0
def check(args):
    """Report local heat API status and response time.

    A GET on the local /build_info endpoint is the probe; any response
    that does not raise marks the API as up.

    :param args: parsed arguments; args.ip, args.port and args.protocol
        are used to build the local endpoint.
    """
    heat = get_openstack_client('orchestration')

    try:
        base_url = str(heat.get_endpoint())
        build_info_url = generate_local_endpoint(
            base_url, args.ip, args.port, args.protocol, '/build_info')
        response = heat.session.get(build_info_url)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
        metric_bool('client_success', False, m_name='maas_heat')
    except Exception as err:
        # Anything else is treated as a check failure, not an outage.
        metric_bool('client_success', False, m_name='maas_heat')
        status_err(str(err), m_name='maas_heat')
    else:
        is_up = True
        milliseconds = response.elapsed.total_seconds() * 1000
        metric_bool('client_success', True, m_name='maas_heat')

    status_ok(m_name='maas_heat')
    metric_bool('heat_api_local_status', is_up, m_name='maas_heat')
    if not is_up:
        return

    # Response time is only sent when the API answered.
    metric('heat_api_local_response_time', 'double',
           '%.3f' % milliseconds, 'ms')
def check(auth_ref, args):
    """Report local heat API status and response time.

    :param auth_ref: auth reference handed to get_keystone_client.
    :param args: parsed arguments; args.ip selects the local endpoint.
    """
    keystone = get_keystone_client(auth_ref)
    tenant_id = keystone.tenant_id

    endpoint = 'http://{ip}:8004/v1/{tenant}'.format(ip=args.ip,
                                                     tenant=tenant_id)

    try:
        # Fall back to the default client when no IP was supplied.
        heat = (get_heat_client(endpoint=endpoint) if args.ip
                else get_heat_client())
        is_up = True
    except exc.HTTPException:
        is_up = False
    except Exception as err:
        # Not an HTTP exception, so report it as a check failure.
        status_err(str(err))
    else:
        # Time an arbitrary API call.
        began = time.time()
        heat.build_info.build_info()
        milliseconds = (time.time() - began) * 1000

    status_ok()
    metric_bool('heat_api_local_status', is_up)
    if not is_up:
        return

    # Response time is only sent when the API answered.
    metric('heat_api_local_response_time', 'double',
           '%.3f' % milliseconds, 'ms')
Ejemplo n.º 14
0
def check(args):
    """Report octavia API status and the number of LBs in ERROR state.

    A GET on the local /lbaas/loadbalancers endpoint is the probe; its
    JSON body is also used to count load balancers whose
    provisioning_status is 'ERROR'.

    :param args: parsed arguments; args.ip, args.port and args.protocol
        are used to build the local endpoint.
    """
    octavia = get_openstack_client('load_balancer')

    if not args.ip:
        # Without an IP no request is made, so there would be no response
        # to inspect below; report that explicitly.
        metric_bool('client_success', False, m_name='maas_octavia')
        status_err('No IP address supplied for the local octavia '
                   'API check', m_name='maas_octavia')
        return

    try:
        octavia_local_endpoint = generate_local_endpoint(
            str(octavia.get_endpoint()), args.ip, args.port,
            args.protocol,
            '/lbaas/loadbalancers'
        )
        resp = octavia.session.get(octavia_local_endpoint, timeout=180)

    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
        metric_bool('client_success', False, m_name='maas_octavia')
    # Any other exception presumably isn't an API error
    except Exception as e:
        metric_bool('client_success', False, m_name='maas_octavia')
        status_err(str(e), m_name='maas_octavia')
    else:
        is_up = resp.ok
        metric_bool('client_success', True, m_name='maas_octavia')

    status_ok(m_name='maas_octavia')
    metric_bool('octavia_api_local_status', is_up, m_name='maas_octavia')
    if is_up:
        # only want to send other metrics if api is up
        loadbalancers = resp.json()['loadbalancers']
        num = len([lb for lb in loadbalancers
                   if lb['provisioning_status'] == 'ERROR'])
        # This is a count of load balancers, not a duration, so the unit
        # must not be milliseconds.
        metric('octavia_num_lb_in_error_status',
               'uint32',
               num,
               'loadbalancers')
Ejemplo n.º 15
0
def main():
    """Run table_checksum with CLI credentials and report the outcome.

    Exactly two positional arguments (username, password) are required.
    Otherwise the help text is printed and SystemExit is raised.
    """
    parser = optparse.OptionParser(
        usage="Usage: %prog [-h] [-H] username password")
    parser.add_option(
        '-H', '--host',
        action='store',
        dest='host',
        default=None,
        help="Allow user to connect to something other than localhost"
    )
    options, positional = parser.parse_args()

    # Both a username and a password are needed to reach the database.
    if len(positional) != 2:
        parser.print_help()
        raise SystemExit(True)
    username, password = positional

    # Per the pt-table-checksum documentation
    # (http://www.percona.com/doc/percona-toolkit/2.2/pt-table-checksum.html)
    # an exit status of 0 means everything is fine. stdout is not needed,
    # and stderr carries any problem description.
    exit_status, _, stderr_text = table_checksum(username, password,
                                                 options.host)
    if exit_status != 0:
        status_err(stderr_text.strip())
        raise SystemExit(True)

    status_ok()
Ejemplo n.º 16
0
def check(auth_ref, args):
    """Report magnum API status and one up/down metric per service.

    :param auth_ref: accepted for interface compatibility; not read here.
    :param args: parsed arguments; args.ip selects the local endpoint.
    """
    endpoint = 'http://{ip}:9511/v1'.format(ip=args.ip)

    try:
        # Fall back to the default client when no IP was supplied.
        magnum = (get_magnum_client(endpoint=endpoint) if args.ip
                  else get_magnum_client())
        api_is_up = True
    except exc.HttpError:
        api_is_up = False
        metric_bool('client_success', False, m_name='maas_magnum')
    except Exception as err:
        # Not an HTTP error, so report it as a check failure.
        metric_bool('client_success', False, m_name='maas_magnum')
        status_err(str(err), m_name='maas_magnum')
    else:
        metric_bool('client_success', True, m_name='maas_magnum')
        services = magnum.mservices.list()

    status_ok(m_name='maas_magnum')
    metric_bool('magnum_api_local_status', api_is_up, m_name='maas_magnum')
    if not api_is_up:
        return

    for service in services:
        metric_name = '_'.join((service.binary, 'status'))
        metric_bool(metric_name, service.state == 'up')
Ejemplo n.º 17
0
def check(auth_ref, args):
    """Report how many ironic nodes exist and how many are not in
    maintenance.

    When the client raises exc.ClientException the function returns
    without emitting a status line or any metric.

    :param auth_ref: accepted for interface compatibility; not read here.
    :param args: parsed arguments; args.ip, args.protocol and args.port
        are used to build the local endpoint.
    """
    endpoint = '{protocol}://{ip}:{port}/v1'.format(
        ip=args.ip, protocol=args.protocol, port=args.port)

    try:
        # Fall back to the default client when no IP was supplied.
        ironic = (get_ironic_client(endpoint=endpoint) if args.ip
                  else get_ironic_client())
        is_up = True
    except exc.ClientException:
        is_up = False
    except Exception as err:
        # Not a client exception, so report it as a check failure.
        metric_bool('client_success', False, m_name='maas_ironic')
        status_err(str(err), m_name='maas_ironic')
        return
    else:
        metric_bool('client_success', True, m_name='maas_ironic')
        # limit=0 asks for the full node list without pagination.
        all_nodes = ironic.node.list(limit=0)
        status_ok(m_name='maas_ironic')

    if not is_up:
        return

    in_maintenance = sum(1 for node in all_nodes if node.maintenance)
    total_nodes = len(all_nodes)
    metric('ironic_up_nodes_count', 'uint32', total_nodes - in_maintenance)
    metric('ironic_total_nodes_count', 'uint32', total_nodes)
def check(args):
    """Report nova metadata API status and response time.

    The API counts as up when GET / succeeds and the decoded body has a
    line equal to '1.0'.

    :param args: parsed arguments; args.ip, args.protocol and args.port
        are used to build the endpoint.
    """
    endpoint = '{protocol}://{ip}:{port}'.format(
        ip=args.ip, protocol=args.protocol, port=args.port)
    session = requests.Session()
    is_up = True

    try:
        # Only / (the EC2 version list) is reachable without an instance
        # ID and extra headers.
        response = session.get('%s/' % endpoint, verify=False, timeout=180)
        milliseconds = response.elapsed.total_seconds() * 1000
        version_listed = (
            response.ok and
            '1.0' in response.content.decode().splitlines())
        if not version_listed:
            is_up = False
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
        metric_bool('client_success', False, m_name='maas_nova')
    except Exception as err:
        metric_bool('client_success', False, m_name='maas_nova')
        status_err(str(err), m_name='maas_nova')
    else:
        metric_bool('client_success', True, m_name='maas_nova')

    status_ok(m_name='maas_nova')
    metric_bool('nova_api_metadata_local_status', is_up, m_name='maas_nova')
    if not is_up:
        return

    # Response time is only sent when the API is up.
    metric('nova_api_metadata_local_response_time', 'double',
           '%.3f' % milliseconds, 'ms')
def check(args):
    """Report local keystone API status, response time and basic counts.

    :param args: parsed arguments; args.ip selects the local endpoint.
    """
    IDENTITY_ENDPOINT = 'http://{ip}:35357/v3'.format(ip=args.ip)

    try:
        keystone = get_keystone_client(endpoint=IDENTITY_ENDPOINT)
        is_up = True
    except (exc.HttpServerError, exc.ClientException):
        is_up = False
    # Any other exception presumably isn't an API error
    except Exception as e:
        status_err(str(e))
    else:
        # time something arbitrary
        start = time.time()
        keystone.services.list()
        end = time.time()
        milliseconds = (end - start) * 1000

        # gather some vaguely interesting metrics to return
        project_count = len(keystone.projects.list())
        user_count = len(keystone.users.list(domain='Default'))

    status_ok()
    metric_bool('keystone_api_local_status', is_up)
    # only want to send other metrics if api is up
    if is_up:
        metric('keystone_api_local_response_time',
               'double',
               '%.3f' % milliseconds,
               'ms')
        metric('keystone_user_count', 'uint32', user_count, 'users')
        # Emitted exactly once; a repeated line would duplicate the metric.
        metric('keystone_tenant_count', 'uint32', project_count, 'tenants')
Ejemplo n.º 20
0
def check(args):
    """Emit cloud_resource_* metrics derived from nova hypervisor data.

    Every hypervisor is fetched and tagged with a running `count`
    attribute. Values for 'total_vcpus' and 'total_memory' are scaled by
    args.cpu_allocation_ratio and args.mem_allocation_ratio respectively.

    NOTE(review): each hypervisor overwrites the values stored by the
    previous one, so the figures emitted are those of the last hypervisor
    iterated - confirm whether a sum was intended.

    :param args: parsed arguments; the two allocation ratios are read.
    """
    try:
        nova = get_openstack_client('compute')
    except Exception as err:
        metric_bool('client_success', False, m_name='maas_nova')
        status_err(str(err), m_name='maas_nova')
    else:
        metric_bool('client_success', True, m_name='maas_nova')
        # Pull the detailed record for every hypervisor.
        hypervisors = [nova.get_hypervisor(h.id) for h in nova.hypervisors()]
        cloud_stats = collections.defaultdict(dict)
        for position, hypervisor in enumerate(hypervisors, 1):
            hypervisor.count = position
            for metric_name, vals in stats_mapping.items():
                if metric_name == 'total_vcpus':
                    multiplier = args.cpu_allocation_ratio
                elif metric_name == 'total_memory':
                    multiplier = args.mem_allocation_ratio
                else:
                    multiplier = 1
                value = getattr(hypervisor, vals['stat_name']) * multiplier
                entry = cloud_stats[metric_name]
                entry['value'] = value
                entry['unit'] = vals['unit']
                entry['type'] = vals['type']

    status_ok(m_name='maas_nova')
    for metric_name, entry in cloud_stats.items():
        metric('cloud_resource_%s' % metric_name,
               entry['type'], entry['value'], entry['unit'])
Ejemplo n.º 21
0
def check(args):
    """Report nova metadata API status and response time.

    The API counts as up when GET / succeeds and the body has a line
    equal to '1.0'. The response time is also forwarded through
    metric_influx under INFLUX_MEASUREMENT_NAME.

    :param args: parsed arguments; args.ip selects the endpoint host.
    """
    metadata_endpoint = ('http://{ip}:8775'.format(ip=args.ip))
    is_up = True

    s = requests.Session()

    try:
        # looks like we can only get / (ec2 versions) without specifying
        # an instance ID and other headers
        versions = s.get('%s/' % metadata_endpoint, verify=False, timeout=10)
        milliseconds = versions.elapsed.total_seconds() * 1000
        # Compare text with text: on Python 3 `content` is bytes, and a
        # str is never found in a list of bytes lines.
        if not versions.ok or '1.0' not in versions.text.splitlines():
            is_up = False
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout) as e:
        is_up = False
    except Exception as e:
        status_err(str(e))

    metric_values = dict()

    status_ok()
    metric_bool('nova_api_metadata_local_status', is_up)
    # only want to send other metrics if api is up
    if is_up:
        metric('nova_api_metadata_local_response_time', 'double',
               '%.3f' % milliseconds, 'ms')

        metric_values['nova_api_metadata_local_response_time'] = ('%.3f' %
                                                                  milliseconds)
        metric_influx(INFLUX_MEASUREMENT_NAME, metric_values)
Ejemplo n.º 22
0
def check(args):
    """Report local ironic API status and response time.

    A GET on the local /nodes endpoint is the probe; the API counts as
    up only when the response status code is 200.

    :param args: parsed arguments; args.ip, args.port and args.protocol
        are used to build the local endpoint.
    """
    ironic = get_openstack_client('baremetal')

    try:
        catalog_url = str(ironic.get_endpoint())
        local_url = generate_local_endpoint(
            catalog_url, args.ip, args.port, args.protocol, '/nodes')
        reply = ironic.session.get(local_url)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
    except Exception as err:
        # Anything else is treated as a check failure, not an outage.
        metric_bool('client_success', False, m_name='maas_ironic')
        status_err(str(err), m_name='maas_ironic')
    else:
        is_up = reply.status_code == 200
        milliseconds = reply.elapsed.total_seconds() * 1000
        metric_bool('client_success', True, m_name='maas_ironic')

    status_ok(m_name='maas_ironic')
    metric_bool('ironic_api_local_status', is_up, m_name='maas_ironic')
    if not is_up:
        return

    metric('ironic_api_local_response_time', 'double',
           '%.3f' % milliseconds, 'ms')
Ejemplo n.º 23
0
def check(auth_ref, args):
    """Report local glance registry status and response time.

    A GET on <registry>/images with the keystone token is the probe.

    :param auth_ref: auth reference handed to get_keystone_client.
    :param args: parsed arguments; args.ip selects the registry host.
    """
    # We call get_keystone_client here as there is some logic within to get a
    # new token if previous one is bad.
    keystone = get_keystone_client(auth_ref)
    auth_token = keystone.auth_token
    registry_endpoint = 'http://{ip}:9191'.format(ip=args.ip)

    s = Session()

    s.headers.update({
        'Content-type': 'application/json',
        'x-auth-token': auth_token
    })

    try:
        # /images returns a list of public, non-deleted images
        r = s.get('%s/images' % registry_endpoint, verify=False, timeout=10)
        is_up = r.ok
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
    except Exception as e:
        status_err(str(e))

    status_ok()
    metric_bool('glance_registry_local_status', is_up)
    # only want to send other metrics if api is up
    if is_up:
        milliseconds = r.elapsed.total_seconds() * 1000
        # The elapsed time is fractional, so report it as a formatted
        # double with an explicit unit instead of a raw float as uint32.
        metric('glance_registry_local_response_time', 'double',
               '%.3f' % milliseconds, 'ms')
Ejemplo n.º 24
0
def check(auth_ref, args):
    """Report local magnum API status and response time.

    :param auth_ref: accepted for interface compatibility; not read here.
    :param args: parsed arguments; args.ip selects the local endpoint.
    """
    endpoint = 'http://{ip}:9511/v1'.format(ip=args.ip)

    try:
        # Fall back to the default client when no IP was supplied.
        magnum = (get_magnum_client(endpoint=endpoint) if args.ip
                  else get_magnum_client())
        api_is_up = True
    except exc.HttpError:
        api_is_up = False
        metric_bool('client_success', False, m_name='maas_magnum')
    except Exception as err:
        # Not an HTTP error, so report it as a check failure.
        metric_bool('client_success', False, m_name='maas_magnum')
        status_err(str(err), m_name='maas_magnum')
    else:
        metric_bool('client_success', True, m_name='maas_magnum')
        # Time an arbitrary API call.
        began = time.time()
        magnum.cluster_templates.list()
        milliseconds = (time.time() - began) * 1000

    status_ok(m_name='maas_magnum')
    metric_bool('magnum_api_local_status', api_is_up, m_name='maas_magnum')
    if not api_is_up:
        return

    # Response time is only sent when the API answered.
    metric('magnum_api_local_response_time', 'double',
           '%.3f' % milliseconds, 'ms')
Ejemplo n.º 25
0
def main():
    """Generate and parse the object and container dispersion reports.

    The two reports are produced separately because their combined
    output would be harder to parse. Each output is searched with
    PARSE_RE and, when both match, passed to print_metrics.
    """
    try:
        object_report = generate_report('object')
        object_match = PARSE_RE.search(object_report)
    except OSError:
        # The report could not be produced; treat that as a check error.
        maas_common.status_err('Could not access object dispersion report')

    try:
        container_report = generate_report('container')
        container_match = PARSE_RE.search(container_report)
    except OSError:
        # The report could not be produced; treat that as a check error.
        maas_common.status_err('Could not access container dispersion report')

    if not object_match or not container_match:
        maas_common.status_err('Could not parse dispersion report output')

    maas_common.status_ok()
    print_metrics('object', object_match)
    print_metrics('container', container_match)
Ejemplo n.º 26
0
def check(auth_ref, args):
    """Emit an up/down metric for every cinder service.

    A service counts as down only when it is enabled but its state is
    not 'up'.

    :param auth_ref: auth reference handed to get_keystone_client.
    :param args: parsed arguments; args.ip selects the volume API host.
    """
    keystone = get_keystone_client(auth_ref)
    token = keystone.auth_token
    endpoint = 'http://{ip}:8776/v1/{tenant}'.format(
        ip=args.ip, tenant=keystone.tenant_id)

    session = requests.Session()
    session.headers.update({
        'Content-type': 'application/json',
        'x-auth-token': token,
    })

    try:
        response = session.get('%s/os-services' % endpoint,
                               verify=False, timeout=10)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout) as err:
        status_err(str(err))

    if not response.ok:
        status_err('could not get response from cinder api')

    status_ok()
    for service in response.json()['services']:
        enabled_but_down = (service['status'] == 'enabled' and
                            service['state'] != 'up')
        metric_name = '%s_on_host_%s' % (service['binary'], service['host'])
        metric_bool(metric_name, not enabled_but_down)
Ejemplo n.º 27
0
def main():
    """Verify that a Holland backup dated today or yesterday exists.

    Reads its settings from `args`, which is not defined in this function
    (presumably a module-level parsed-arguments object - confirm).
    Element 0 of each backup set is searched for the date stamp, and
    element 1 of the last set is divided by 1024 and reported as
    holland_backup_size.
    """
    galera_hostname = args.galera_hostname
    holland_bin = args.holland_binary
    holland_bs = args.holland_backupset

    today = datetime.date.today().strftime('%Y%m%d')
    yesterday = (datetime.date.today() -
                 datetime.timedelta(days=1)).strftime('%Y%m%d')

    # Get completed Holland backup set
    backupsets = \
        holland_lb_check(galera_hostname, holland_bin, holland_bs)

    # Each date has to be tested on its own: `yesterday or today in x`
    # is always true because a non-empty string is truthy.
    has_recent_backup = any(
        yesterday in backup[0] or today in backup[0]
        for backup in backupsets)

    if not has_recent_backup:
        metric_bool('holland_backup_status', False, m_name='maas_holland')
        status_err('Could not find Holland backup from %s or %s' %
                   (yesterday, today),
                   m_name='maas_holland')
        # backupsets may be empty here; do not index into it.
        return

    status_ok(m_name='maas_holland')
    metric_bool('holland_backup_status', True, m_name='maas_holland')

    # Print metric about last backup
    print_metrics('holland_backup_size',
                  "{0:.1f}".format(float(backupsets[-1][1]) / 1024))
Ejemplo n.º 28
0
def check(args):
    """Report local nova API status and response time.

    :param args: parsed arguments; args.ip selects the local endpoint.
    """
    COMPUTE_ENDPOINT = 'http://{ip}:8774/v3'.format(ip=args.ip)

    try:
        nova = get_nova_client(bypass_url=COMPUTE_ENDPOINT)
        is_up = True
    except exc.ClientException:
        is_up = False
    # Any other exception presumably isn't an API error
    except Exception as e:
        status_err(str(e))
    else:
        # time something arbitrary
        start = time()
        nova.services.list()
        end = time()
        milliseconds = (end - start) * 1000

    status_ok()
    metric_bool('nova_api_local_status', is_up)
    # only want to send other metrics if api is up
    if is_up:
        # The value is a formatted fractional number, so it is declared
        # as a double rather than an unsigned integer.
        metric('nova_api_local_response_time', 'double',
               '%.3f' % milliseconds, 'ms')
Ejemplo n.º 29
0
def check(auth_ref, args):
    """Report local ironic API status and response time.

    :param auth_ref: accepted for interface compatibility; not read here.
    :param args: parsed arguments; args.ip, args.protocol and args.port
        are used to build the local endpoint.
    """
    endpoint = '{protocol}://{ip}:{port}/v1'.format(
        ip=args.ip, protocol=args.protocol, port=args.port)

    try:
        # Fall back to the default client when no IP was supplied.
        ironic = (get_ironic_client(endpoint=endpoint) if args.ip
                  else get_ironic_client())
        is_up = True
    except exc.ClientException:
        is_up = False
    except Exception as err:
        # Not a client exception, so report it as a check failure.
        metric_bool('client_success', False, m_name='maas_ironic')
        status_err(str(err), m_name='maas_ironic')
    else:
        metric_bool('client_success', True, m_name='maas_ironic')
        # Time an arbitrary API call.
        began = time.time()
        ironic.node.list()
        milliseconds = (time.time() - began) * 1000

    status_ok(m_name='maas_ironic')
    metric_bool('ironic_api_local_status', is_up, m_name='maas_ironic')
    if not is_up:
        return

    # Response time is only sent when the API answered.
    metric('ironic_api_local_response_time', 'double',
           '%.3f' % milliseconds, 'ms')
Ejemplo n.º 30
0
def check(auth_ref, args):
    """Query the local glance registry and emit status/latency metrics.

    :param auth_ref: authentication reference handed to the keystone client.
    :param args: parsed arguments; only ``args.ip`` is read.
    """
    # get_keystone_client is called here because it has logic to get a new
    # token if the previous one is bad.
    keystone = get_keystone_client(auth_ref)
    token = keystone.auth_token
    registry_endpoint = 'http://{ip}:9191'.format(ip=args.ip)

    session = Session()
    session.headers.update({
        'Content-type': 'application/json',
        'x-auth-token': token,
    })

    try:
        # /images returns a list of public, non-deleted images
        response = session.get('%s/images' % registry_endpoint,
                               verify=False, timeout=10)
        is_up = response.ok
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
    except Exception as err:
        status_err(str(err))

    status_ok()
    metric_bool('glance_registry_local_status', is_up)
    if not is_up:
        return
    # The response time is only sent when the registry is up.
    elapsed_ms = response.elapsed.total_seconds() * 1000
    metric('glance_registry_local_response_time', 'double',
           '%.3f' % elapsed_ms, 'ms')
Ejemplo n.º 31
0
def main(args):
    """Fetch memcached statistics and publish them as metrics.

    :param args: parsed arguments; ``args.ip`` and ``args.port`` are read.
    """
    host = str(args.ip)
    port = args.port
    is_up = True

    try:
        stats = item_stats(host, port)
        current_version = stats['version']
    except (TypeError, IndexError):
        is_up = False
        metric_bool('client_success', False, m_name='maas_memcached')
    else:
        metric_bool('client_success', True, m_name='maas_memcached')
        if current_version not in VERSIONS:
            message = ('This plugin has only been tested with version %s '
                       'of memcached, and you are using version %s' %
                       (VERSIONS, current_version))
            status_err(message, m_name='maas_memcached')

    status_ok(m_name='maas_memcached')
    metric_bool('memcache_api_local_status', is_up, m_name='maas_memcached')
    if is_up:
        # One metric per entry of MEMCACHE_METRICS (stat name -> unit).
        for stat_name, unit in MEMCACHE_METRICS.iteritems():
            metric('memcache_%s' % stat_name, 'uint64', stats[stat_name],
                   unit)
def check(args):
    """Emit one boolean metric per neutron agent describing its liveness.

    :param args: parsed arguments; ``args.hostname`` selects the neutron
        endpoint and ``args.host`` (optional) restricts the agent listing.
    """
    NETWORK_ENDPOINT = "http://{hostname}:9696".format(hostname=args.hostname)
    try:
        neutron = get_neutron_client(endpoint_url=NETWORK_ENDPOINT)

    # not gathering api status metric here so catch any exception
    except Exception as e:
        status_err(str(e))

    # gather neutron agent states
    if args.host:
        agents = neutron.list_agents(host=args.host)["agents"]
    else:
        agents = neutron.list_agents()["agents"]

    if len(agents) == 0:
        status_err("No host(s) found in the agents list")

    # return all the things
    status_ok()
    for agent in agents:
        # An agent is only reported down when it is administratively enabled
        # yet not alive.
        agent_is_up = True
        if agent["admin_state_up"] and not agent["alive"]:
            agent_is_up = False

        if args.host:
            name = "%s_status" % agent["binary"]
        else:
            name = "%s_%s_on_host_%s" % (agent["binary"], agent["id"],
                                         agent["host"])

        # Host names may contain dots; normalise them to underscores in the
        # metric name.
        name = name.replace(".", "_")
        metric_bool(name, agent_is_up)
Ejemplo n.º 33
0
def check(args):
    """Publish a boolean liveness metric for every neutron agent found.

    :param args: parsed arguments; ``args.hostname`` selects the neutron
        endpoint and ``args.host`` (optional) restricts the agent listing.
    """
    endpoint = 'http://{hostname}:9696'.format(hostname=args.hostname)
    try:
        neutron = get_neutron_client(endpoint_url=endpoint)
    except Exception as err:
        # No API status metric is gathered here, so catch any exception.
        status_err(str(err))

    # Restrict the listing to one host when a host was requested.
    filters = {'host': args.host} if args.host else {}
    agents = neutron.list_agents(**filters)['agents']

    if len(agents) == 0:
        status_err("No host(s) found in the agents list")

    status_ok()
    for agent in agents:
        # Down means: administratively enabled but not alive.
        agent_is_up = not (agent['admin_state_up'] and not agent['alive'])

        if args.host:
            name = '%s_status' % agent['binary']
        else:
            name = '%s_%s_on_host_%s' % (
                agent['binary'], agent['id'], agent['host'])

        metric_bool(name.replace(".", "_"), agent_is_up)
Ejemplo n.º 34
0
def main():
    """Run ``table_checksum`` with command-line credentials and report status.

    Expects exactly two positional arguments (username, password) and an
    optional ``-H/--host``.

    :raises SystemExit: on wrong argument count or a non-zero checksum status.
    """
    parser = optparse.OptionParser(
        usage="Usage: %prog [-h] [-H] username password")
    parser.add_option(
        '-H', '--host',
        action='store',
        dest='host',
        default=None,
        help="Allow user to connect to something other than localhost")
    options, positional = parser.parse_args()

    # The username and password are needed to connect to the database.
    if len(positional) != 2:
        parser.print_help()
        raise SystemExit(True)
    username, password = positional

    # According to
    # http://www.percona.com/doc/percona-toolkit/2.2/pt-table-checksum.html
    # an exit status of 0 means everything is okay and anything else is a
    # problem.  stdout is discarded; stderr should describe any problem.
    status, _, err = table_checksum(username, password, options.host)
    if status != 0:
        status_err(err.strip())
        raise SystemExit(True)

    status_ok()
Ejemplo n.º 35
0
def check(args):
    """Check the local nova API and count servers per status.

    :param args: parsed arguments; only ``args.ip`` is read.
    """
    endpoint = 'http://{ip}:8774/v3'.format(ip=args.ip)

    try:
        nova = get_nova_client(bypass_url=endpoint)
        is_up = True
    except exc.ClientException:
        is_up = False
    except Exception as err:
        # Any other exception presumably isn't an API error.
        status_err(str(err))
    else:
        # Time an arbitrary API call.
        began = time()
        nova.services.list()
        milliseconds = (time() - began) * 1000

        # Tally the servers by their status attribute.
        status_count = collections.Counter(
            server.status for server in nova.servers.list())

    status_ok()
    metric_bool('nova_api_local_status', is_up)
    if not is_up:
        return
    # The remaining metrics are only sent when the API is up.
    metric('nova_api_local_response_time', 'double',
           '%.3f' % milliseconds, 'ms')
    for state in SERVER_STATUSES:
        metric('nova_servers_in_state_%s' % state, 'uint32',
               status_count[state])
Ejemplo n.º 36
0
def check_process_running(process_names, container_name=None):
    """Report, for each requested process, whether it is running.

       The process list is read from the cpu cgroup of the named LXC
       container when container_name is given, otherwise from the cpu
       cgroup of this host.  One boolean metric is emitted per entry of
       process_names.
    """
    if not process_names:
        # Nothing was asked for, so there is nothing to check.
        status_err('No process names provided')

    if container_name is None:
        procs_path = '/sys/fs/cgroup/cpu/cgroup.procs'
    else:
        # Look inside the container rather than at the parent host.
        procs_path = os.path.join('/sys/fs/cgroup/cpu/lxc', container_name,
                                  'cgroup.procs')
    running = get_processes(procs_path)

    if not running:
        # No process list could be obtained for the container or host.
        status_err('Could not get a list of running processes')

    # A process list was fetched, so the check itself succeeded.
    status_ok()

    for wanted in process_names:
        metric_bool('%s_process_status' % wanted, wanted in running)
Ejemplo n.º 37
0
def check(args):
    """Count octavia load balancers whose provisioning status is ERROR.

    :param args: parsed arguments; ``ip``, ``port`` and ``protocol`` are read.
    """
    octavia = get_openstack_client('load_balancer')

    try:
        # NOTE(review): when args.ip is falsy no request is made and
        # `response` stays unbound, so the else branch below would fail.
        if args.ip:
            url = generate_local_endpoint(
                str(octavia.get_endpoint()), args.ip, args.port,
                args.protocol, '/lbaas/loadbalancers')
            response = octavia.session.get(url, timeout=180)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
        metric_bool('client_success', False, m_name='maas_octavia')
    except Exception as err:
        # Any other exception presumably isn't an API error.
        metric_bool('client_success', False, m_name='maas_octavia')
        status_err(str(err), m_name='maas_octavia')
    else:
        is_up = response.ok
        metric_bool('client_success', True, m_name='maas_octavia')

    status_ok(m_name='maas_octavia')
    metric_bool('octavia_api_local_status', is_up, m_name='maas_octavia')
    if not is_up:
        return
    # The count is only sent when the API is up.
    errored = [lb for lb in response.json()['loadbalancers']
               if lb['provisioning_status'] == 'ERROR']
    # NOTE(review): the unit 'ms' looks odd for a count - confirm.
    metric('octavia_num_lb_in_error_status', 'uint32', len(errored), 'ms')
Ejemplo n.º 38
0
def main():
    """Collect HP hardware health flags and publish them as boolean metrics."""
    # Use ssacli when present, otherwise fall back to hpssacli.
    candidates = (('/usr/sbin/ssacli', 'ssacli'),
                  ('/usr/sbin/hpssacli', 'hpssacli'))
    for path, binary in candidates:
        try:
            os.stat(path)
        except Exception:
            continue
        ssacli_bin = binary
        break
    else:
        maas_common.status_err('Neither ssacli or hpssacli could be found',
                               m_name='hp_monitoring')

    status = {}
    status['hardware_processors_status'] = (
        get_chassis_status('hpasmcli', 'server'))
    status['hardware_memory_status'] = (
        get_chassis_status('hpasmcli', 'dimm'))
    status['hardware_powersupply_status'] = (
        get_powersupply_status('hpasmcli', 'powersupply'))
    status['hardware_disk_status'] = get_drive_status(ssacli_bin)
    status['hardware_controller_status'] = get_controller_status(ssacli_bin)
    status['hardware_controller_cache_status'] = (
        get_controller_cache_status(ssacli_bin))
    status['hardware_controller_battery_status'] = (
        get_controller_battery_status(ssacli_bin))

    maas_common.status_ok(m_name='maas_hwvendor')
    for metric_name, healthy in status.viewitems():
        maas_common.metric_bool(metric_name, healthy, m_name='maas_hwvendor')
Ejemplo n.º 39
0
def main():
    """Gather HP chassis, disk and controller health and emit the metrics."""
    # Try each known CLI location in order; the first one that can be
    # stat'ed wins.
    for cli_path, cli_name in (('/usr/sbin/ssacli', 'ssacli'),
                               ('/usr/sbin/hpssacli', 'hpssacli')):
        try:
            os.stat(cli_path)
        except Exception:
            continue
        ssacli_bin = cli_name
        break
    else:
        maas_common.status_err('Neither ssacli or hpssacli could be found',
                               m_name='hp_monitoring')

    status = {}
    status['hardware_processors_status'] = (
        get_chassis_status('hpasmcli', 'server'))
    status['hardware_memory_status'] = (
        get_chassis_status('hpasmcli', 'dimm'))
    status['hardware_disk_status'] = get_drive_status(ssacli_bin)
    status['hardware_controller_status'] = get_controller_status(ssacli_bin)
    status['hardware_controller_cache_status'] = (
        get_controller_cache_status(ssacli_bin))
    status['hardware_controller_battery_status'] = (
        get_controller_battery_status(ssacli_bin))

    maas_common.status_ok(m_name='maas_hwvendor')
    for key, flag in status.viewitems():
        maas_common.metric_bool(key, flag, m_name='maas_hwvendor')
def check(args):
    """Check the local glance endpoint and emit status/latency metrics.

    The service endpoint reported by the client is rewritten to point at
    the local ``args.ip``/``args.port``/``args.protocol`` and ``/images``
    is requested.

    :param args: parsed arguments; ``ip``, ``port`` and ``protocol`` are read.
    """
    glance = get_openstack_client('image')

    try:
        # Remove version from returned endpoint
        glance_endpoint = str(glance.get_endpoint().rsplit('/', 2)[0])
        local_registry_url = generate_local_endpoint(
            glance_endpoint, args.ip, args.port, args.protocol,
            '/images'
        )
        resp = glance.session.get(local_registry_url, timeout=180)
        milliseconds = resp.elapsed.total_seconds() * 1000

        is_up = resp.status_code == 200
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
        metric_bool('client_success', False, m_name='maas_glance')
    except Exception as e:
        metric_bool('client_success', False, m_name='maas_glance')
        status_err(str(e), m_name='maas_glance')
    else:
        # Only report client success when the request actually completed;
        # emitting it unconditionally contradicted the False sent from the
        # connection-error branch above.
        metric_bool('client_success', True, m_name='maas_glance')

    status_ok(m_name='maas_glance')
    metric_bool('glance_registry_local_status', is_up, m_name='maas_glance')
    # Only send remaining metrics if the API is up
    if is_up:
        metric('glance_registry_local_response_time', 'double',
               '%.3f' % milliseconds, 'ms')
def check(args, tenant_id):
    """Check the local ceilometer API and emit status/latency metrics.

    :param args: parsed arguments; only ``args.ip`` is read.
    :param tenant_id: accepted for interface compatibility; not read here.
    """
    endpoint = 'http://{ip}:8777'.format(ip=args.ip)

    try:
        ceilometer = get_ceilometer_client(endpoint=endpoint)
        is_up = True
    except exc.HTTPException:
        is_up = False
    except Exception as err:
        # Any other exception presumably isn't an API error.
        status_err(str(err))
    else:
        # Time an arbitrary API call.
        began = time()
        # Exceptions are only thrown once the meters are iterated, so walk
        # the whole listing and touch each meter id.
        for meter in ceilometer.meters.list():
            meter.meter_id
        milliseconds = (time() - began) * 1000

    status_ok()
    metric_bool('ceilometer_api_local_status', is_up)
    if not is_up:
        return
    # The response time is only sent when the API is up.
    metric('ceilometer_api_local_response_time', 'double',
           '%.3f' % milliseconds, 'ms')
Ejemplo n.º 42
0
def print_metrics(replica_status):
    """Emit the replication and connection metrics found in replica_status.

    :param replica_status: mapping that is indexed by the status/variable
        names listed below.
    """
    # (metric name, metric type, key in replica_status[, unit])
    specs = (
        ('wsrep_replicated_bytes', 'int64',
         'wsrep_replicated_bytes', 'bytes'),
        ('wsrep_received_bytes', 'int64',
         'wsrep_received_bytes', 'bytes'),
        ('wsrep_commit_window_size', 'double',
         'wsrep_commit_window', 'sequence_delta'),
        ('wsrep_cluster_size', 'int64',
         'wsrep_cluster_size', 'nodes'),
        ('queries_per_second', 'int64',
         'Queries', 'qps'),
        ('wsrep_cluster_state_uuid', 'string',
         'wsrep_cluster_state_uuid'),
        ('wsrep_cluster_status', 'string',
         'wsrep_cluster_status'),
        ('wsrep_local_state_uuid', 'string',
         'wsrep_local_state_uuid'),
        ('wsrep_local_state_comment', 'string',
         'wsrep_local_state_comment'),
        ('mysql_max_configured_connections', 'int64',
         'max_connections', 'connections'),
        ('mysql_current_connections', 'int64',
         'Threads_connected', 'connections'),
        ('mysql_max_seen_connections', 'int64',
         'Max_used_connections', 'connections'),
    )

    status_ok()
    for spec in specs:
        name, kind, key = spec[:3]
        # String metrics carry no unit, so the unit is passed only if given.
        metric(name, kind, replica_status[key], *spec[3:])
Ejemplo n.º 43
0
def main(args):
    """Run an OpenManage hardware report and emit its health as a metric.

    :param args: parsed arguments; ``args.omc`` must hold exactly two items,
        the report type and the report request.
    """
    if len(args.omc) != 2:
        # Use a separate name for the joined text: `args` is dereferenced
        # again below and must not be replaced by a string.
        provided = ' '.join(args.omc)
        message = 'Requires 2 arguments, arguments provided: "%s"' % provided
        status_err(message, m_name='maas_hwvendor')

    report_type = args.omc[0].lower()
    report_request = args.omc[1].lower()

    # If we're not using the correct version of OpenManage, error out
    check_openmanage_version()

    try:
        report = hardware_report(report_type, report_request)
    except (OSError, subprocess.CalledProcessError) as e:
        metric_bool('hardware_%s_status' % report_request, False)
        status_err(str(e), m_name='maas_hwvendor')

    status_ok(m_name='maas_hwvendor')
    # Power supplies are looked up by the request name; every other request
    # is looked up by the report type.
    if report_request == 'pwrsupplies':
        regex_key = report_request
    else:
        regex_key = report_type
    metric_bool('hardware_%s_status' % report_request,
                all_okay(report, regex[regex_key]))
Ejemplo n.º 44
0
def check(args):
    """Check the local keystone API and report latency plus object counts.

    :param args: parsed arguments; only ``args.ip`` is read.
    """
    endpoint = 'http://{ip}:35357/v2.0'.format(ip=args.ip)

    try:
        keystone = get_keystone_client(endpoint=endpoint)
        is_up = True
    except (exc.HttpServerError, exc.ClientException):
        is_up = False
    except Exception as err:
        # Any other exception presumably isn't an API error.
        status_err(str(err))
    else:
        # Time an arbitrary API call.
        began = time()
        keystone.services.list()
        milliseconds = (time() - began) * 1000

        # Gather a couple of counts to report alongside the latency.
        tenant_count = len(keystone.tenants.list())
        user_count = len(keystone.users.list())

    status_ok()
    metric_bool('keystone_api_local_status', is_up)
    if not is_up:
        return
    # The remaining metrics are only sent when the API is up.
    metric('keystone_api_local_response_time', 'double',
           '%.3f' % milliseconds, 'ms')
    metric('keystone_user_count', 'uint32', user_count)
    metric('keystone_tenant_count', 'uint32', tenant_count)
Ejemplo n.º 45
0
def check(args):
    """Check the local keystone endpoint; report latency and object counts.

    :param args: parsed arguments; ``protocol``, ``ip`` and ``port`` are read.
    """
    keystone = get_openstack_client('identity')

    services_url = "{}://{}:{}/v{}/services".format(
        args.protocol, args.ip, args.port,
        keystone.get_api_major_version()[0])

    try:
        response = keystone.session.get(services_url, timeout=180)
        milliseconds = response.elapsed.total_seconds() * 1000
        is_up = response.ok
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
        metric_bool('client_success', False, m_name='maas_keystone')
    except Exception as err:
        # Any other exception presumably isn't an API error.
        metric_bool('client_success', False, m_name='maas_keystone')
        status_err(str(err), m_name='maas_keystone')
    else:
        metric_bool('client_success', True, m_name='maas_keystone')
        # Count projects and users by walking the returned iterables.
        project_count = sum(1 for _ in keystone.projects())
        user_count = sum(1 for _ in keystone.users())

    status_ok(m_name='maas_keystone')
    metric_bool('keystone_api_local_status', is_up, m_name='maas_keystone')
    if not is_up:
        return
    metric('keystone_api_local_response_time', 'double',
           '%.3f' % milliseconds, 'ms')
    metric('keystone_user_count', 'uint32', user_count, 'users')
    metric('keystone_tenant_count', 'uint32', project_count, 'tenants')
Ejemplo n.º 46
0
def check(auth_ref, args):
    """Emit one boolean metric per cinder service returned by os-services.

    :param auth_ref: authentication reference handed to the keystone client.
    :param args: parsed arguments; only ``args.ip`` is read.
    """
    keystone = get_keystone_client(auth_ref)
    token = keystone.auth_token
    volume_endpoint = 'http://{ip}:8776/v1/{tenant}'.format(
        ip=args.ip, tenant=keystone.tenant_id)

    session = requests.Session()
    session.headers.update({
        'Content-type': 'application/json',
        'x-auth-token': token,
    })

    try:
        response = session.get('%s/os-services' % volume_endpoint,
                               verify=False, timeout=10)
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout) as err:
        status_err(str(err))

    if not response.ok:
        status_err('could not get response from cinder api')

    status_ok()
    for service in response.json()['services']:
        # A service is down only when it is enabled and its state is not up.
        service_is_up = (service['status'] != 'enabled' or
                         service['state'] == 'up')
        metric_name = '%s_on_host_%s' % (service['binary'], service['host'])
        metric_bool(metric_name, service_is_up)
Ejemplo n.º 47
0
def check(args):
    """Probe the local octavia API and report availability and latency.

    :param args: parsed arguments; ``ip``, ``port`` and ``protocol`` are read.
    """
    octavia = get_openstack_client('load_balancer')

    try:
        # NOTE(review): when args.ip is falsy no request is made and
        # `response` stays unbound, so the else branch below would fail.
        if args.ip:
            url = generate_local_endpoint(
                str(octavia.get_endpoint()), args.ip, args.port,
                args.protocol, '/lbaas/loadbalancers?limit=1')
            response = octavia.session.get(url, timeout=180)
    except (exc.HTTPError, exc.Timeout, exc.ConnectionError):
        is_up = False
        metric_bool('client_success', False, m_name='maas_octavia')
    except Exception as err:
        # Any other exception presumably isn't an API error.
        metric_bool('client_success', False, m_name='maas_octavia')
        status_err(str(err), m_name='maas_octavia')
    else:
        is_up = response.ok
        metric_bool('client_success', True, m_name='maas_octavia')
        milliseconds = response.elapsed.total_seconds() * 1000

    status_ok(m_name='maas_octavia')
    metric_bool('octavia_api_local_status', is_up, m_name='maas_octavia')
    if not is_up:
        return
    # The response time is only sent when the API is up.
    metric('octavia_api_local_response_time', 'double',
           '%.3f' % milliseconds, 'ms')
Ejemplo n.º 48
0
def check(args):
    """Query the local designate mdns service and report status/latency.

    An ``A`` query for ``example.org`` is sent over UDP to ``args.ip`` on
    port 5354 with a five second timeout.

    :param args: parsed arguments; only ``args.ip`` is read.
    """
    try:
        # attempt a query for example.org
        # return good check on any valid response
        start = datetime.datetime.now()
        message = dns.message.make_query("example.org", "A")
        answer = dns.query.udp(message, timeout=5, where=args.ip, port=5354)
        end = datetime.datetime.now()
        # int of return code
        mdns_is_up = (answer.rcode() <= 16)
    except (dns.exception.Timeout):
        mdns_is_up = False
        metric_bool('client_success', False, m_name='maas_designate')
    except Exception as e:
        metric_bool('client_success', False, m_name='maas_designate')
        status_err(str(e), m_name='maas_designate')
    else:
        metric_bool('client_success', True, m_name='maas_designate')
        # total_seconds() keeps the fractional part; dividing the integer
        # microsecond count by 10 ** 3 truncates to whole milliseconds under
        # Python 2 and ignored the timedelta's days component.
        milliseconds = (end - start).total_seconds() * 1000

    status_ok(m_name='maas_designate')
    metric_bool('designate_mdns_local_status',
                mdns_is_up, m_name='maas_designate')
    if mdns_is_up:
        # only want to send other metrics if api is up
        metric('designate_mdns_local_response_time',
               'double',
               '%.3f' % milliseconds,
               'ms')
Ejemplo n.º 49
0
def check(args):
    """Emit one boolean metric per nova service describing whether it is up.

    :param args: parsed arguments; ``args.ip`` selects the nova endpoint and
        ``args.host`` (optional) restricts the service listing.
    """
    endpoint = 'http://{ip}:8774/v3'.format(ip=args.ip)
    try:
        nova = get_nova_client(bypass_url=endpoint)
    except Exception as err:
        # No API status metric is gathered here, so catch any exception.
        status_err(str(err))

    # Restrict the listing to one host when a host was requested.
    filters = {'host': args.host} if args.host else {}
    services = nova.services.list(**filters)

    if len(services) == 0:
        status_err("No host(s) found in the service list")

    status_ok()
    for service in services:
        # A service is down only when it is enabled and its state is down.
        service_is_up = (service.status != 'enabled' or
                         service.state != 'down')

        if args.host:
            name = '%s_status' % service.binary
        else:
            name = '%s_on_host_%s_status' % (service.binary, service.host)

        metric_bool(name, service_is_up)
Ejemplo n.º 50
0
def main():
    """Generate and parse the swift dispersion reports, then print metrics."""
    # The object and container reports are generated independently so each
    # output can be parsed on its own instead of untangling a combined one.
    try:
        object_match = PARSE_RE.search(generate_report('object'))
    except OSError:
        # An OSError while generating the report is reported as an error.
        maas_common.status_err('Could not access object dispersion report',
                               m_name='maas_swift')

    try:
        container_match = PARSE_RE.search(generate_report('container'))
    except OSError:
        maas_common.status_err('Could not access container dispersion report',
                               m_name='maas_swift')

    # Both reports must have matched the parsing pattern.
    if not object_match or not container_match:
        maas_common.status_err('Could not parse dispersion report output',
                               m_name='maas_swift')

    maas_common.status_ok(m_name='maas_swift')
    print_metrics('object', object_match)
    print_metrics('container', container_match)
Ejemplo n.º 51
0
def main():
    """Collect RabbitMQ metrics via the helper collectors and emit them.

    Reads connection settings from the module-level ``options`` object.
    """
    metrics = {}
    session = requests.Session()  # the Session stores the auth credentials
    session.auth = (options.username, options.password)

    protocol = 'https' if options.https else 'http'

    # Every collector takes the same leading arguments and fills `metrics`.
    common = (session, metrics, protocol, options.host, options.port)
    _get_connection_metrics(*common)
    _get_overview_metrics(*common)
    _get_node_metrics(*(common + (options.name,)))
    _get_queue_metrics(*common)
    _get_consumer_metrics(*common)

    status_ok(m_name='maas_rabbitmq')

    for key, entry in metrics.items():
        value = entry['value']
        if isinstance(value, bool):
            # Boolean values are reported inverted as a *_status metric.
            metric_bool('rabbitmq_%s_status' % key, not value)
        else:
            metric('rabbitmq_%s' % key, 'int64', value, entry['unit'])
Ejemplo n.º 52
0
def check(auth_ref, args):
    """Probe the local magnum API and publish status and latency metrics.

    :param auth_ref: accepted for interface compatibility; not read here.
    :param args: parsed arguments; only ``args.ip`` is read.
    """
    local_endpoint = 'http://{ip}:9511/v1'.format(ip=args.ip)

    try:
        # Only pin the client to the local endpoint when an IP was supplied.
        client_kwargs = {'endpoint': local_endpoint} if args.ip else {}
        magnum = get_magnum_client(**client_kwargs)
        api_is_up = True
    except exc.HttpError:
        api_is_up = False
        metric_bool('client_success', False, m_name='maas_magnum')
    except Exception as err:
        # Any other exception presumably isn't an API error.
        metric_bool('client_success', False, m_name='maas_magnum')
        status_err(str(err), m_name='maas_magnum')
    else:
        metric_bool('client_success', True, m_name='maas_magnum')
        # Time an arbitrary API call.
        began = time.time()
        magnum.cluster_templates.list()
        milliseconds = (time.time() - began) * 1000

    status_ok(m_name='maas_magnum')
    metric_bool('magnum_api_local_status', api_is_up, m_name='maas_magnum')
    if not api_is_up:
        return
    # The response time is only sent when the API is up.
    metric('magnum_api_local_response_time', 'double',
           '%.3f' % milliseconds, 'ms')
Ejemplo n.º 53
0
def check(args):
    """Check the local heat API and emit status/latency metrics.

    The service endpoint reported by the client is rewritten to point at
    the local ``args.ip``/``args.port``/``args.protocol`` and
    ``/build_info`` is requested.

    :param args: parsed arguments; ``ip``, ``port`` and ``protocol`` are read.
    """
    heat = get_openstack_client('orchestration')

    try:
        local_heat_endpoint = generate_local_endpoint(str(heat.get_endpoint()),
                                                      args.ip, args.port,
                                                      args.protocol,
                                                      '/build_info')
        # Bound the request like the other session-based checks in this
        # file; without a timeout an unresponsive API could stall the check
        # and the Timeout handler below would serve no purpose.
        resp = heat.session.get(local_heat_endpoint, timeout=180)

    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
        metric_bool('client_success', False, m_name='maas_heat')
    # Any other exception presumably isn't an API error
    except Exception as e:
        metric_bool('client_success', False, m_name='maas_heat')
        status_err(str(e), m_name='maas_heat')
    else:
        is_up = True
        milliseconds = resp.elapsed.total_seconds() * 1000
        metric_bool('client_success', True, m_name='maas_heat')

    status_ok(m_name='maas_heat')
    metric_bool('heat_api_local_status', is_up, m_name='maas_heat')
    if is_up:
        # only want to send other metrics if api is up
        metric('heat_api_local_response_time', 'double', '%.3f' % milliseconds,
               'ms')
Ejemplo n.º 54
0
def check(auth_ref, args):
    """Check the octavia API over plain HTTP and emit status/latency metrics.

    When ``args.ip`` is set the local endpoint on port 9876 is used,
    otherwise the internal ``load-balancer`` endpoint is looked up from
    ``auth_ref``.

    :param auth_ref: authentication reference used for the endpoint lookup.
    :param args: parsed arguments; only ``args.ip`` is read.
    """
    OCTAVIA_ENDPOINT = 'http://{ip}:9876/v1'.format(ip=args.ip,)

    try:
        if args.ip:
            endpoint = OCTAVIA_ENDPOINT
        else:
            endpoint = get_endpoint_url_for_service(
                'load-balancer', auth_ref, 'internal')
        # NOTE(review): OCTAVIA_ENDPOINT already ends in '/v1', so the local
        # URL becomes '.../v1/v1/loadbalancers' - confirm this is intended.
        # time something arbitrary
        start = datetime.datetime.now()
        r = requests.get(endpoint + "/v1/loadbalancers?limit=1")
        end = datetime.datetime.now()
        api_is_up = (r.status_code == 200)
    except (requests.HTTPError, requests.Timeout, requests.ConnectionError):
        api_is_up = False
        metric_bool('client_success', False, m_name='maas_octavia')
    # Any other exception presumably isn't an API error
    except Exception as e:
        metric_bool('client_success', False, m_name='maas_octavia')
        status_err(str(e), m_name='maas_octavia')
    else:
        metric_bool('client_success', True, m_name='maas_octavia')
        # total_seconds() keeps the fractional part; dividing the integer
        # microsecond count by 10 ** 3 truncates to whole milliseconds under
        # Python 2 and ignored the timedelta's days component.
        milliseconds = (end - start).total_seconds() * 1000

    status_ok(m_name='maas_octavia')
    metric_bool('octavia_api_local_status', api_is_up, m_name='maas_octavia')
    if api_is_up:
        # only want to send other metrics if api is up
        metric('octavia_api_local_response_time',
               'double',
               '%.3f' % milliseconds,
               'ms')
def check(args):
    """Check the local nova metadata API and emit status/latency metrics.

    :param args: parsed arguments; only ``args.ip`` is read.
    """
    metadata_endpoint = 'http://{ip}:8775'.format(ip=args.ip)
    is_up = True

    session = requests.Session()

    try:
        # Only / (the ec2 versions listing) seems to be retrievable without
        # specifying an instance ID and other headers.
        versions = session.get('%s/' % metadata_endpoint,
                               verify=False, timeout=10)
        milliseconds = versions.elapsed.total_seconds() * 1000
        # Up means: successful response whose body has a line equal to '1.0'.
        if not (versions.ok and '1.0' in versions.content.splitlines()):
            is_up = False
    except (exc.ConnectionError, exc.HTTPError, exc.Timeout):
        is_up = False
    except Exception as err:
        status_err(str(err))

    status_ok()
    metric_bool('nova_api_metadata_local_status', is_up)
    if not is_up:
        return
    # The response time is only sent when the API is up.
    metric('nova_api_metadata_local_response_time', 'double',
           '%.3f' % milliseconds, 'ms')
Ejemplo n.º 56
0
def check(args):
    """Read hypervisor statistics from nova and emit cloud resource metrics.

    :param args: parsed arguments; ``cpu_allocation_ratio`` and
        ``mem_allocation_ratio`` scale the ``total_vcpus`` and
        ``total_memory`` metrics respectively.
    """
    try:
        nova = get_openstack_client('compute')
    except Exception as err:
        metric_bool('client_success', False, m_name='maas_nova')
        status_err(str(err), m_name='maas_nova')
    else:
        metric_bool('client_success', True, m_name='maas_nova')
        # Fetch the detailed record of every hypervisor.
        stats = [nova.get_hypervisor(hv.id) for hv in nova.hypervisors()]
        cloud_stats = collections.defaultdict(dict)
        # NOTE(review): each hypervisor overwrites the values stored for the
        # previous one, so the emitted figures come from the last hypervisor
        # in the list - confirm this is intended.
        for position, stat in enumerate(stats, 1):
            # Expose the running 1-based position as a 'count' attribute.
            stat.count = position
            for metric_name, vals in stats_mapping.iteritems():
                if metric_name == 'total_vcpus':
                    multiplier = args.cpu_allocation_ratio
                elif metric_name == 'total_memory':
                    multiplier = args.mem_allocation_ratio
                else:
                    multiplier = 1
                value = getattr(stat, vals['stat_name']) * multiplier
                entry = cloud_stats[metric_name]
                entry['value'] = value
                entry['unit'] = vals['unit']
                entry['type'] = vals['type']

    status_ok(m_name='maas_nova')
    for metric_name, entry in cloud_stats.iteritems():
        metric('cloud_resource_%s' % metric_name,
               entry['type'], entry['value'], entry['unit'])