예제 #1
0
def check(warn, crit, project):
    """Check memory usage of every running container that has a memory limit.

    Args:
        warn: warning threshold, passed through to ``analize``.
        crit: critical threshold, passed through to ``analize``; must not be
            lower than ``warn``.
        project: project to inspect; when falsy, the value returned by
            ``openshift.get_project()`` is used.

    Returns:
        Whatever ``report(results, errors)`` returns.

    Raises:
        ValueError: if ``crit`` is lower than ``warn``.
    """
    if crit < warn:
        msg = "critical threshold cannot be lower than warning threshold: %d < %d"
        raise ValueError(msg % (crit, warn))

    if not project:
        project = openshift.get_project()

    results = []
    errors = []

    pcs = openshift.get_running_pod_containers(project)

    for pod_name, container_name, container_data in pcs:
        configured_limit = container_data.get('resources',
                                              {}).get('limits', {}).get('memory')
        # Containers without a memory limit are never analysed, so do not
        # spend a remote exec on them (and do not record its failures).
        if not configured_limit:
            continue

        try:
            memory_usage = openshift.exec_in_pod_container(
                project, pod_name, container_name, check_memory_usage_cmd)
            # The limit that is analysed is the one read from inside the
            # container, not the value found in the container spec.
            memory_limit = openshift.exec_in_pod_container(
                project, pod_name, container_name, check_memory_limit_cmd)
            results.extend(
                analize(pod_name, container_name, memory_limit,
                        memory_usage, warn, crit))
        except Exception as e:
            # Keep going: one failing container must not hide the others.
            errors.append((pod_name, container_name, e))

    return report(results, errors)
예제 #2
0
def check(service, containers):
    """Run the external MySQL check against ``service`` and report the result.

    The MySQL credentials are read from the environment of the first running
    pod container that matches the service selectors and the comma-separated
    ``containers`` names.

    Returns:
        Whatever ``report(nag_status, output)`` returns.
    """
    project = openshift.get_project()

    service_selectors = openshift.get_service_selectors(project, service)
    candidates = openshift.get_running_pod_containers(
        project,
        selector=service_selectors,
        container_names=containers.split(','))

    # Core only has a single mysql with replication, so inspecting the first
    # match is enough even when several are returned.
    pod_name, container_name, _container_data = candidates[0]

    env = openshift.get_container_env(project, pod_name, container_name)

    command = ((check_mysql_path, "-H", service)
               + ("-u", env["MYSQL_USER"])
               + ("-p", env["MYSQL_PASSWORD"]))

    try:
        output = check_output(command)
    except CalledProcessError as cpe:
        plugin_codes = (nagios.OK, nagios.WARN, nagios.CRIT, nagios.UNKNOWN)
        if cpe.returncode not in plugin_codes:
            # Unexpected exit code: report the exception text as UNKNOWN.
            output, nag_status = str(cpe), nagios.UNKNOWN
        else:
            # The exit code is one of the known statuses; pass it through
            # together with the captured output.
            output, nag_status = cpe.output, cpe.returncode
    else:
        nag_status = nagios.OK

    return report(nag_status, output)
예제 #3
0
def check(warn, crit, minimum, project):
    """Check disk usage inside running containers whose pod name contains "mongo".

    Args:
        warn: warning threshold, passed through to ``analize``.
        crit: critical threshold, passed through to ``analize``; must not be
            lower than ``warn``.
        minimum: forwarded unchanged to ``report``.
        project: project to inspect; when falsy, the value returned by
            ``openshift.get_project()`` is used.

    Returns:
        Whatever ``report(results, errors, minimum)`` returns.

    Raises:
        ValueError: if ``crit`` is lower than ``warn``.
    """
    if crit < warn:
        raise ValueError(
            "critical threshold cannot be lower than warning threshold: %d < %d"
            % (crit, warn))

    project = project or openshift.get_project()

    results, errors = [], []

    for pod_name, container_name, container_data in \
            openshift.get_running_pod_containers(project):
        if "mongo" not in pod_name:
            continue

        try:
            df_output = openshift.exec_in_pod_container(
                project, pod_name, container_name, check_disk_cmd)
            usage = parse_df_lines(df_output)
            results.extend(
                analize(pod_name, container_name, usage, warn, crit))
        except Exception as e:
            # Collect the failure and continue with the next container.
            errors.append((pod_name, container_name, e))

    return report(results, errors, minimum)
예제 #4
0
def check(service, containers):
    """Invoke the MySQL check program for ``service`` and report its status.

    ``containers`` is a comma-separated list of container names. The MySQL
    user and password come from the environment of the first matching
    running pod container.

    Returns:
        Whatever ``report(nag_status, output)`` returns.
    """
    project = openshift.get_project()
    selectors = openshift.get_service_selectors(project, service)
    names = containers.split(',')
    matches = openshift.get_running_pod_containers(
        project, selector=selectors, container_names=names)

    # Core only has a single mysql with replication, so only the first match
    # is checked even if there are multiple results.
    first_match = matches[0]
    pod_name, container_name, _ = first_match

    env = openshift.get_container_env(project, pod_name, container_name)

    args = (
        check_mysql_path,
        "-H", service,
        "-u", env["MYSQL_USER"],
        "-p", env["MYSQL_PASSWORD"],
    )

    failure = None
    try:
        output = check_output(args)
    except CalledProcessError as cpe:
        failure = cpe

    if failure is None:
        nag_status = nagios.OK
    elif failure.returncode in (nagios.OK, nagios.WARN, nagios.CRIT,
                                nagios.UNKNOWN):
        # Known status code: forward it with the captured output.
        output = failure.output
        nag_status = failure.returncode
    else:
        # Anything else is reported as UNKNOWN with the exception text.
        output = str(failure)
        nag_status = nagios.UNKNOWN

    return report(nag_status, output)
예제 #5
0
def check(containers):
    """Run the MongoDB check command in the matching pods and report per-pod status.

    Args:
        containers: comma-separated container names used to select the
            running pods.

    Returns:
        Whatever ``report(pods, rs_statuses, nag_statuses)`` returns.
    """
    project = openshift.get_project()

    pods = openshift.get_running_pod_names(project, container_names=containers.split(','))

    raw_results = openshift.exec_in_pods(project, pods, check_mongodb_cmd)

    # Build real lists instead of using map(): on Python 3 map() is a lazy,
    # one-shot iterator, and nag_statuses would be layered on top of
    # rs_statuses, so whichever one report() consumed first would drain the
    # other. Lists behave the same on Python 2 and Python 3.
    rs_statuses = [parse_mongo_result(raw) for raw in raw_results]
    nag_statuses = [analize(rs_status) for rs_status in rs_statuses]

    return report(pods, rs_statuses, nag_statuses)
def check(containers):
    """Run the MongoDB check command in the matching pods and report topic existence.

    Args:
        containers: comma-separated container names used to select the
            running pods.

    Returns:
        Whatever ``report(pods, project, result)`` returns.
    """
    project = openshift.get_project()
    pods = openshift.get_running_pod_names(
        project, container_names=containers.split(','))

    raw_results = openshift.exec_in_pods(project, pods, check_mongodb_cmd)

    # Materialise the parsed results: on Python 3 map() yields a one-shot
    # iterator, which would silently appear empty if topic_existance walks
    # it more than once. The local is also no longer called "collections",
    # which shadowed the standard-library module of the same name.
    parsed_collections = [parse_mongo_result(raw) for raw in raw_results]

    # NOTE(review): "topics" is not defined in this function; presumably a
    # module-level value - confirm.
    result = topic_existance(topics, parsed_collections)

    return report(pods, project, result)
예제 #7
0
def check(containers):
    """Collect MongoDB status from the selected pods and report it.

    Args:
        containers: comma-separated container names; running pods are
            selected by these names.

    Returns:
        Whatever ``report(pods, rs_statuses, nag_statuses)`` returns.
    """
    project = openshift.get_project()

    pods = openshift.get_running_pod_names(
        project, container_names=containers.split(','))

    # Use list comprehensions rather than map(): a Python 3 map object can be
    # consumed only once, and here nag_statuses would pull its items from
    # rs_statuses, leaving report() with at most one usable sequence.
    rs_statuses = [
        parse_mongo_result(pod_output)
        for pod_output in openshift.exec_in_pods(project, pods,
                                                 check_mongodb_cmd)
    ]
    nag_statuses = [analize(rs_status) for rs_status in rs_statuses]

    return report(pods, rs_statuses, nag_statuses)
예제 #8
0
def check(warn, crit):
    """Check disk usage in every running pod of the current project.

    Args:
        warn: warning threshold, passed through to ``analize``.
        crit: critical threshold, passed through to ``analize``; must not be
            lower than ``warn``.

    Returns:
        Whatever ``report(results)`` returns.

    Raises:
        ValueError: if ``crit`` is lower than ``warn``.
    """
    if crit < warn:
        raise ValueError(
            "critical threshold cannot be lower than warning threshold: %d < %d"
            % (crit, warn))

    project = openshift.get_project()
    pods = openshift.get_running_pod_names(project)
    df_outputs = openshift.exec_in_pods(project, pods, check_disk_cmd)

    # Pair each pod with its command output and flatten the per-pod findings.
    results = [
        finding
        for pod, lines in zip(pods, df_outputs)
        for finding in analize(pod, parse_df_lines(lines), warn, crit)
    ]

    return report(results)
예제 #9
0
def check():
    """Warn about deployment configs with several pods running on one node.

    For every deployment config of the current project, the running pods
    selected by the config's name are mapped to nodes; each node that hosts
    more than one of those pods produces a warning line.

    Returns:
        Whatever ``report(issues)`` returns.
    """
    issues = []
    project = openshift.get_project()

    for dc in openshift.get_deploymentconfigs(project)["items"]:
        component = dc["metadata"]["name"]
        pods = openshift.get_running_pod_names(
            project, container_names=component)
        nodes = openshift.get_nodes_from_names(pods)

        # A single pod (or none) cannot share a node with a sibling.
        if len(pods) <= 1:
            continue

        for node in set(nodes):
            pods_on_node = nodes.count(node)
            if pods_on_node <= 1:
                continue
            issues.append(
                "WARN: %s has %s pods running on the same node: %s"
                % (component, pods_on_node, node))

    return report(issues)
예제 #10
0
def check(project):
    """Analyse the resource limits of every running pod container.

    Args:
        project: project to inspect; when falsy, the value returned by
            ``openshift.get_project()`` is used.

    Returns:
        Whatever ``report(results, errors)`` returns.
    """
    project = project or openshift.get_project()

    results, errors = [], []

    for pod_name, container_name, container_data in \
            openshift.get_running_pod_containers(project):
        resources = container_data.get('resources', {})
        limits = resources.get('limits', {})
        try:
            results.extend(analize(pod_name, container_name, limits))
        except Exception as e:
            # Record the failure and carry on with the remaining containers.
            errors.append((pod_name, container_name, e))

    return report(results, errors)
예제 #11
0
def check(project):
    """Run ``analize`` on the configured limits of each running container.

    Args:
        project: project to inspect. A falsy value means "use the project
            returned by ``openshift.get_project()``".

    Returns:
        Whatever ``report(results, errors)`` returns.
    """
    if not project:
        project = openshift.get_project()

    findings = []
    failures = []

    running = openshift.get_running_pod_containers(project)
    for entry in running:
        pod_name, container_name, container_data = entry
        resource_limits = container_data.get('resources', {}).get('limits', {})
        try:
            analysed = analize(pod_name, container_name, resource_limits)
            findings.extend(analysed)
        except Exception as e:
            # An error for one container is reported, not raised.
            failures.append((pod_name, container_name, e))

    return report(findings, failures)
예제 #12
0
def check():
    """Check that the mongodb pods are spread over distinct nodes.

    Status selection:
      * UNKNOWN when no running mongodb pod is found,
      * CRIT when fewer than three nodes are returned for the pods,
      * WARN when at least two pods share a node,
      * OK otherwise.

    Returns:
        Whatever ``report(nag_status, output)`` returns.
        NOTE(review): the early exits used to return the bare nagios status
        without calling ``report``, so this assumes ``report`` hands the
        status back - confirm.
    """
    project = openshift.get_project()
    pods = openshift.get_running_pod_names(project, container_names="mongodb")
    if not pods:
        # Previously this message was assigned and then dropped because the
        # function returned without reporting it.
        return report(nagios.UNKNOWN,
                      "Unable to locate any mongodb containers")

    nodes = openshift.get_nodes_from_names(pods)
    nodes_pods = dict(zip(pods, nodes))

    if len(nodes) < 3:
        # Same fix as above: route the pod/node mapping through report().
        return report(nagios.CRIT, nodes_pods)

    # Any repeated node means two pods share it. This is identical to the
    # former pairwise comparison for exactly three nodes and also covers
    # setups with more than three.
    if len(set(nodes)) < len(nodes):
        nag_status = nagios.WARN
    else:
        nag_status = nagios.OK

    return report(nag_status, nodes_pods)
예제 #13
0
def check(warn, crit, minimum, project):
    """Check disk usage inside every running pod container.

    Args:
        warn: warning threshold, passed through to ``analize``.
        crit: critical threshold, passed through to ``analize``; must not be
            lower than ``warn``.
        minimum: forwarded unchanged to ``report``.
        project: project to inspect; when falsy, the value returned by
            ``openshift.get_project()`` is used.

    Returns:
        Whatever ``report(results, errors, minimum)`` returns.

    Raises:
        ValueError: if ``crit`` is lower than ``warn``.
    """
    if crit < warn:
        template = "critical threshold cannot be lower than warning threshold: %d < %d"
        raise ValueError(template % (crit, warn))

    if not project:
        project = openshift.get_project()

    findings = []
    failures = []

    for entry in openshift.get_running_pod_containers(project):
        pod_name, container_name, container_data = entry
        try:
            df_output = openshift.exec_in_pod_container(
                project, pod_name, container_name, check_disk_cmd)
            parsed = parse_df_lines(df_output)
            findings.extend(
                analize(pod_name, container_name, parsed, warn, crit))
        except Exception as e:
            # Failures are collected per container and handed to report().
            failures.append((pod_name, container_name, e))

    return report(findings, failures, minimum)
예제 #14
0
def check(warn, crit, project):
    """Check CPU usage of every running container that has a CPU limit.

    The usage values stored by the previous run (``read_cpu_usage``) are
    handed to ``analize`` together with the current ones, and the current
    values are stored with ``write_cpu_usage`` before reporting.

    Args:
        warn: warning threshold, passed through to ``analize``.
        crit: critical threshold, passed through to ``analize``; must not be
            lower than ``warn``.
        project: project to inspect; when falsy, the value returned by
            ``openshift.get_project()`` is used.

    Returns:
        Whatever ``report(results, errors)`` returns.

    Raises:
        ValueError: if ``crit`` is lower than ``warn``.
    """
    if crit < warn:
        raise ValueError(
            "critical threshold cannot be lower than warning threshold: %d < %d"
            % (crit, warn))

    project = project or openshift.get_project()

    results, errors = [], []

    previous = read_cpu_usage()
    current = {}

    for pod_name, container_name, container_data in \
            openshift.get_running_pod_containers(project):
        limits = container_data.get('resources', {}).get('limits', {})
        # Containers without a CPU limit are not measured.
        if not limits.get('cpu'):
            continue

        try:
            curr_usage, curr_uptime, limit = get_container_cpu_usage(
                project, pod_name, container_name)

            # Remember this sample, keyed by pod and then by container.
            current.setdefault(pod_name, {})[container_name] = [
                curr_usage, curr_uptime
            ]

            # No earlier sample for this container yields (None, None).
            earlier = previous.get(pod_name, {})
            prev_usage, prev_uptime = earlier.get(container_name,
                                                  [None, None])

            results.extend(
                analize(pod_name, container_name, prev_usage, curr_usage,
                        prev_uptime, curr_uptime, limit, warn, crit))
        except Exception as e:
            errors.append((pod_name, container_name, e))

    write_cpu_usage(current)
    return report(results, errors)
예제 #15
0
def check(project):
    """Perform the request for ``project`` and report its outcome.

    Args:
        project: project to query; when falsy, the value returned by
            ``openshift.get_project()`` is used.

    Returns:
        Whatever ``report(results, error)`` returns.
    """
    project = project or openshift.get_project()
    outcome = do_request(project)
    # do_request is expected to yield exactly two values.
    results, error = outcome
    return report(results, error)
예제 #16
0
def check(project):
    """Request data for ``project``, parse the response and report it.

    Args:
        project: project to query. A falsy value means "use the project
            returned by ``openshift.get_project()``".

    Returns:
        Whatever ``report(results, errors)`` returns.
    """
    target = project if project else openshift.get_project()
    results, errors = parse_response(do_request(target))
    return report(results, errors)