def check(warn, crit, project):
    """Collect memory readings for every running container and report them.

    warn, crit -- thresholds handed unchanged to ``analize``; ``crit`` must
        not be lower than ``warn``.
    project -- project to inspect; when falsy, the value returned by
        ``openshift.get_project()`` is used instead.

    Returns whatever ``report(results, errors)`` returns.
    Raises ValueError when ``crit < warn``.
    """
    if crit < warn:
        msg = "critical threshold cannot be lower than warning threshold: %d < %d"
        raise ValueError(msg % (crit, warn))
    if not project:
        project = openshift.get_project()
    results = []
    errors = []
    pcs = openshift.get_running_pod_containers(project)
    for pod_name, container_name, container_data in pcs:
        # Limit as declared in the container spec; None when none is declared.
        memory_limit = container_data.get('resources', {}).get('limits', {}).get('memory')
        try:
            memory_usage = openshift.exec_in_pod_container(
                project, pod_name, container_name, check_memory_usage_cmd)
            if memory_limit:
                # A declared limit is replaced by the output of
                # check_memory_limit_cmd run inside the container.
                memory_limit = openshift.exec_in_pod_container(
                    project, pod_name, container_name, check_memory_limit_cmd)
            # NOTE(review): nesting reconstructed from a flattened source; this
            # call is assumed to run for containers without a limit as well
            # (memory_limit is then None) -- confirm against the original file.
            results.extend(
                analize(pod_name, container_name, memory_limit, memory_usage, warn, crit))
        except Exception as e:
            # Any failure for one container is recorded and the loop moves on.
            errors.append((pod_name, container_name, e))
    return report(results, errors)
def check(service, containers):
    """Run the external MySQL check against *service* and report the outcome.

    service -- service name; used to look up pod selectors and passed to the
        check program as the ``-H`` host argument.
    containers -- comma-separated container names used to filter the pods.

    Credentials are read from the ``MYSQL_USER`` / ``MYSQL_PASSWORD`` entries
    of the selected container's environment.

    Returns whatever ``report(nag_status, output)`` returns.
    """
    project = openshift.get_project()
    selectors = openshift.get_service_selectors(project, service)
    pcs = openshift.get_running_pod_containers(
        project, selector=selectors, container_names=containers.split(','))

    if not pcs:
        # Without a running container there are no credentials to read;
        # report that explicitly instead of failing with an IndexError.
        return report(
            nagios.UNKNOWN,
            "Unable to locate a running container for service %s" % service)

    # since core only has single mysql with replication, we only check one
    # even if there are multiple results
    pod_name, container_name, _ = pcs[0]
    env = openshift.get_container_env(project, pod_name, container_name)

    args = (check_mysql_path, "-H", service,
            "-u", env["MYSQL_USER"], "-p", env["MYSQL_PASSWORD"])
    try:
        output = check_output(args)
        nag_status = nagios.OK
    except CalledProcessError as cpe:
        if cpe.returncode in (nagios.OK, nagios.WARN, nagios.CRIT, nagios.UNKNOWN):
            # The exit code is one of the known statuses: pass the check
            # program's own output and status through.
            output = cpe.output
            nag_status = cpe.returncode
        else:
            # Any other exit code is reported as UNKNOWN with the exception text.
            output = str(cpe)
            nag_status = nagios.UNKNOWN
    return report(nag_status, output)
def check(warn, crit, minimum, project):
    """Run ``check_disk_cmd`` in containers of pods whose name contains "mongo".

    warn, crit -- thresholds forwarded to ``analize``; ``crit`` must not be
        lower than ``warn``.
    minimum -- forwarded unchanged to ``report``.
    project -- project to inspect; a falsy value selects
        ``openshift.get_project()``.

    Returns the result of ``report(results, errors, minimum)``.
    Raises ValueError when ``crit < warn``.
    """
    if crit < warn:
        raise ValueError(
            "critical threshold cannot be lower than warning threshold: %d < %d"
            % (crit, warn))

    project = project or openshift.get_project()
    results, errors = [], []

    for pod_name, container_name, _container_data in \
            openshift.get_running_pod_containers(project):
        if "mongo" not in pod_name:
            continue
        try:
            df_output = openshift.exec_in_pod_container(
                project, pod_name, container_name, check_disk_cmd)
            parsed = parse_df_lines(df_output)
            results.extend(analize(pod_name, container_name, parsed, warn, crit))
        except Exception as exc:
            # One failing container must not abort the whole check.
            errors.append((pod_name, container_name, exc))

    return report(results, errors, minimum)
def check(service, containers):
    """Check MySQL behind *service* with the external check program.

    service -- service name; resolves the pod selectors and is given to the
        check program as its ``-H`` argument.
    containers -- comma-separated list of container names to match.

    The user and password come from the ``MYSQL_USER`` and ``MYSQL_PASSWORD``
    environment entries of the first matching container.

    Returns whatever ``report(nag_status, output)`` returns.
    """
    project = openshift.get_project()
    selectors = openshift.get_service_selectors(project, service)
    pcs = openshift.get_running_pod_containers(project,
                                               selector=selectors,
                                               container_names=containers.split(','))

    # An empty result would make pcs[0] raise IndexError; report it as an
    # UNKNOWN state with an explanatory message instead.
    if not pcs:
        message = "Unable to locate a running container for service %s" % service
        return report(nagios.UNKNOWN, message)

    # since core only has single mysql with replication, we only check one
    # even if there are multiple results
    pod_name, container_name, _ = pcs[0]
    env = openshift.get_container_env(project, pod_name, container_name)

    args = (check_mysql_path, "-H", service,
            "-u", env["MYSQL_USER"], "-p", env["MYSQL_PASSWORD"])
    try:
        output = check_output(args)
        nag_status = nagios.OK
    except CalledProcessError as cpe:
        if cpe.returncode in (nagios.OK, nagios.WARN, nagios.CRIT, nagios.UNKNOWN):
            # Exit code is a known status: forward the program's output as is.
            output = cpe.output
            nag_status = cpe.returncode
        else:
            # Unrecognised exit code: report UNKNOWN with the exception text.
            output = str(cpe)
            nag_status = nagios.UNKNOWN
    return report(nag_status, output)
def check(containers):
    """Run ``check_mongodb_cmd`` in the matching pods and report the statuses.

    containers -- comma-separated container names used to select the pods.

    Returns whatever ``report(pods, rs_statuses, nag_statuses)`` returns.
    """
    project = openshift.get_project()
    pods = openshift.get_running_pod_names(project, container_names=containers.split(','))

    raw_results = openshift.exec_in_pods(project, pods, check_mongodb_cmd)

    # Build real lists: rs_statuses is read twice (by analize and by report),
    # and a lazy map() iterator on Python 3 would already be exhausted by the
    # time report() looked at it.
    rs_statuses = [parse_mongo_result(raw) for raw in raw_results]
    nag_statuses = [analize(rs_status) for rs_status in rs_statuses]

    return report(pods, rs_statuses, nag_statuses)
def check(containers):
    """Run ``check_mongodb_cmd`` in the matching pods and check ``topics``.

    containers -- comma-separated container names used to select the pods.

    The parsed command results are handed to ``topic_existance`` together
    with the module-level ``topics``.

    Returns whatever ``report(pods, project, result)`` returns.
    """
    project = openshift.get_project()
    wanted_containers = containers.split(',')
    pods = openshift.get_running_pod_names(project, container_names=wanted_containers)

    raw_results = openshift.exec_in_pods(project, pods, check_mongodb_cmd)
    parsed_results = map(parse_mongo_result, raw_results)

    return report(pods, project, topic_existance(topics, parsed_results))
def check(containers):
    """Evaluate the output of ``check_mongodb_cmd`` for each matching pod.

    containers -- comma-separated container names used to select the pods.

    Returns whatever ``report(pods, rs_statuses, nag_statuses)`` returns.
    """
    project = openshift.get_project()
    pods = openshift.get_running_pod_names(
        project, container_names=containers.split(','))

    # Lists, not map() iterators: on Python 3 a map object can be consumed
    # only once, so computing nag_statuses from it would leave nothing in
    # rs_statuses for report() to read.
    rs_statuses = [
        parse_mongo_result(raw)
        for raw in openshift.exec_in_pods(project, pods, check_mongodb_cmd)
    ]
    nag_statuses = [analize(rs_status) for rs_status in rs_statuses]

    return report(pods, rs_statuses, nag_statuses)
def check(warn, crit):
    """Run ``check_disk_cmd`` in every running pod and report the findings.

    warn, crit -- thresholds forwarded to ``analize``; ``crit`` must not be
        lower than ``warn``.

    Returns whatever ``report(results)`` returns.
    Raises ValueError when ``crit < warn``.
    """
    if crit < warn:
        raise ValueError(
            "critical threshold cannot be lower than warning threshold: %d < %d"
            % (crit, warn))

    project = openshift.get_project()
    pod_names = openshift.get_running_pod_names(project)
    outputs = openshift.exec_in_pods(project, pod_names, check_disk_cmd)

    findings = []
    # Outputs are paired with pod names by position.
    for pod_name, df_output in zip(pod_names, outputs):
        findings += analize(pod_name, parse_df_lines(df_output), warn, crit)

    return report(findings)
def check():
    """Warn about deployment configs with several pods on the same node.

    For each item of the project's deployment configs, the running pods
    selected by the config's name are mapped to nodes; every node that
    occurs more than once produces one warning line.

    Returns whatever ``report(issues)`` returns.
    """
    issues = []
    project = openshift.get_project()

    for dc in openshift.get_deploymentconfigs(project)["items"]:
        component = dc["metadata"]["name"]
        pod_names = openshift.get_running_pod_names(
            project, container_names=component)
        nodes = openshift.get_nodes_from_names(pod_names)

        # A single pod (or none) cannot share a node with a sibling.
        if len(pod_names) <= 1:
            continue

        for node in set(nodes):
            pods_on_node = nodes.count(node)
            if pods_on_node > 1:
                issues.append(
                    "WARN: %s has %s pods running on the same node: %s"
                    % (component, pods_on_node, node))

    return report(issues)
def check(project):
    """Pass each running container's declared resource limits to ``analize``.

    project -- project to inspect; a falsy value selects
        ``openshift.get_project()``.

    Returns whatever ``report(results, errors)`` returns.
    """
    project = project or openshift.get_project()
    results, errors = [], []

    for pod_name, container_name, container_data in \
            openshift.get_running_pod_containers(project):
        # Empty dict when the container spec declares no limits.
        limits = container_data.get('resources', {}).get('limits', {})
        try:
            results.extend(analize(pod_name, container_name, limits))
        except Exception as exc:
            # Record the failure and keep going with the next container.
            errors.append((pod_name, container_name, exc))

    return report(results, errors)
def check():
    """Check that the mongodb pods run on distinct nodes.

    Status passed to ``report``:
      * ``nagios.UNKNOWN`` -- no running pod with a "mongodb" container found;
      * ``nagios.CRIT``    -- fewer than three nodes were returned;
      * ``nagios.WARN``    -- at least two of the first three nodes are equal;
      * ``nagios.OK``      -- the first three nodes are all different.

    Every exit goes through ``report`` so that the message accompanying the
    status is always emitted; the function returns whatever ``report`` returns.
    """
    project = openshift.get_project()
    pods = openshift.get_running_pod_names(project, container_names="mongodb")
    if not pods:
        return report(nagios.UNKNOWN, "Unable to locate any mongodb containers")

    nodes = openshift.get_nodes_from_names(pods)
    # Pod name -> node, used as the report output for all remaining states.
    nodes_pods = dict(zip(pods, nodes))

    if len(nodes) < 3:
        return report(nagios.CRIT, nodes_pods)

    # Only the first three nodes are compared; a duplicate among them
    # collapses the set below three elements.
    if len(set(nodes[:3])) < 3:
        nag_status = nagios.WARN
    else:
        nag_status = nagios.OK
    return report(nag_status, nodes_pods)
def check(warn, crit, minimum, project):
    """Run ``check_disk_cmd`` in every running container and report results.

    warn, crit -- thresholds forwarded to ``analize``; ``crit`` must not be
        lower than ``warn``.
    minimum -- forwarded unchanged to ``report``.
    project -- project to inspect; a falsy value selects
        ``openshift.get_project()``.

    Returns whatever ``report(results, errors, minimum)`` returns.
    Raises ValueError when ``crit < warn``.
    """
    if crit < warn:
        template = "critical threshold cannot be lower than warning threshold: %d < %d"
        raise ValueError(template % (crit, warn))

    project = project or openshift.get_project()

    results = []
    errors = []
    containers = openshift.get_running_pod_containers(project)

    for pod_name, container_name, _container_data in containers:
        try:
            df_lines = parse_df_lines(
                openshift.exec_in_pod_container(
                    project, pod_name, container_name, check_disk_cmd))
            results.extend(
                analize(pod_name, container_name, df_lines, warn, crit))
        except Exception as exc:
            # Failures are collected per container rather than raised.
            errors.append((pod_name, container_name, exc))

    return report(results, errors, minimum)
def check(warn, crit, project):
    """Compare current and previously stored CPU readings per container.

    warn, crit -- thresholds forwarded to ``analize``; ``crit`` must not be
        lower than ``warn``.
    project -- project to inspect; a falsy value selects
        ``openshift.get_project()``.

    Containers without a declared CPU limit are skipped. The readings
    gathered in this run are handed to ``write_cpu_usage`` before reporting.

    Returns whatever ``report(results, errors)`` returns.
    Raises ValueError when ``crit < warn``.
    """
    if crit < warn:
        raise ValueError(
            "critical threshold cannot be lower than warning threshold: %d < %d"
            % (crit, warn))

    project = project or openshift.get_project()

    results, errors = [], []
    previous = read_cpu_usage()
    current = {}

    for pod_name, container_name, container_data in \
            openshift.get_running_pod_containers(project):
        cpu_limit = container_data.get('resources', {}).get('limits', {}).get('cpu')
        if not cpu_limit:
            continue
        try:
            curr_usage, curr_uptime, limit = get_container_cpu_usage(
                project, pod_name, container_name)

            # Stored as {pod_name: {container_name: [usage, uptime]}}.
            current.setdefault(pod_name, {})[container_name] = [curr_usage, curr_uptime]

            # No earlier reading for this container yields [None, None].
            pod_history = previous.get(pod_name, {})
            prev_usage, prev_uptime = pod_history.get(container_name, [None, None])

            results.extend(
                analize(pod_name, container_name,
                        prev_usage, curr_usage,
                        prev_uptime, curr_uptime,
                        limit, warn, crit))
        except Exception as exc:
            errors.append((pod_name, container_name, exc))

    write_cpu_usage(current)
    return report(results, errors)
def check(project):
    """Call ``do_request`` for the project and report its result pair.

    project -- project to query; a falsy value selects
        ``openshift.get_project()``.

    Returns whatever ``report(results, error)`` returns.
    """
    target = project or openshift.get_project()
    # do_request yields a two-item (results, error) pair.
    results, error = do_request(target)
    return report(results, error)
def check(project):
    """Call ``do_request`` for the project, parse the response and report it.

    project -- project to query; a falsy value selects
        ``openshift.get_project()``.

    Returns whatever ``report(results, errors)`` returns.
    """
    target = project or openshift.get_project()
    # parse_response splits the response into a (results, errors) pair.
    results, errors = parse_response(do_request(target))
    return report(results, errors)