def check(warn, crit, project):
    """Check the memory usage of every running container in a project.

    For each running pod container, ``check_memory_usage_cmd`` is executed
    inside the container. When the container spec declares a memory limit,
    the declared value is replaced by the output of ``check_memory_limit_cmd``
    executed inside the same container. Both values are handed to ``analize``
    together with the thresholds.

    Args:
        warn: Warning threshold, forwarded to ``analize``.
        crit: Critical threshold, forwarded to ``analize``; must not be
            lower than ``warn``.
        project: Project to inspect; when falsy, ``openshift.get_project()``
            supplies it.

    Returns:
        Whatever ``report`` returns for the collected results and errors.

    Raises:
        ValueError: If ``crit`` is lower than ``warn``.
    """
    if warn > crit:
        template = "critical threshold cannot be lower than warning threshold: %d < %d"
        raise ValueError(template % (crit, warn))

    project = project or openshift.get_project()

    findings, failures = [], []
    for pod, container, spec in openshift.get_running_pod_containers(project):
        limit = spec.get('resources', {}).get('limits', {}).get('memory')
        try:
            usage = openshift.exec_in_pod_container(
                project, pod, container, check_memory_usage_cmd)
            if limit:
                # A limit is declared: use the value obtained from inside
                # the container instead of the one from the spec.
                limit = openshift.exec_in_pod_container(
                    project, pod, container, check_memory_limit_cmd)
            findings.extend(analize(pod, container, limit, usage, warn, crit))
        except Exception as exc:
            # A failing container must not abort the remaining checks;
            # remember the failure and let report() deal with it.
            failures.append((pod, container, exc))

    return report(findings, failures)
def check(service, containers):
    """Run the external MySQL check plugin against a service and report it.

    The running pod containers backing ``service`` are looked up (restricted
    to the comma-separated names in ``containers``). The MySQL credentials
    are read from the environment of the first match and passed to the
    program at ``check_mysql_path``.

    Args:
        service: Service name; used to look up the pod selectors and as the
            ``-H`` argument of the plugin.
        containers: Comma-separated container names to consider.

    Returns:
        Whatever ``report`` returns for the resulting status and output.
    """
    project = openshift.get_project()
    selectors = openshift.get_service_selectors(project, service)
    pcs = openshift.get_running_pod_containers(
        project, selector=selectors, container_names=containers.split(','))

    # Without this guard an empty result makes ``pcs[0]`` fail with an
    # IndexError traceback instead of a proper check result.
    if not pcs:
        return report(
            nagios.UNKNOWN,
            "no running containers found for service '%s' (containers: %s)"
            % (service, containers))

    # since core only has single mysql with replication, we only check one
    # even if there are multiple results
    pod_name, container_name, _container_data = pcs[0]

    env = openshift.get_container_env(project, pod_name, container_name)
    # NOTE(review): the password is handed over as a command-line argument.
    args = (check_mysql_path, "-H", service,
            "-u", env["MYSQL_USER"], "-p", env["MYSQL_PASSWORD"])

    try:
        output = check_output(args)
    except CalledProcessError as cpe:
        if cpe.returncode in (nagios.OK, nagios.WARN, nagios.CRIT, nagios.UNKNOWN):
            # The plugin exited with a recognised status: pass it through.
            output = cpe.output
            nag_status = cpe.returncode
        else:
            # Any other exit code is reported as UNKNOWN.
            output = str(cpe)
            nag_status = nagios.UNKNOWN
    else:
        nag_status = nagios.OK

    return report(nag_status, output)
def check(warn, crit, minimum, project):
    """Check disk usage inside running containers whose pod name contains "mongo".

    For each matching container ``check_disk_cmd`` is executed inside it, the
    output is parsed with ``parse_df_lines`` and the parsed data is handed to
    ``analize`` together with the thresholds.

    Args:
        warn: Warning threshold, forwarded to ``analize``.
        crit: Critical threshold, forwarded to ``analize``; must not be
            lower than ``warn``.
        minimum: Forwarded unchanged to ``report``.
        project: Project to inspect; when falsy, ``openshift.get_project()``
            supplies it.

    Returns:
        Whatever ``report`` returns for the results, errors and ``minimum``.

    Raises:
        ValueError: If ``crit`` is lower than ``warn``.
    """
    if warn > crit:
        template = "critical threshold cannot be lower than warning threshold: %d < %d"
        raise ValueError(template % (crit, warn))

    project = project or openshift.get_project()

    findings, failures = [], []
    for pod, container, _spec in openshift.get_running_pod_containers(project):
        # Only pods whose name contains "mongo" are inspected.
        if "mongo" not in pod:
            continue
        try:
            df_output = openshift.exec_in_pod_container(
                project, pod, container, check_disk_cmd)
            parsed = parse_df_lines(df_output)
            findings.extend(analize(pod, container, parsed, warn, crit))
        except Exception as exc:
            # Keep checking the remaining containers; report() receives
            # the failure.
            failures.append((pod, container, exc))

    return report(findings, failures, minimum)
def check(service, containers):
    """Run the external MySQL check plugin against a service and report it.

    The running pod containers backing ``service`` are looked up (restricted
    to the comma-separated names in ``containers``). The MySQL credentials
    are read from the environment of the first match and passed to the
    program at ``check_mysql_path``.

    Args:
        service: Service name; used to look up the pod selectors and as the
            ``-H`` argument of the plugin.
        containers: Comma-separated container names to consider.

    Returns:
        Whatever ``report`` returns for the resulting status and output.
    """
    project = openshift.get_project()
    selectors = openshift.get_service_selectors(project, service)
    pcs = openshift.get_running_pod_containers(
        project, selector=selectors, container_names=containers.split(','))

    # Without this guard an empty result makes ``pcs[0]`` fail with an
    # IndexError traceback instead of a proper check result.
    if not pcs:
        return report(
            nagios.UNKNOWN,
            "no running containers found for service '%s' (containers: %s)"
            % (service, containers))

    # since core only has single mysql with replication, we only check one
    # even if there are multiple results
    pod_name, container_name, _container_data = pcs[0]

    env = openshift.get_container_env(project, pod_name, container_name)
    # NOTE(review): the password is handed over as a command-line argument.
    args = (check_mysql_path, "-H", service,
            "-u", env["MYSQL_USER"], "-p", env["MYSQL_PASSWORD"])

    try:
        output = check_output(args)
    except CalledProcessError as cpe:
        if cpe.returncode in (nagios.OK, nagios.WARN, nagios.CRIT, nagios.UNKNOWN):
            # The plugin exited with a recognised status: pass it through.
            output = cpe.output
            nag_status = cpe.returncode
        else:
            # Any other exit code is reported as UNKNOWN.
            output = str(cpe)
            nag_status = nagios.UNKNOWN
    else:
        nag_status = nagios.OK

    return report(nag_status, output)
def check(project):
    """Analyze the declared resource limits of every running container.

    The ``resources.limits`` mapping of each running pod container (an empty
    dict when absent) is handed to ``analize``.

    Args:
        project: Project to inspect; when falsy, ``openshift.get_project()``
            supplies it.

    Returns:
        Whatever ``report`` returns for the collected results and errors.
    """
    project = project or openshift.get_project()

    findings, failures = [], []
    for pod, container, spec in openshift.get_running_pod_containers(project):
        limits = spec.get('resources', {}).get('limits', {})
        try:
            findings.extend(analize(pod, container, limits))
        except Exception as exc:
            # One bad container must not stop the others from being checked.
            failures.append((pod, container, exc))

    return report(findings, failures)
def check(warn, crit, minimum, project):
    """Check disk usage inside every running container of a project.

    ``check_disk_cmd`` is executed inside each running pod container, the
    output is parsed with ``parse_df_lines`` and the parsed data is handed to
    ``analize`` together with the thresholds.

    Args:
        warn: Warning threshold, forwarded to ``analize``.
        crit: Critical threshold, forwarded to ``analize``; must not be
            lower than ``warn``.
        minimum: Forwarded unchanged to ``report``.
        project: Project to inspect; when falsy, ``openshift.get_project()``
            supplies it.

    Returns:
        Whatever ``report`` returns for the results, errors and ``minimum``.

    Raises:
        ValueError: If ``crit`` is lower than ``warn``.
    """
    if warn > crit:
        template = "critical threshold cannot be lower than warning threshold: %d < %d"
        raise ValueError(template % (crit, warn))

    project = project or openshift.get_project()

    findings, failures = [], []
    for pod, container, _spec in openshift.get_running_pod_containers(project):
        try:
            df_output = openshift.exec_in_pod_container(
                project, pod, container, check_disk_cmd)
            parsed = parse_df_lines(df_output)
            findings.extend(analize(pod, container, parsed, warn, crit))
        except Exception as exc:
            # Keep checking the remaining containers; report() receives
            # the failure.
            failures.append((pod, container, exc))

    return report(findings, failures, minimum)
def check(warn, crit, project):
    """Check the CPU usage of every running container that declares a CPU limit.

    The samples stored by a previous run are loaded with ``read_cpu_usage``.
    For each container with a CPU limit, a fresh sample is taken with
    ``get_container_cpu_usage`` and handed to ``analize`` together with the
    previous sample (``None`` values when there is none). The fresh samples
    are stored with ``write_cpu_usage`` afterwards.

    Args:
        warn: Warning threshold, forwarded to ``analize``.
        crit: Critical threshold, forwarded to ``analize``; must not be
            lower than ``warn``.
        project: Project to inspect; when falsy, ``openshift.get_project()``
            supplies it.

    Returns:
        Whatever ``report`` returns for the collected results and errors.

    Raises:
        ValueError: If ``crit`` is lower than ``warn``.
    """
    if warn > crit:
        template = "critical threshold cannot be lower than warning threshold: %d < %d"
        raise ValueError(template % (crit, warn))

    project = project or openshift.get_project()

    findings, failures = [], []
    previous_samples = read_cpu_usage()
    current_samples = {}

    for pod, container, spec in openshift.get_running_pod_containers(project):
        declared_limit = spec.get('resources', {}).get('limits', {}).get('cpu')
        # Containers without a declared CPU limit are skipped entirely.
        if not declared_limit:
            continue
        try:
            usage_now, uptime_now, limit = get_container_cpu_usage(
                project, pod, container)
            # Samples are stored as {pod: {container: [usage, uptime]}}.
            current_samples.setdefault(pod, {})[container] = [usage_now, uptime_now]
            usage_before, uptime_before = previous_samples.get(pod, {}).get(
                container, [None, None])
            findings.extend(
                analize(pod, container, usage_before, usage_now,
                        uptime_before, uptime_now, limit, warn, crit))
        except Exception as exc:
            # Keep checking the remaining containers; report() receives
            # the failure.
            failures.append((pod, container, exc))

    write_cpu_usage(current_samples)
    return report(findings, failures)