def main(): '''get docker and openshift versions and send to metric sender ''' args = parse_args() mts = MetricSender(verbose=args.verbose, debug=args.debug) # Check if host rpm db is mounted. Otherwise check againts container db rpm_db_path = "/host/var/lib/rpm" if not os.path.exists(rpm_db_path): rpm_db_path = "/var/lib/rpm" keys = {} # Accumulate failures failures = 0 # Get docker version success, err = add_specific_rpm_version("docker", rpm_db_path, keys, mts) if not success: failures += 1 print "Failed to get docker rpm version. " + err.output openshift_package_name = "origin" # Get openshift node version (attempt upstream) success, err = add_specific_rpm_version( "{}-node".format(openshift_package_name), rpm_db_path, keys, mts, "openshift.node.") if not success: # Get openshift version (attempt downstream) openshift_package_name = "atomic-openshift" success, err2 = add_specific_rpm_version( "{}-node".format(openshift_package_name), rpm_db_path, keys, mts, "openshift.node.") if not success: failures += 1 print "Failed to get openshift rpm version:\n" + err.output + +err2.output # Get openshift master version (upstream or downstream) - only if node rpm found if success: success, err = add_specific_rpm_version( "{}-master".format(openshift_package_name), rpm_db_path, keys, mts, "openshift.master.") if not success: # Print notification but don't count this as failure print "Note: " + err.output print "Sending these metrics:" print json.dumps(keys, indent=4) mts.send_metrics() print "\nDone.\n" sys.exit(failures)
def send_metrics(usage, capacity, used):
    """ Push PV usage/capacity metrics to the MetricSender. """
    logger.debug("send_metrics()")

    start = time.time()
    sender = MetricSender()
    logger.info("Send data to MetricSender")

    # Queue the three PV datapoints, then ship them in one batch.
    for zbx_key, value in (('openshift.master.pv.percent.usage', usage),
                           ('openshift.master.pv.capacity.max', capacity),
                           ('openshift.master.pv.capacity.used', used)):
        sender.add_metric({zbx_key: value})
    sender.send_metrics()

    logger.info("Data sent to Zagg in %s seconds", str(time.time() - start))
def main():
    """ ds pod check """
    args = parse_args()
    logger.debug("args: ")
    logger.debug(args)

    result = check_ds_status(args=args, )

    # Report the check result to zabbix under the key supplied on the CLI.
    sender = MetricSender(verbose=args.verbose)
    sender.add_metric({args.key: result})
    sender.send_metrics()
def main():
    """ SAML Pod Status """
    args = parse_args()
    logger.debug("args: ")
    logger.debug(args)

    result = test_saml_pod(args=args, )

    # Report the SAML pod status to zabbix.
    sender = MetricSender(verbose=args.verbose)
    sender.add_metric({'openshift.master.saml.status': result})
    sender.send_metrics()
def run(self):
    """ Main function to run the check """
    self.parse_args()
    self.metric_sender = MetricSender(verbose=self.args.verbose,
                                      debug=self.args.debug)

    # A single status item reports whether the kubeconfig parsed cleanly.
    result = self.parse_config()
    self.metric_sender.add_metric({"openshift.kubeconfig.status": result})
    self.metric_sender.send_metrics()
def main():
    """ Check pods on all nodes and report whether any node has a problem. """
    args = parse_args()
    logger.debug("args: ")
    logger.debug(args)

    pods_status = check_node_pods_status(args.skip_namespaces)

    # Push the aggregate pod status to zabbix.
    sender = MetricSender(verbose=args.verbose)
    sender.add_metric({'openshift.nodes.pods.status': pods_status})
    sender.send_metrics()
def report_to_zabbix(total_expired_snapshots, total_deleted_snapshots, total_deletion_errors):
    """ Sends the commands exit code to zabbix. """
    sender = MetricSender(verbose=True)

    # One batched send covering all three snapshot counters.
    snapshot_counts = {
        EXPIRED_SNAPSHOTS_KEY: total_expired_snapshots,
        DELETED_SNAPSHOTS_KEY: total_deleted_snapshots,
        DELETION_ERRORS_KEY: total_deletion_errors,
    }
    sender.add_metric(snapshot_counts)
    sender.send_metrics()
def main():
    """ Check all the node taints and report whether any node has a problem. """
    args = parse_args()
    logger.debug("args: ")
    logger.debug(args)

    taint_status = check_taint_status()

    # Push the taint status to zabbix.
    sender = MetricSender(verbose=args.verbose)
    sender.add_metric({'openshift.nodes.taint.status': taint_status})
    sender.send_metrics()
def main():
    """ Sample PCP disk metrics and ship per-disk TPS / %util items to zabbix. """
    args = parse_args()
    sender = MetricSender(verbose=args.verbose, debug=args.debug)

    discovery_key_disk = 'disc.disk'
    interval = 3
    pcp_disk_dev_metrics = ['disk.dev.total', 'disk.dev.avactive']
    item_prototype_macro_disk = '#OSO_DISK'
    item_prototype_key_tps = 'disc.disk.tps'
    item_prototype_key_putil = 'disc.disk.putil'

    # Two samples, `interval` seconds apart, for both metrics at once.
    disk_metrics = pminfo.get_sampled_data(pcp_disk_dev_metrics, interval, 2)

    # Split the combined sample back out per metric name.
    pcp_metrics_divided = {}
    for metric in pcp_disk_dev_metrics:
        pcp_metrics_divided[metric] = {k: v for k, v in disk_metrics.items()
                                       if metric in k}

    # TPS: delta of disk.dev.total over the sampling interval.
    filtered_disk_totals = clean_up_metric_dict(
        pcp_metrics_divided[pcp_disk_dev_metrics[0]],
        pcp_disk_dev_metrics[0] + '.')

    # Register the per-disk dynamic (discovery) items before sending values.
    sender.add_dynamic_metric(discovery_key_disk, item_prototype_macro_disk,
                              filtered_disk_totals.keys())

    for disk, samples in filtered_disk_totals.iteritems():
        tps = (samples[1] - samples[0]) / interval
        sender.add_metric({'%s[%s]' % (item_prototype_key_tps, disk): tps})

    # %util: delta of disk.dev.avactive (ms the device was active) over the interval.
    filtered_disk_totals = clean_up_metric_dict(
        pcp_metrics_divided[pcp_disk_dev_metrics[1]],
        pcp_disk_dev_metrics[1] + '.')

    for disk, samples in filtered_disk_totals.iteritems():
        active_seconds = float(samples[1] - samples[0]) / 1000.0
        putil = 100 * active_seconds / interval
        sender.add_metric({'%s[%s]' % (item_prototype_key_putil, disk): putil})

    sender.send_metrics()
def send_metrics(reload_time):
    """ Report the haproxy reload time to the MetricSender. """
    logger.debug("send_metrics()")

    start = time.time()
    sender = MetricSender()
    logger.info("Send data to MetricSender")

    datapoint = {'openshift.haproxy.reload_time': reload_time}
    sender.add_metric(datapoint)
    logger.debug(datapoint)
    sender.send_metrics()

    logger.info("Data sent to Zagg in %s seconds", str(time.time() - start))
def main():
    """ Check all the node labels and report anything missing to zabbix. """
    args = parse_args()
    logger.debug("args: ")
    logger.debug(args)

    label_status = check_label_status()

    # Push the label status to zabbix.
    sender = MetricSender(verbose=args.verbose)
    sender.add_metric({'openshift.nodes.label.status': label_status})
    sender.send_metrics()
def send_metrics(key, result):
    ''' Send a single key/value datapoint to the MetricSender. '''
    logger.debug("send_metrics()")

    start = time.time()
    sender = MetricSender()
    logger.info("Send data to MetricSender")

    datapoint = {key: result}
    sender.add_metric(datapoint)
    logger.debug(datapoint)
    sender.send_metrics()

    logger.info("Data sent to Zagg in %s seconds", str(time.time() - start))
def __init__(self):
    """ initialize EBSStuckVolumesCheck class """
    self.args = self.vol_state_data = None

    self.parse_args()

    # Honor an explicitly requested AWS credentials profile.
    if self.args.aws_creds_profile:
        os.environ['AWS_PROFILE'] = self.args.aws_creds_profile

    self.eu = EbsUtil(self.args.region, verbose=self.args.verbose)
    self.mts = MetricSender(verbose=self.args.verbose)
def run(self):
    """ Entry point: gather fluentd pod state and report the oldest queue age. """
    self.parse_args()
    self.metric_sender = MetricSender(verbose=self.args.verbose,
                                      debug=self.args.debug)
    self.oc = OCUtil(namespace=self.get_logging_namespace(),
                     config_file='/tmp/admin.kubeconfig',
                     verbose=self.args.verbose)
    self.get_pods()

    oldest_buffer = self.check_fluentd_queues()
    self.send_metrics(oldest_buffer)
def send_metrics(stuck_builds, build_state):
    """ Report the count of stuck builds in the given build state. """
    logger.debug("send_metrics()")

    start = time.time()
    sender = MetricSender()
    logger.info("Send data to MetricSender")

    datapoint = {'openshift.stuck_builds.%s' % build_state.lower(): stuck_builds}
    logger.debug(datapoint)
    sender.add_metric(datapoint)
    sender.send_metrics()

    logger.info("Data sent to Zagg in %s seconds", str(time.time() - start))
def send_zagg_data(keep_time):
    ''' Send the terminating-project time metric to Zagg (best effort). '''
    logger.debug('send_zagg_data()')

    zgs_time = time.time()
    zgs = MetricSender()
    zgs.add_metric({'openshift.master.project.terminating.time': keep_time})
    try:
        zgs.send_metrics()
    except Exception:  # pylint: disable=broad-except
        # BUGFIX: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt. Keep the deliberate best-effort behaviour but
        # only for real errors, and keep the traceback in the log.
        logger.exception('Error sending to Zagg')

    logger.info("Data sent in %s seconds", str(time.time() - zgs_time))
def kill(self):
    ''' class entrypoint '''
    # Collect candidate haproxy processes keyed by elapsed time (etime -> pid).
    self.parse_args()
    self.current_time = time.time()
    haproxy_procs_etimes = self.get_all_haproxy_procs()

    # identify most recent haproxy process
    # and remove it from list of haproxy processes
    try:
        # Smallest elapsed time == most recently started process.
        youngest_etimes = min(haproxy_procs_etimes.keys())
        youngest_pid = haproxy_procs_etimes[youngest_etimes]
        self.dprint("Youngest haproxy PID: {}".format(youngest_pid))
        haproxy_procs_etimes.pop(youngest_etimes)
    except ValueError:
        # min() raises ValueError on an empty dict: no haproxy processes found.
        pass

    # find processes that have connections only in 'CLOSE-WAIT' state
    kill_list = []
    for proc in haproxy_procs_etimes.values():
        try:
            only_close_wait = True
            process = psutil.Process(proc)
            for conn in process.connections():
                # Any connection outside CLOSE_WAIT / FIN_WAIT2 means the
                # process is still doing useful work -- spare it.
                if conn.status != 'CLOSE_WAIT' and conn.status != 'FIN_WAIT2':
                    only_close_wait = False
                    break
            if only_close_wait:
                self.dprint("PID: {} marked for removal".format(proc))
                kill_list.append(proc)
        except psutil.NoSuchProcess:
            # Process exited between listing and inspection; skip it.
            pass

    # stop processes on the kill_list
    kill_count = 0
    for proc in kill_list:
        try:
            process = psutil.Process(proc)
            self.dprint("Stopping PID: {}".format(process.pid))
            process.kill()
            kill_count += 1
        except psutil.NoSuchProcess:
            # Already gone; nothing to count.
            pass

    print "Stopped {} haproxy processes".format(kill_count)

    # Report how many stale haproxy processes were reaped.
    ms = MetricSender()
    ms.add_metric({ZABBIX_KEY: kill_count})
    ms.send_metrics()
def send_metrics(problems):
    """ Push the EC2 instance/system/event problem counts to the MetricSender. """
    logger.debug("send_metrics(problems)")

    start = time.time()
    sender = MetricSender()
    logger.info("Send data to MetricSender")

    # One zabbix item per AWS status-check category.
    for zbx_key, problems_key in (('aws.ec2.instance.instance_status', 'InstanceStatus'),
                                  ('aws.ec2.instance.system_status', 'SystemStatus'),
                                  ('aws.ec2.instance.events', 'Events')):
        sender.add_metric({zbx_key: problems[problems_key]})
    sender.send_metrics()

    logger.info("Data sent to Zagg in %s seconds", str(time.time() - start))
def send_metrics(builds):
    """ Report one datapoint per build state to the MetricSender. """
    logger.debug("send_metrics()")

    start = time.time()
    sender = MetricSender()
    logger.info("Send data to MetricSender")

    for build_state, count in builds.items():
        datapoint = {'openshift.build_state.%s' % build_state: count}
        logger.debug(datapoint)
        sender.add_metric(datapoint)
    sender.send_metrics()

    logger.info("Data sent to Zagg in %s seconds", str(time.time() - start))
def send_metrics(status_code_create, status_code_delete):
    """ Report the project create/delete check results to the MetricSender. """
    logger.debug("send_metrics()")

    start = time.time()
    sender = MetricSender()
    logger.info("Send data to MetricSender")

    # 1 means create and delete the project failed
    sender.add_metric({'openshift.master.project.create': status_code_create})
    sender.add_metric({'openshift.master.project.delete': status_code_delete})
    sender.send_metrics()

    logger.info("Data sent to Zagg in %s seconds", str(time.time() - start))
def __init__(self, config_file=None):
    """ Parse the config and set up the docker client plus metric sender. """
    # Fall back to the default config location when none is supplied.
    self.config_file = config_file or '/etc/openshift_tools/container_metrics.yml'
    self.config = None

    self.parse_config()

    self.cli = AutoVersionClient(base_url='unix://var/run/docker.sock',
                                 timeout=120)
    self.docker_util = DockerUtil(self.cli)
    self.metric_sender = MetricSender(verbose=True)
def run(self):
    """Main function to run the check"""
    self.ocutil = OCUtil(config_file=self.kubeconfig, verbose=self.args.verbose)
    self.metrics = MetricSender(verbose=self.args.verbose, debug=self.args.debug)

    self.check_all_router_health()
    self.check_router_replica_count()

    # A dry run only displays what would have been sent.
    if self.args.dry_run:
        self.metrics.print_unique_metrics_key_value()
    else:
        self.metrics.send_metrics()
def run(self):
    ''' Entry point: check metrics pods and node metrics, then report both. '''
    self.parse_args()
    self.metric_sender = MetricSender(verbose=self.args.verbose,
                                      debug=self.args.debug)
    self.oc = OCUtil(namespace='openshift-infra',
                     config_file=self.kubeconfig,
                     verbose=self.args.verbose)

    pod_report = self.check_pods()
    self.get_hawkular_creds()
    metrics_report = self.check_node_metrics()
    self.report_to_zabbix(pod_report, metrics_report)
def config_metric_sender(self):
    """ configure the metric_sender """
    verbose = self.args.verbose
    debug = self.args.debug
    host = self.args.host if self.args.host else self.config['host']['name']

    # Flags may arrive as the strings 'True'/'False' (e.g. from argparse);
    # coerce those to real booleans before handing them on.
    if isinstance(verbose, str):
        verbose = (verbose == 'True')
    if isinstance(debug, str):
        debug = (debug == 'True')

    self.metric_sender = MetricSender(host=host, verbose=verbose, debug=debug,
                                      config_file=self.args.config_file)
def run(self): """ Main function to run the check """ self.parse_args() self.ora = OpenshiftRestApi() self.metric_sender = MetricSender(verbose=self.args.verbose, debug=self.args.debug) try: self.get_pods() except Exception as ex: print "Problem retreiving pod data: %s " % ex.message self.metric_sender.send_metrics()
def send_metrics(self, oldest_buffer):
    """ Send the age of the oldest fluentd buffer to the MetricSender. """
    logger.debug("send_metrics()")

    start = time.time()
    sender = MetricSender()
    logger.info("Sending data to MetricSender...")

    datapoint = {'openshift.logging.fluentd.queue.oldest': oldest_buffer}
    logger.debug(datapoint)
    sender.add_metric(datapoint)
    sender.send_metrics()

    logger.info("Data sent to Zagg in %s seconds", str(time.time() - start))
def main():
    """ Main function to run the check """
    # Collect filesystem fullness and inode-usage metrics from PCP and send
    # them to zabbix as dynamic (low-level discovery) items, one per mount.
    args = parse_args()
    metric_sender = MetricSender(verbose=args.verbose, debug=args.debug)

    filesys_full_metric = ['filesys.full']
    # Derived PCP metric: percentage of inodes used per filesystem.
    filesys_inode_derived_metrics = {'filesys.inodes.pused' :
        'filesys.usedfiles / (filesys.usedfiles + filesys.freefiles) * 100'}

    discovery_key_fs = 'disc.filesys'
    item_prototype_macro_fs = '#OSO_FILESYS'
    item_prototype_key_full = 'disc.filesys.full'
    item_prototype_key_inode = 'disc.filesys.inodes.pused'

    # Get the disk space
    filesys_full_metrics = pminfo.get_metrics(filesys_full_metric)

    # Strip the metric-name prefix and drop filesystems we never report on
    # (container root, optionally customer PV mounts).
    filtered_filesys_metrics = filter_out_key_name_chars(filesys_full_metrics, 'filesys.full.')
    filtered_filesys_metrics = filter_out_container_root(filtered_filesys_metrics)
    if args.filter_pod_pv:
        filtered_filesys_metrics = filter_out_customer_pv_filesystems(filtered_filesys_metrics)
    if args.force_send_zeros:
        # Force every percentage to zero (used to clear alert state).
        filtered_filesys_metrics = zero_mount_percentages(filtered_filesys_metrics)

    # Register the discovered filesystems before sending values for them.
    metric_sender.add_dynamic_metric(discovery_key_fs, item_prototype_macro_fs,
                                     filtered_filesys_metrics.keys())
    for filesys_name, filesys_full in filtered_filesys_metrics.iteritems():
        metric_sender.add_metric({'%s[%s]' % (item_prototype_key_full, filesys_name): filesys_full})

    # Get filesytem inode metrics
    filesys_inode_metrics = pminfo.get_metrics(derived_metrics=filesys_inode_derived_metrics)

    # Same filtering pipeline as above, applied to the inode percentages.
    filtered_filesys_inode_metrics = filter_out_key_name_chars(filesys_inode_metrics,
                                                               'filesys.inodes.pused.')
    filtered_filesys_inode_metrics = filter_out_container_root(filtered_filesys_inode_metrics)
    if args.filter_pod_pv:
        filtered_filesys_inode_metrics = filter_out_customer_pv_filesystems(filtered_filesys_inode_metrics)
    if args.force_send_zeros:
        filtered_filesys_inode_metrics = zero_mount_percentages(filtered_filesys_inode_metrics)

    for filesys_name, filesys_inodes in filtered_filesys_inode_metrics.iteritems():
        metric_sender.add_metric({'%s[%s]' % (item_prototype_key_inode, filesys_name): filesys_inodes})

    metric_sender.send_metrics()
def report_to_zabbix(self, disc_key, disc_macro, item_proto_key, value):
    """ Sends the commands exit code to zabbix. """
    sender = MetricSender()

    # Register the dynamic (low-level discovery) item first...
    self.verbose_print("Adding the dynamic item to Zabbix - %s, %s, [%s]" %
                       (disc_key, disc_macro, self.args.name))
    sender.add_dynamic_metric(disc_key, disc_macro, [self.args.name])

    # ...then queue the value for it and ship everything in one send.
    self.verbose_print("Sending metric to Zabbix - %s[%s]: %s" %
                       (item_proto_key, self.args.name, value))
    item_key = '%s[%s]' % (item_proto_key, self.args.name)
    sender.add_metric({item_key: value})

    sender.send_metrics()
def send_metric_data(bucket_list, bucket_stats, args):
    ''' Send S3 bucket discovery plus per-bucket size/object counts to zabbix. '''
    discovery_key = "disc.aws"
    discovery_macro = "#S3_BUCKET"
    prototype_s3_size = "disc.aws.size"
    prototype_s3_count = "disc.aws.objects"

    sender = MetricSender(verbose=args.debug)
    sender.add_dynamic_metric(discovery_key, discovery_macro, bucket_list)

    for bucket, stats in bucket_stats.items():
        # Sizes are reported as whole numbers of bytes.
        size_key = "{}[{}]".format(prototype_s3_size, bucket)
        sender.add_metric({size_key: int(round(stats["size"]))})
        count_key = "{}[{}]".format(prototype_s3_count, bucket)
        sender.add_metric({count_key: stats["objects"]})

    sender.send_metrics()
def main():
    ''' Gather and examine details about this node within ELBs '''
    args = parse_args()
    aws_access, aws_secret = get_aws_creds('/root/.aws/credentials')
    instance_region = get_instance_region()

    # Create boto client to access ELB resources
    client = boto3.client(
        'elb',
        aws_access_key_id=aws_access,
        aws_secret_access_key=aws_secret,
        region_name=instance_region
    )

    # Call all available loadbalancers in the AWS account and store blob result in elb_descriptions
    elb_descriptions = client.describe_load_balancers()
    elb_names = map(get_elb_name, elb_descriptions['LoadBalancerDescriptions'])

    # Get a list of available ELBs for a cluster
    elb_tags = client.describe_tags(LoadBalancerNames=elb_names)
    cluster_elbs = filter_by_cluster(elb_tags, args.clusterid)

    # Filter any ELBs created by service of type LoadBalancer that is not in our watched namespaces
    monitored_elbs = filter_monitored_service_elbs(cluster_elbs)
    monitored_elb_names = map(get_elb_name, monitored_elbs)

    # Perform health check of each instance available behind each ELB
    # NOTE(review): elb_health_check appears to populate the module-level
    # lists elb_no_instances / elb_instances_unhealthy used below -- confirm.
    elb_health_check(client, monitored_elb_names)

    ### Metric Checks
    # An ELB with no registered instances is treated as unhealthy.
    if len(elb_no_instances) != 0:
        for _, elb in enumerate(elb_no_instances):
            elb_instances_unhealthy.append(elb)
            print "ELB: %s has no instances behind it. Please investigate." % elb

    ### Unhealthy count check
    elb_instances_unhealthy_metric = len(elb_instances_unhealthy)
    if elb_instances_unhealthy_metric != 0:
        for _, unhealthy in enumerate(elb_instances_unhealthy):
            print unhealthy

    # ''' Now that we know if this instance is missing, feed zabbix '''
    mts = MetricSender(verbose=args.verbose, debug=args.debug)
    mts.add_metric({'openshift.aws.elb.health' : elb_instances_unhealthy_metric})
    mts.send_metrics()