def send_metrics(curlresult, service_status):
    """Report dnsmasq curl and service status to Zabbix via MetricSender."""
    sender = MetricSender()
    # Queue both measurements, then flush once.
    sender.add_metric({'openshift.master.dnsmasq.curl.status': curlresult})
    sender.add_metric({'openshift.master.dnsmasq.service.status': service_status})
    sender.send_metrics()
def main():
    """Collect filesystem space/inode usage via PCP and ship it to Zabbix."""
    opts = parse_args()
    sender = MetricSender(verbose=opts.verbose, debug=opts.debug)

    disc_key = 'disc.filesys'
    disc_macro = '#OSO_FILESYS'
    proto_full = 'disc.filesys.full'
    proto_inode = 'disc.filesys.inodes.pused'

    def refine(metrics, prefix):
        # Drop docker-managed mounts, then optionally customer PVs,
        # then optionally force every percentage to zero.
        refined = filter_out_docker_filesystems(metrics, prefix)
        if opts.filter_pod_pv:
            refined = filter_out_customer_pv_filesystems(refined)
        if opts.force_send_zeros:
            refined = zero_mount_percentages(refined)
        return refined

    # Percent-used disk space per filesystem.
    fs_full = refine(pminfo.get_metrics(['filesys.full']), 'filesys.full.')
    sender.add_dynamic_metric(disc_key, disc_macro, fs_full.keys())
    for mount, pct_full in fs_full.iteritems():
        sender.add_metric({'%s[%s]' % (proto_full, mount): pct_full})

    # Percent-used inodes per filesystem (derived PCP metric).
    inode_expr = {
        'filesys.inodes.pused':
            'filesys.usedfiles / (filesys.usedfiles + filesys.freefiles) * 100'
    }
    fs_inodes = refine(pminfo.get_metrics(derived_metrics=inode_expr),
                       'filesys.inodes.pused.')
    for mount, pct_inodes in fs_inodes.iteritems():
        sender.add_metric({'%s[%s]' % (proto_inode, mount): pct_inodes})

    sender.send_metrics()
def main():
    """Count unassociated AWS Elastic IPs across all regions and report to Zabbix."""
    logger.debug("main()")
    args = parse_args()

    if args.debug:
        logger.setLevel(logging.DEBUG)
    if args.verbose:
        logger.setLevel(logging.INFO)
    if args.aws_creds_profile:
        os.environ['AWS_PROFILE'] = args.aws_creds_profile

    ms = MetricSender(verbose=args.verbose, debug=args.debug)

    # get regions
    regions = Base.get_supported_regions()
    logger.debug("Get all regions: %s", regions)

    # Count EIPs that are allocated but not attached to any instance.
    count = 0
    for region in regions:
        logger.info("Get Elastic IP in region %s", region)
        eips = getEIPByRegion(region.name)
        logger.debug("elastic ips: %s", eips)
        for eip in eips:
            if eip.instance_id is None:
                count += 1
                # FIX: logger.warn() is a deprecated alias; use warning().
                logger.warning("EIP: %s is not associated to any instance", eip)

    ms_time = time.time()
    logger.info("Send data to MetricSender")
    ms.add_metric({'aws.ec2.eip.status': count})
    ms.send_metrics()
    logger.info("Data sent to Zagg in %s seconds", str(time.time() - ms_time))
def run(self): ''' main function ''' while True: event_list = [] while not self.queue.empty(): event = self.queue.get() if self.args.debug: print "Processing event: {}".format(str(event)) event_list.append(event) # initialize event counts so that we send '0' events # in the case where no events were received event_counts = {} for zbx_key in self.zbx_keys: event_counts[zbx_key] = 0 # add up each distinct event for event in event_list: event_counts[event] += 1 if self.args.verbose or self.args.dry_run: print "Got events: " + str(event_counts) if not self.args.dry_run: metric_sender = MetricSender(verbose=self.args.verbose, debug=self.args.debug) for event, count in event_counts.iteritems(): metric_sender.add_metric({event: count}) metric_sender.send_metrics() time.sleep(self.args.reporting_period)
def report_tags_to_zabbix(tags):
    """Translate each instance's 'config' tag into a numeric status and send it.

    Per instance: 0 -> tag value is "true", 1 -> any other value,
    2 -> 'config' tag missing.
    """
    mts = MetricSender(verbose=True)

    for tag in tags:
        # Guard-clause form of the original nested checks.
        if 'config' not in tag:
            config_value = 2
        elif tag['config'].lower() == "true":
            config_value = 0
        else:
            config_value = 1

        mts.add_metric({CONFIG_LOOP_TAG_KEY: config_value}, host=tag['name'])

    # Actually send them
    mts.send_metrics()
def main(): """ Main function to run the check """ argz = parse_args() conn_count = 0 for proc in psutil.process_iter(): try: if proc.name() == argz.proc_to_check: if argz.debug: print proc.connections() for conn in proc.connections(): if conn.status == argz.conn_status and conn.laddr[1] == argz.port: conn_count += 1 except psutil.NoSuchProcess: pass if argz.debug: print 'Process ({0}) on port {1} has {2} connections in {3} status'.format(argz.proc_to_check, argz.port, conn_count, argz.conn_status ) ms = MetricSender(debug=argz.debug) ms.add_metric({'{0}'.format(argz.zabbix_key) : conn_count}) ms.send_metrics()
def send_metrics(day_left, zabbixkey, verbose):
    """Push a days-remaining value to Zabbix under the given key."""
    logger.debug("send_metrics()")
    started = time.time()
    # NOTE(review): debug mode is driven by the verbose flag here — confirm intended.
    ms = MetricSender(verbose=verbose, debug=verbose)
    logger.info("Send data to MetricSender")
    ms.add_metric({zabbixkey: day_left})
    ms.send_metrics()
    logger.info("Data sent to Zagg in %s seconds", str(time.time() - started))
def send_metrics(status):
    """Report the PV snapshot tag status to Zabbix."""
    logger.debug("send_metrics()")
    started = time.time()
    sender = MetricSender()
    logger.info("Send data to MetricSender")
    sender.add_metric({'openshift.master.pv.snapshots.tags.status': status})
    sender.send_metrics()
    logger.info("Data sent to Zagg in %s seconds", str(time.time() - started))
def send(values, options):
    ''' Send any passed metrics values '''
    if options.verbose:
        print('Sending values:', values)

    sender = MetricSender(host=SENDER_HOST,
                          verbose=options.verbose,
                          debug=options.verbose)
    # One metric per key/value pair, then a single flush.
    for metric_name in values:
        sender.add_metric({metric_name: values[metric_name]})
    sender.send_metrics()
def send(values, options):
    ''' Send any passed metrics values '''
    if options.verbose:
        print('Sending values:', values)

    sender = MetricSender()
    # Queue each key/value pair, then flush once.
    for metric_name in values:
        sender.add_metric({metric_name: values[metric_name]})
    sender.send_metrics()
def main():
    """Check pod status on every node and report problems to Zabbix."""
    args = parse_args()
    logger.debug("args: ")
    logger.debug(args)

    pods_status = check_node_pods_status(args.skip_namespaces)

    # forward the result to zabbix
    reporter = MetricSender(verbose=args.verbose)
    reporter.add_metric({'openshift.nodes.pods.status': pods_status})
    reporter.send_metrics()
def main():
    """Verify daemonset pod health and report the result to Zabbix."""
    args = parse_args()
    logger.debug("args: ")
    logger.debug(args)

    outcome = check_ds_status(args=args)

    # forward the result to zabbix under the key chosen on the CLI
    reporter = MetricSender(verbose=args.verbose)
    reporter.add_metric({args.key: outcome})
    reporter.send_metrics()
def main():
    """Check node taints and report any problems to Zabbix."""
    args = parse_args()
    logger.debug("args: ")
    logger.debug(args)

    outcome = check_taint_status()

    # forward the result to zabbix
    reporter = MetricSender(verbose=args.verbose)
    reporter.add_metric({'openshift.nodes.taint.status': outcome})
    reporter.send_metrics()
def main():
    """Probe the SAML pod and report its status to Zabbix."""
    args = parse_args()
    logger.debug("args: ")
    logger.debug(args)

    outcome = test_saml_pod(args=args)

    # forward the result to zabbix
    reporter = MetricSender(verbose=args.verbose)
    reporter.add_metric({'openshift.master.saml.status': outcome})
    reporter.send_metrics()
def report_to_zabbix(total_expired_snapshots, total_deleted_snapshots, total_deletion_errors):
    """Send snapshot-trimming results (expired/deleted/errors) to zabbix."""
    # Assemble the full payload first, then send it in one call.
    payload = {
        EXPIRED_SNAPSHOTS_KEY: total_expired_snapshots,
        DELETED_SNAPSHOTS_KEY: total_deleted_snapshots,
        DELETION_ERRORS_KEY: total_deletion_errors,
    }

    sender = MetricSender(verbose=True)
    sender.add_metric(payload)
    sender.send_metrics()
def main():
    """Sample PCP disk counters twice and report per-disk TPS and %util to Zabbix."""
    args = parse_args()
    sender = MetricSender(verbose=args.verbose, debug=args.debug)

    disc_key = 'disc.disk'
    disc_macro = '#OSO_DISK'
    proto_tps = 'disc.disk.tps'
    proto_putil = 'disc.disk.putil'
    sample_interval = 3
    counters = ['disk.dev.total', 'disk.dev.avactive']

    # Two samples, sample_interval seconds apart, for each counter.
    raw_samples = pminfo.get_sampled_data(counters, sample_interval, 2)

    # Split the flat sample dict into one dict per counter name.
    by_counter = {}
    for counter in counters:
        by_counter[counter] = {k: v for k, v in raw_samples.items() if counter in k}

    # TPS from disk.dev.total deltas.
    per_disk = clean_up_metric_dict(by_counter[counters[0]], counters[0] + '.')

    # Register each discovered disk with Zabbix.
    sender.add_dynamic_metric(disc_key, disc_macro, per_disk.keys())

    for disk, samples in per_disk.iteritems():
        tps = (samples[1] - samples[0]) / sample_interval
        sender.add_metric({'%s[%s]' % (proto_tps, disk): tps})

    # %util from disk.dev.avactive deltas (milliseconds active -> seconds).
    per_disk = clean_up_metric_dict(by_counter[counters[1]], counters[1] + '.')
    for disk, samples in per_disk.iteritems():
        active_secs = float(samples[1] - samples[0]) / 1000.0
        sender.add_metric({'%s[%s]' % (proto_putil, disk): 100 * active_secs / sample_interval})

    sender.send_metrics()
def send_metrics(reload_time):
    """Report the haproxy reload duration to Zabbix."""
    logger.debug("send_metrics()")
    started = time.time()
    sender = MetricSender()
    logger.info("Send data to MetricSender")
    payload = {'openshift.haproxy.reload_time' : reload_time}
    sender.add_metric(payload)
    logger.debug(payload)
    sender.send_metrics()
    logger.info("Data sent to Zagg in %s seconds", str(time.time() - started))
def main():
    """Check that nodes carry the expected labels and report the status to Zabbix."""
    args = parse_args()
    logger.debug("args: ")
    logger.debug(args)

    # (removed leftover commented-out test_saml_pod call)
    label_status = check_label_status()

    # send the value to zabbix
    mts = MetricSender(verbose=args.verbose)
    mts.add_metric({'openshift.nodes.label.status': label_status})
    mts.send_metrics()
def send_metrics(key, result):
    ''' Report a single key/value measurement to Zabbix. '''
    logger.debug("send_metrics()")
    started = time.time()
    sender = MetricSender()
    logger.info("Send data to MetricSender")
    payload = {key: result}
    sender.add_metric(payload)
    logger.debug(payload)
    sender.send_metrics()
    logger.info("Data sent to Zagg in %s seconds", str(time.time() - started))
def send_metrics(stuck_builds, build_state):
    """Report the count of stuck builds in the given state to Zabbix."""
    logger.debug("send_metrics()")
    started = time.time()
    sender = MetricSender()
    logger.info("Send data to MetricSender")
    # Zabbix key is derived from the (lower-cased) build state.
    payload = {'openshift.stuck_builds.%s' % build_state.lower() : stuck_builds}
    logger.debug(payload)
    sender.add_metric(payload)
    sender.send_metrics()
    logger.info("Data sent to Zagg in %s seconds", str(time.time() - started))
class OpenshiftKubeconfigChecker(object): """ Checks whether kubeconfig is valid yaml """ def __init__(self): self.args = None self.metric_sender = None def run(self): """ Main function to run the check """ self.parse_args() self.metric_sender = MetricSender(verbose=self.args.verbose, debug=self.args.debug) status = self.parse_config() self.metric_sender.add_metric({"openshift.kubeconfig.status": status}) self.metric_sender.send_metrics() def parse_config(self): """ Load the kubeconfig """ print "\nAttempt to load the kubeconfig\n" try: yaml.load(open(self.args.config)) return 0 except Exception as ex: print "Failed parsing config %s " % ex.message return 1 def parse_args(self): """ parse the args from the cli """ parser = argparse.ArgumentParser( description='Openshift kubeconfig checker') parser.add_argument('-c', '--config', \ help='kubeconfig to parse (default /etc/origin/master/admin.kubeconfig)', \ default='/etc/origin/master/admin.kubeconfig') parser.add_argument('-v', '--verbose', action='store_true', default=None, help='Verbose?') parser.add_argument('--debug', action='store_true', default=None, help='Debug?') self.args = parser.parse_args()
def send_metrics(status_code_create, status_code_delete):
    """Report project create/delete check results to Zabbix."""
    logger.debug("send_metrics()")
    started = time.time()
    sender = MetricSender()
    logger.info("Send data to MetricSender")
    # 1 means create and delete the project failed
    sender.add_metric({'openshift.master.project.create': status_code_create})
    sender.add_metric({'openshift.master.project.delete': status_code_delete})
    sender.send_metrics()
    logger.info("Data sent to Zagg in %s seconds", str(time.time() - started))
def kill(self):
    ''' class entrypoint: reap stale haproxy processes whose connections
        are all in CLOSE_WAIT / FIN_WAIT2, sparing the newest process,
        and report the kill count to Zabbix. '''
    self.parse_args()
    self.current_time = time.time()
    # Presumably a mapping of elapsed-time -> PID for each running
    # haproxy process — TODO confirm against get_all_haproxy_procs().
    haproxy_procs_etimes = self.get_all_haproxy_procs()

    # identify most recent haproxy process
    # and remove it from list of haproxy processes
    try:
        # Smallest elapsed time == most recently started process;
        # it is the active one and must never be killed.
        youngest_etimes = min(haproxy_procs_etimes.keys())
        youngest_pid = haproxy_procs_etimes[youngest_etimes]
        self.dprint("Youngest haproxy PID: {}".format(youngest_pid))
        haproxy_procs_etimes.pop(youngest_etimes)
    except ValueError:
        # min() on an empty dict raises ValueError: no haproxy running.
        pass

    # find processes that have connections only in 'CLOSE-WAIT' state
    kill_list = []
    for proc in haproxy_procs_etimes.values():
        try:
            only_close_wait = True
            process = psutil.Process(proc)
            for conn in process.connections():
                # Any connection outside CLOSE_WAIT / FIN_WAIT2 means the
                # process is still doing useful work; spare it.
                if conn.status != 'CLOSE_WAIT' and conn.status != 'FIN_WAIT2':
                    only_close_wait = False
                    break
            if only_close_wait:
                self.dprint("PID: {} marked for removal".format(proc))
                kill_list.append(proc)
        except psutil.NoSuchProcess:
            # Process exited between listing and inspection; skip it.
            pass

    # stop processes on the kill_list
    kill_count = 0
    for proc in kill_list:
        try:
            process = psutil.Process(proc)
            self.dprint("Stopping PID: {}".format(process.pid))
            process.kill()
            kill_count += 1
        except psutil.NoSuchProcess:
            # Already gone; nothing to do.
            pass

    print "Stopped {} haproxy processes".format(kill_count)

    # Report how many stale processes were reaped.
    ms = MetricSender()
    ms.add_metric({ZABBIX_KEY: kill_count})
    ms.send_metrics()
def send_metrics(builds):
    """Report the number of builds in each state to Zabbix."""
    logger.debug("send_metrics()")
    started = time.time()
    sender = MetricSender()
    logger.info("Send data to MetricSender")
    # One metric per build state, e.g. openshift.build_state.New.
    for build_state, count in builds.items():
        payload = {'openshift.build_state.%s' % build_state: count}
        logger.debug(payload)
        sender.add_metric(payload)
    sender.send_metrics()
    logger.info("Data sent to Zagg in %s seconds", str(time.time() - started))
def send_zagg_data(keep_time):
    ''' send data to Zagg (best-effort: a send failure is logged, not raised) '''
    logger.debug('send_zagg_data()')

    zgs_time = time.time()
    zgs = MetricSender()
    zgs.add_metric({'openshift.master.project.terminating.time': keep_time})
    try:
        zgs.send_metrics()
    # FIX: narrowed from a bare 'except:', which would also swallow
    # SystemExit/KeyboardInterrupt; keep the best-effort logging behavior.
    except Exception:
        logger.exception('Error sending to Zagg')

    logger.info("Data sent in %s seconds", str(time.time() - zgs_time))
def send_metrics(self, oldest_buffer):
    """Report the age of the oldest fluentd queue buffer to Zabbix."""
    logger.debug("send_metrics()")
    started = time.time()
    sender = MetricSender()
    logger.info("Sending data to MetricSender...")
    payload = {'openshift.logging.fluentd.queue.oldest': oldest_buffer}
    logger.debug(payload)
    sender.add_metric(payload)
    sender.send_metrics()
    logger.info("Data sent to Zagg in %s seconds", str(time.time() - started))
def report_to_zabbix(self, disc_key, disc_macro, item_proto_key, value):
    """Register a dynamic item with zabbix, then send its value."""
    sender = MetricSender()

    # Add the dynamic item
    self.verbose_print("Adding the dynamic item to Zabbix - %s, %s, [%s]" % \
        (disc_key, disc_macro, self.args.name))
    sender.add_dynamic_metric(disc_key, disc_macro, [self.args.name])

    # Send the value for the dynamic item
    self.verbose_print("Sending metric to Zabbix - %s[%s]: %s" % \
        (item_proto_key, self.args.name, value))
    item_key = '%s[%s]' % (item_proto_key, self.args.name)
    sender.add_metric({item_key: value})

    # Actually send them
    sender.send_metrics()
def send_metric_data(bucket_list, bucket_stats, args):
    '''send data to zabbix '''
    discovery_key = "disc.aws"
    discovery_macro = "#S3_BUCKET"
    prototype_s3_size = "disc.aws.size"
    prototype_s3_count = "disc.aws.objects"

    sender = MetricSender(verbose=args.debug)
    # Register every bucket as a discovered item first.
    sender.add_dynamic_metric(discovery_key, discovery_macro, bucket_list)

    # Rounded size plus raw object count for each bucket.
    for bucket, stats in bucket_stats.items():
        sender.add_metric({"{}[{}]".format(prototype_s3_size, bucket): int(round(stats["size"]))})
        sender.add_metric({"{}[{}]".format(prototype_s3_count, bucket): stats["objects"]})

    sender.send_metrics()
def main(): ''' Gather and examine details about this node within ELBs ''' args = parse_args() aws_access, aws_secret = get_aws_creds('/root/.aws/credentials') instance_region = get_instance_region() elb = boto.ec2.elb.connect_to_region(instance_region, aws_access_key_id=aws_access, aws_secret_access_key=aws_secret) instance_name = get_instance_name( '/etc/openshift_tools/metric_sender.yaml') ''' Define what instance type this node is, only master/infra are in ELBs ''' if "master" in instance_name: instance_type = "master" if args.verbose: print "Instance %s type is master." % instance_name elif "infra" in instance_name: instance_type = "infra" if args.verbose: print "Instance %s type is infra." % instance_name else: print "%s is not an infra or master node. Nothing to do." exit() ''' Fetch the load balancers and make sure this instance is within them ''' try: elbs = elb.get_all_load_balancers() except: print "Rate limit reached, skipping." exit() instance_id = get_instance_id() instance_missing = 0 for i in elbs: if instance_type in i.name: if not filter(lambda x: x.id == instance_id, i.instances): instance_missing = 1 if args.verbose: print "Instance %s is missing from ELB %s!" % (instance_id, i.name) ''' Now that we know if this instance is missing, feed zabbix ''' mts = MetricSender(verbose=args.verbose, debug=args.debug) mts.add_metric({'openshift.aws.elb.status': instance_missing}) mts.send_metrics()
def main():
    ''' Gather and examine details about this node within ELBs:
        health-check every monitored cluster ELB and report the number
        of unhealthy (or empty) ELBs to Zabbix. '''
    args = parse_args()

    aws_access, aws_secret = get_aws_creds('/root/.aws/credentials')
    instance_region = get_instance_region()

    # Create boto client to access ELB resources
    client = boto3.client(
        'elb',
        aws_access_key_id=aws_access,
        aws_secret_access_key=aws_secret,
        region_name=instance_region
    )

    # Call all available loadbalancers in the AWS account and store blob result in elb_descriptions
    elb_descriptions = client.describe_load_balancers()
    elb_names = map(get_elb_name, elb_descriptions['LoadBalancerDescriptions'])

    # Get a list of available ELBs for a cluster
    elb_tags = client.describe_tags(LoadBalancerNames=elb_names)
    cluster_elbs = filter_by_cluster(elb_tags, args.clusterid)

    # Filter any ELBs created by service of type LoadBalancer that is not in our watched namespaces
    monitored_elbs = filter_monitored_service_elbs(cluster_elbs)
    monitored_elb_names = map(get_elb_name, monitored_elbs)

    # Perform health check of each instance available behind each ELB
    # NOTE(review): elb_health_check appears to populate the module-level
    # lists elb_no_instances / elb_instances_unhealthy read below — confirm.
    elb_health_check(client, monitored_elb_names)

    ### Metric Checks
    # An ELB with no registered instances counts as unhealthy.
    if len(elb_no_instances) != 0:
        for _, elb in enumerate(elb_no_instances):
            elb_instances_unhealthy.append(elb)
            print "ELB: %s has no instances behind it. Please investigate." % elb

    ### Unhealthy count check
    elb_instances_unhealthy_metric = len(elb_instances_unhealthy)
    if elb_instances_unhealthy_metric != 0:
        for _, unhealthy in enumerate(elb_instances_unhealthy):
            print unhealthy

    # ''' Now that we know if this instance is missing, feed zabbix '''
    mts = MetricSender(verbose=args.verbose, debug=args.debug)
    mts.add_metric({'openshift.aws.elb.health' : elb_instances_unhealthy_metric})
    mts.send_metrics()