def run(self):
    ''' main function '''
    while True:
        event_list = []
        while not self.queue.empty():
            event = self.queue.get()
            if self.args.debug:
                print "Processing event: {}".format(str(event))
            event_list.append(event)

        # initialize event counts so that we send '0' events
        # in the case where no events were received
        event_counts = {}
        for event_type in self.args.watch_for:
            event_counts[event_type] = 0

        for event in event_list:
            event_counts[event] += 1

        if self.args.verbose or self.args.dry_run:
            print "Got events: " + str(event_counts)

        if not self.args.dry_run:
            zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug)
            for event in event_counts.keys():
                key = ZBX_KEY + event.lower()
                zagg_sender.add_zabbix_keys({key: event_counts[event]})
            zagg_sender.send_metrics()

        time.sleep(self.args.reporting_period)
def main(self): """ Main function. """ # Reason: disable pylint import-error because urllib3 isn't loaded on jenkins. # pylint: disable=no-member requests.packages.urllib3.disable_warnings(requests.packages.urllib3.exceptions.InsecureRequestWarning) # pylint: enable=no-member zag = ZaggSender() yaml_config = {} config_path = '/etc/openshift_tools/sso-config.yaml' if os.path.isfile(config_path): with open(config_path, 'r') as sso_config: yaml_config = yaml.load(sso_config) checks = { "sso.service.not.reachable": yaml_config["idp_host"], "sso.container.not.reachable": "127.0.0.1:8443" } for zabbix_key, host in checks.iteritems(): key_value = 0 sso_status = self.check_http(host) if sso_status != 200: key_value += 1 zag.add_zabbix_keys({zabbix_key: key_value}) zag.send_metrics() running_check = "sso.monitoring.container.running" container_running = 1 zag.add_zabbix_keys({running_check: container_running}) zag.send_metrics()
def run(self):
    ''' main function '''
    while True:
        event_list = []
        while not self.queue.empty():
            event = self.queue.get()
            if self.args.debug:
                print "Processing event: {}".format(str(event))
            event_list.append(event)

        # initialize event counts so that we send '0' events
        # in the case where no events were received
        event_counts = {}
        for zbx_key in self.zbx_keys:
            event_counts[zbx_key] = 0

        # add up each distinct event
        for event in event_list:
            event_counts[event] += 1

        if self.args.verbose or self.args.dry_run:
            print "Got events: " + str(event_counts)

        if not self.args.dry_run:
            zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug)
            for event, count in event_counts.iteritems():
                zagg_sender.add_zabbix_keys({event: count})
            zagg_sender.send_metrics()

        time.sleep(self.args.reporting_period)
class OpenshiftPodChecker(object):
    """ Checks for Openshift Pods """

    def __init__(self):
        self.args = None
        self.ora = None
        self.zagg_sender = None

    def run(self):
        """ Main function to run the check """
        self.parse_args()
        self.ora = OpenshiftRestApi()
        self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug)

        try:
            self.get_pods()
        except Exception as ex:
            print "Problem retrieving pod data: %s " % ex.message

        self.zagg_sender.send_metrics()

    def get_pods(self):
        """ Gets pod data """
        print "\nPerforming pod check ...\n"

        api_url = "/api/v1/pods"
        if str(self.args.namespace) != "None" and str(self.args.namespace) != "all":
            api_url = "/api/v1/namespaces/{}/pods".format(self.args.namespace)

        api_yaml = self.ora.get(api_url, rtype="text")
        pods = yaml.safe_load(api_yaml)

        pod_count = 0
        for pod in pods["items"]:
            if self.args.pod and self.args.pod in pod["metadata"]["name"]:
                print "status of {} is {}".format(pod["metadata"]["name"], pod["status"]["phase"])
                if pod["status"]["phase"] == "Running":
                    pod_count += 1

        self.zagg_sender.add_zabbix_keys({"service.pod.{}.count".format(self.args.pod): pod_count})

    def parse_args(self):
        """ parse the args from the cli """
        parser = argparse.ArgumentParser(description="Openshift pod sender")
        parser.add_argument("-p", "--pod", default=None, help="Check for pod with this specific name")
        parser.add_argument("-n", "--namespace", default=None,
                            help='Check for pods in this namespace - "all" for all')
        parser.add_argument("-v", "--verbose", action="store_true", default=None, help="Verbose?")
        parser.add_argument("--debug", action="store_true", default=None, help="Debug?")
        self.args = parser.parse_args()
class OpenshiftSkyDNSZaggClient(object): """ Checks for the Openshift Master SkyDNS """ def __init__(self): self.args = None self.zagg_sender = None self.ora = OpenshiftRestApi() self.dns_host = '127.0.0.1' self.dns_port = 53 self.openshift_services = [] def run(self): """ Main function to run the check """ self.parse_args() self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug) if self.check_dns_port_alive(): self.get_openshift_services() self.do_dns_check() self.zagg_sender.send_metrics() def parse_args(self): """ parse the args from the cli """ parser = argparse.ArgumentParser(description='Network metric sender') parser.add_argument('-v', '--verbose', action='store_true', default=None, help='Verbose?') parser.add_argument('--debug', action='store_true', default=None, help='Debug?') self.args = parser.parse_args() def check_dns_port_alive(self): """ Verify that the DNS port (TCP 53) is alive """ print "\nPerforming Openshift DNS port check..." try: s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.settimeout(1) s.connect((self.dns_host, self.dns_port)) s.close() print "\nOpenshift SkyDNS host: %s, port: %s is OPEN" % (self.dns_host, self.dns_port) print "================================================\n" self.zagg_sender.add_zabbix_keys({'openshift.master.skydns.port.open' : 1}) return True except socket.error, e: print "\nOpenshift SkyDNS host: %s, port: %s is CLOSED" % (self.dns_host, self.dns_port) print "Python Error: %s" % e print "================================================\n" self.zagg_sender.add_zabbix_keys({'openshift.master.skydns.port.open' : 0}) return False
def report_to_zabbix(total_expired_snapshots, total_deleted_snapshots, total_deletion_errors):
    """ Sends the commands exit code to zabbix. """
    zs = ZaggSender(verbose=True)

    zs.add_zabbix_keys({
        EXPIRED_SNAPSHOTS_KEY: total_expired_snapshots,
        DELETED_SNAPSHOTS_KEY: total_deleted_snapshots,
        DELETION_ERRORS_KEY: total_deletion_errors
    })

    zs.send_metrics()
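# The scripts in this collection all follow the same ZaggSender pattern: queue keys with
# add_zabbix_keys(), then push everything with a single send_metrics() call. A minimal sketch
# of that pattern follows; the import path and the key name are assumptions for illustration,
# not taken from any specific script above.
from openshift_tools.monitoring.zagg_sender import ZaggSender  # assumed module path

def send_example_metric():
    ''' minimal sketch: send a single key/value pair through ZaggSender '''
    zs = ZaggSender(verbose=True)
    # queue the key/value pair locally; nothing is sent yet
    zs.add_zabbix_keys({'example.check.ok': 1})  # illustrative key name
    # flush every queued key in one call
    zs.send_metrics()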
class DnsmasqZaggClient(object): """ Checks for the dnsmasq local DNS cache """ def __init__(self): self.args = None self.zagg_sender = None self.dns_host_ip = socket.gethostbyname(socket.gethostname()) self.dns_port = 53 def run(self): """ Main function to run the check """ self.parse_args() self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug) if self.check_dns_port_alive(): self.do_dns_check() self.zagg_sender.send_metrics() def parse_args(self): """ parse the args from the cli """ parser = argparse.ArgumentParser(description='Network metric sender') parser.add_argument('-v', '--verbose', action='store_true', default=None, help='Verbose?') parser.add_argument('--debug', action='store_true', default=None, help='Debug?') self.args = parser.parse_args() def check_dns_port_alive(self): """ Verify that the DNS port (TCP 53) is alive """ print "\nPerforming DNS port check..." try: s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.settimeout(1) s.connect((self.dns_host_ip, self.dns_port)) s.close() print "\ndnsmasq host: %s, port: %s is OPEN" % (self.dns_host_ip, self.dns_port) print "================================================\n" self.zagg_sender.add_zabbix_keys({'dnsmasq.port.open' : 1}) return True except socket.error, e: print "\ndnsmasq host: %s, port: %s is CLOSED" % (self.dns_host_ip, self.dns_port) print "Python Error: %s" % e print "================================================\n" self.zagg_sender.add_zabbix_keys({'dnsmasq.port.open' : 0}) return False
def main(): ''' Do the application creation ''' print '################################################################################' print ' Starting App Create' print '################################################################################' namespace = 'ops-monitor-' + os.environ['ZAGG_CLIENT_HOSTNAME'] oocmd = OpenShiftOC(namespace, 'hello-openshift', verbose=False) app = 'openshift/hello-openshift:v1.0.6' start_time = time.time() if namespace in oocmd.get_projects(): oocmd.delete_project() oocmd.new_project() oocmd.new_app(app) create_app = 1 pod = None # Now we wait until the pod comes up for _ in range(24): time.sleep(5) pod = oocmd.get_pod() if pod and pod['status']: print 'Polling Pod status: %s' % pod['status']['phase'] if pod and pod['status']['phase'] == 'Running' and pod['status'].has_key('podIP'): #c_results = curl(pod['status']['podIP'], '8080') #if c_results == 'Hello OpenShift!\n': print 'Finished.' print 'State: Success' print 'Time: %s' % str(time.time() - start_time) create_app = 0 break else: print 'Finished.' print 'State: Fail' print 'Time: %s' % str(time.time() - start_time) print 'Fetching Events:' oocmd.verbose = True print oocmd.get_events() print 'Fetching Logs:' print oocmd.get_logs() print 'Fetching Pod:' print pod if namespace in oocmd.get_projects(): oocmd.delete_project() zgs = ZaggSender() zgs.add_zabbix_keys({'openshift.master.app.create': create_app}) zgs.send_metrics()
def main(): """ Main function to run the check """ args = parse_args() zagg_sender = ZaggSender(verbose=args.verbose, debug=args.debug) discovery_key_disk = 'disc.disk' interval = 3 pcp_disk_dev_metrics = ['disk.dev.total', 'disk.dev.avactive'] item_prototype_macro_disk = '#OSO_DISK' item_prototype_key_tps = 'disc.disk.tps' item_prototype_key_putil = 'disc.disk.putil' disk_metrics = pminfo.get_sampled_data(pcp_disk_dev_metrics, interval, 2) pcp_metrics_divided = {} for metric in pcp_disk_dev_metrics: pcp_metrics_divided[metric] = { k: v for k, v in disk_metrics.items() if metric in k } # do TPS checks; use disk.dev.total filtered_disk_totals = clean_up_metric_dict( pcp_metrics_divided[pcp_disk_dev_metrics[0]], pcp_disk_dev_metrics[0] + '.') # Add dynamic items zagg_sender.add_zabbix_dynamic_item(discovery_key_disk, item_prototype_macro_disk, filtered_disk_totals.keys()) # calculate the TPS and add them to the ZaggSender for disk, totals in filtered_disk_totals.iteritems(): disk_tps = (totals[1] - totals[0]) / interval zagg_sender.add_zabbix_keys( {'%s[%s]' % (item_prototype_key_tps, disk): disk_tps}) # do % Util checks; use disk.dev.avactive filtered_disk_totals = clean_up_metric_dict( pcp_metrics_divided[pcp_disk_dev_metrics[1]], pcp_disk_dev_metrics[1] + '.') # calculate the % Util and add them to the ZaggSender for disk, totals in filtered_disk_totals.iteritems(): total_active = (float)(totals[1] - totals[0]) / 1000.0 putil = 100 * total_active / interval zagg_sender.add_zabbix_keys( {'%s[%s]' % (item_prototype_key_putil, disk): putil}) zagg_sender.send_metrics()
class OpenshiftKubeconfigChecker(object): """ Checks whether kubeconfig is valid yaml """ def __init__(self): self.args = None self.zagg_sender = None def run(self): """ Main function to run the check """ self.parse_args() self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug) status = self.parse_config() self.zagg_sender.add_zabbix_keys( {"openshift.kubeconfig.status": status}) self.zagg_sender.send_metrics() def parse_config(self): """ Load the kubeconfig """ print "\nAttempt to load the kubeconfig\n" try: yaml.load(open(self.args.config)) return 0 except Exception as ex: print "Failed parsing config %s " % ex.message return 1 def parse_args(self): """ parse the args from the cli """ parser = argparse.ArgumentParser( description='Openshift kubeconfig checker') parser.add_argument('-c', '--config', \ help='kubeconfig to parse (default /etc/origin/master/admin.kubeconfig)', \ default='/etc/origin/master/admin.kubeconfig') parser.add_argument('-v', '--verbose', action='store_true', default=None, help='Verbose?') parser.add_argument('--debug', action='store_true', default=None, help='Debug?') self.args = parser.parse_args()
def main(): """ Main function to run the check """ args = parse_args() zagg_sender = ZaggSender(verbose=args.verbose, debug=args.debug) discovery_key_network = 'disc.network' pcp_network_dev_metrics = [ 'network.interface.in.bytes', 'network.interface.out.bytes' ] item_proto_macro_network = '#OSO_NET_INTERFACE' item_proto_key_in_bytes = 'disc.network.in.bytes' item_proto_key_out_bytes = 'disc.network.out.bytes' network_metrics = pminfo.get_metrics(pcp_network_dev_metrics) pcp_metrics_divided = {} for metric in pcp_network_dev_metrics: pcp_metrics_divided[metric] = { k: v for k, v in network_metrics.items() if metric in k } # do Network In; use network.interface.in.bytes filtered_network_totals = clean_up_metric_dict( pcp_metrics_divided[pcp_network_dev_metrics[0]], pcp_network_dev_metrics[0] + '.') # Add dynamic items zagg_sender.add_zabbix_dynamic_item(discovery_key_network, item_proto_macro_network, filtered_network_totals.keys()) # Report Network IN bytes; them to the ZaggSender for interface, total in filtered_network_totals.iteritems(): zagg_sender.add_zabbix_keys( {'%s[%s]' % (item_proto_key_in_bytes, interface): total}) # Report Network OUT Bytes; use network.interface.out.bytes filtered_network_totals = clean_up_metric_dict( pcp_metrics_divided[pcp_network_dev_metrics[1]], pcp_network_dev_metrics[1] + '.') # calculate the % Util and add them to the ZaggSender for interface, total in filtered_network_totals.iteritems(): zagg_sender.add_zabbix_keys( {'%s[%s]' % (item_proto_key_out_bytes, interface): total}) zagg_sender.send_metrics()
def kill(self):
    ''' class entrypoint '''
    self.parse_args()
    self.current_time = time.time()

    haproxy_procs_etimes = self.get_all_haproxy_procs()

    # identify most recent haproxy process
    # and remove it from list of haproxy processes
    try:
        youngest_etimes = min(haproxy_procs_etimes.keys())
        youngest_pid = haproxy_procs_etimes[youngest_etimes]
        self.dprint("Youngest haproxy PID: {}".format(youngest_pid))
        haproxy_procs_etimes.pop(youngest_etimes)
    except ValueError:
        pass

    # find processes whose connections are all in CLOSE-WAIT or FIN-WAIT-2 state
    kill_list = []
    for proc in haproxy_procs_etimes.values():
        try:
            only_close_wait = True
            process = psutil.Process(proc)
            for conn in process.connections():
                if conn.status != 'CLOSE_WAIT' and conn.status != 'FIN_WAIT2':
                    only_close_wait = False
                    break
            if only_close_wait:
                self.dprint("PID: {} marked for removal".format(proc))
                kill_list.append(proc)
        except psutil.NoSuchProcess:
            pass

    # stop processes on the kill_list
    kill_count = 0
    for proc in kill_list:
        try:
            process = psutil.Process(proc)
            self.dprint("Stopping PID: {}".format(process.pid))
            process.kill()
            kill_count += 1
        except psutil.NoSuchProcess:
            pass

    print "Stopped {} haproxy processes".format(kill_count)

    zgs = ZaggSender()
    zgs.add_zabbix_keys({ZABBIX_KEY: kill_count})
    zgs.send_metrics()
def main(): ''' Gather and send details on all visible S3 buckets ''' logger.info("start") discovery_key = "disc.aws" discovery_macro = "#S3_BUCKET" prototype_s3_size = "disc.aws.size" prototype_s3_count = "disc.aws.objects" args = parse_args() if args.verbose: logger.setLevel(logging.DEBUG) logger.debug("verbose flag set") ocutil = OCUtil() dc_yaml = ocutil.get_dc('docker-registry') registry_config_secret = get_registry_config_secret(dc_yaml) oc_yaml = ocutil.get_secrets(registry_config_secret) aws_access, aws_secret = get_aws_creds(oc_yaml) awsutil = AWSUtil(aws_access, aws_secret, args.debug) bucket_list = awsutil.get_bucket_list(args.debug) bucket_stats = {} for bucket in bucket_list: s3_size, s3_objects = awsutil.get_bucket_info(bucket, args.debug) bucket_stats[bucket] = {"size": s3_size, "objects": s3_objects} if args.debug: print "Bucket stats: " + str(bucket_stats) if args.test: print "Test-only. Received results: " + str(bucket_stats) else: zgs = ZaggSender(verbose=args.debug) zgs.add_zabbix_dynamic_item(discovery_key, discovery_macro, bucket_list) for bucket in bucket_stats.keys(): zab_key = "{}[{}]".format(prototype_s3_size, bucket) zgs.add_zabbix_keys( {zab_key: int(round(bucket_stats[bucket]["size"]))}) zab_key = "{}[{}]".format(prototype_s3_count, bucket) zgs.add_zabbix_keys({zab_key: bucket_stats[bucket]["objects"]}) zgs.send_metrics()
def main(self): """ Main function. """ yaml_config = {} config_path = '/etc/openshift_tools/sso-config.yaml' if os.path.isfile(config_path): with open(config_path, 'r') as sso_config: yaml_config = yaml.load(sso_config) zag = ZaggSender() ops_accounts = self.check_accounts(yaml_config["aws_account_file"]) zabbix_key = "sso.iam.not.reachable" key_value = 0 for account in ops_accounts: account_name, account_number = account.split(':') try: temp_client = self.get_token(account_number, yaml_config["idp_host"]) except botocore.exceptions.ClientError as client_error: if 'Not authorized to perform sts:AssumeRoleWithSAML' in client_error.message: print('Error: not authorized to use SSO tokens with %s' % account_name) key_value += 1 if not temp_client: continue try: acc_status = temp_client.get_role(RoleName='iam_monitoring') if acc_status['ResponseMetadata']['HTTPStatusCode'] != 200: print("HTTP request failed on account %s (%s)" \ % (account_name, account_number)) key_value += 1 if not acc_status['Role']['AssumeRolePolicyDocument']: print("No policy document returned for account %s (%s)" \ % (account_name, account_number)) key_value += 1 except botocore.exceptions.ClientError as boto_exception: print("Failed on account %s (%s) due to exception: %s" \ %(account_name, account_number, str(boto_exception))) key_value += 1 zag.add_zabbix_keys({zabbix_key: key_value}) zag.send_metrics()
def main(self): """ Main function. """ zag = ZaggSender() config_path = '/secrets/aws_config.yml' config_file = self.get_config(config_path) time_stamp_path = config_file['ocav_timestamp_path'] check = 'clam.update.signatures.not.updating' stamp_status = self.check_clam(time_stamp_path) zag.add_zabbix_keys({check: stamp_status}) zag.send_metrics()
def send_zagg_data(keep_time):
    ''' send data to Zagg '''
    logger.debug('send_zagg_data()')

    zgs_time = time.time()
    zgs = ZaggSender()
    zgs.add_zabbix_keys({'openshift.master.project.terminating.time': keep_time})

    try:
        zgs.send_metrics()
    except:
        logger.exception('Error sending to Zagg')

    logger.info("Data sent in %s seconds", str(time.time() - zgs_time))
def send_zagg_data(bucket_list, bucket_stats, args):
    ''' send data to zabbix '''
    discovery_key = "disc.aws"
    discovery_macro = "#S3_BUCKET"
    prototype_s3_size = "disc.aws.size"
    prototype_s3_count = "disc.aws.objects"

    zgs = ZaggSender(verbose=args.debug)
    zgs.add_zabbix_dynamic_item(discovery_key, discovery_macro, bucket_list)
    for bucket in bucket_stats.keys():
        zab_key = "{}[{}]".format(prototype_s3_size, bucket)
        zgs.add_zabbix_keys({zab_key: int(round(bucket_stats[bucket]["size"]))})

        zab_key = "{}[{}]".format(prototype_s3_count, bucket)
        zgs.add_zabbix_keys({zab_key: bucket_stats[bucket]["objects"]})
    zgs.send_metrics()
def main(): ''' Do the application creation ''' proj_name = 'ops-monitor' app = 'openshift/hello-openshift:v1.0.6' verbose = False if len(sys.argv) > 1 and sys.argv[1] == '-v': verbose = True start_time = time.time() if proj_name in OpenShiftOC.get_projects(verbose): OpenShiftOC.delete_project(proj_name, verbose) OpenShiftOC.new_project(proj_name, verbose) OpenShiftOC.new_app(app, proj_name, verbose) create_app = 1 # Now we wait until the pod comes up for _ in range(24): time.sleep(5) pod = OpenShiftOC.get_pod('hello-openshift', proj_name, verbose) if pod and pod['status']: if verbose: print pod['status']['phase'] if pod and pod['status']['phase'] == 'Running' and pod[ 'status'].has_key('podIP'): #c_results = curl(pod['status']['podIP'], '8080') #if c_results == 'Hello OpenShift!\n': if verbose: print 'success' print 'Time: %s' % str(time.time() - start_time) create_app = 0 break else: if verbose: print 'Time: %s' % str(time.time() - start_time) print 'fail' if proj_name in OpenShiftOC.get_projects(verbose): OpenShiftOC.delete_project(proj_name, verbose) zgs = ZaggSender() zgs.add_zabbix_keys({'create_app': create_app}) zgs.send_metrics()
def main(): """ Main function to run the check """ argz = parse_args() proc_parts = argz.process_str.split() zagg_data = {} for proc in psutil.process_iter(): try: if proc_parts[0] == proc.name(): proc.dict = proc.as_dict(['cmdline', 'memory_info']) cmdline = proc.dict['cmdline'] if len(proc_parts) > 1 and len(cmdline) > 1: part_count = len(proc_parts[1:]) # This call might be confusing, (I know I will be in 2 weeks) so quick explanation: # if the process name matches above, it will check the rest of the strings # against the /proc/<pid>/cmdline contents, order shouldn't matter since all have to match if len(set(proc_parts[1:]).intersection(set(cmdline[1:1+part_count]))) != part_count: continue if argz.debug: print cmdline cpu_percent = '{0:.2f}'.format(proc.cpu_percent(interval=0.5)) mem_vms = '{0}'.format(getattr(proc.dict['memory_info'], 'vms')) mem_rss = '{0}'.format(getattr(proc.dict['memory_info'], 'rss')) zagg_data = {'{0}.cpu'.format(argz.zabbix_key_prefix) : cpu_percent, '{0}.mem.vms'.format(argz.zabbix_key_prefix) : mem_vms, '{0}.mem.rss'.format(argz.zabbix_key_prefix) : mem_rss} except psutil.NoSuchProcess: pass if argz.debug: try: print 'Process ({0}) is using {1} CPU and {2} {3} memory'.format(argz.process_str, cpu_percent, mem_vms, mem_rss) print 'Zagg will receive: {0}'.format(zagg_data) except NameError as ex: print 'No values: {0}'.format(ex) if zagg_data: zgs = ZaggSender(debug=argz.debug) zgs.add_zabbix_keys(zagg_data) zgs.send_metrics()
def main(): ''' Do the application creation ''' proj_name = 'ops-monitor-' + os.environ['ZAGG_CLIENT_HOSTNAME'] app = 'openshift/hello-openshift:v1.0.6' verbose = False if len(sys.argv) > 1 and sys.argv[1] == '-v': verbose = True start_time = time.time() if proj_name in OpenShiftOC.get_projects(verbose): OpenShiftOC.delete_project(proj_name, verbose) OpenShiftOC.new_project(proj_name, verbose) OpenShiftOC.new_app(app, proj_name, verbose) create_app = 1 # Now we wait until the pod comes up for _ in range(24): time.sleep(5) pod = OpenShiftOC.get_pod('hello-openshift', proj_name, verbose) if pod and pod['status']: if verbose: print pod['status']['phase'] if pod and pod['status']['phase'] == 'Running' and pod['status'].has_key('podIP'): #c_results = curl(pod['status']['podIP'], '8080') #if c_results == 'Hello OpenShift!\n': if verbose: print 'success' print 'Time: %s' % str(time.time() - start_time) create_app = 0 break else: if verbose: print 'Time: %s' % str(time.time() - start_time) print 'fail' if proj_name in OpenShiftOC.get_projects(verbose): OpenShiftOC.delete_project(proj_name, verbose) zgs = ZaggSender() zgs.add_zabbix_keys({'openshift.master.app.create': create_app}) zgs.send_metrics()
def report_to_zabbix(self, disc_key, disc_macro, item_proto_key, value):
    """ Sends the commands exit code to zabbix. """
    zs = ZaggSender()

    # Add the dynamic item
    self.verbose_print("Adding the dynamic item to Zabbix - %s, %s, [%s]" % \
        (disc_key, disc_macro, self.args.name))
    zs.add_zabbix_dynamic_item(disc_key, disc_macro, [self.args.name])

    # Send the value for the dynamic item
    self.verbose_print("Sending metric to Zabbix - %s[%s]: %s" % \
        (item_proto_key, self.args.name, value))
    zs.add_zabbix_keys({'%s[%s]' % (item_proto_key, self.args.name): value})

    # Actually send them
    zs.send_metrics()
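# For low-level discovery, the pattern in report_to_zabbix() above is: register the discovered
# names with add_zabbix_dynamic_item(), then send values against the item prototype key using
# the 'prototype[name]' form. A hedged sketch follows; the discovery key, macro, and prototype
# key names are made up for illustration, and only the ZaggSender calls mirror the code above.
def report_discovered_values(zs, names_to_values):
    ''' illustrative only: send one metric per discovered name '''
    disc_key = 'disc.example'            # hypothetical discovery key
    disc_macro = '#EXAMPLE_NAME'         # hypothetical macro name
    item_proto_key = 'disc.example.value'  # hypothetical item prototype key
    # tell Zabbix which instances exist for this discovery rule
    zs.add_zabbix_dynamic_item(disc_key, disc_macro, list(names_to_values.keys()))
    # send a value for each discovered instance via the item prototype
    for name, value in names_to_values.items():
        zs.add_zabbix_keys({'%s[%s]' % (item_proto_key, name): value})
    zs.send_metrics()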
def main(): """ Get data and send to zabbix """ vswitch_ports_count = get_vswitch_ports() vswitch_pids_count = get_vswitch_pids() print "Found %s OVS ports" % vswitch_ports_count print "Found %s OVS pids" % vswitch_pids_count # we now have all the data we want. Let's send it to Zagg zs = ZaggSender() zs.add_zabbix_keys({"openshift.node.ovs.ports.count": vswitch_ports_count}) zs.add_zabbix_keys({"openshift.node.ovs.pids.count": vswitch_pids_count}) # Finally, sent them to zabbix zs.send_metrics()
def main():
    ''' Get data and send to zabbix '''
    vswitch_ports_count = get_vswitch_ports()
    vswitch_pids_count = get_vswitch_pids()

    print "Found %s OVS ports" % vswitch_ports_count
    print "Found %s OVS pids" % vswitch_pids_count

    # we now have all the data we want.  Let's send it to Zagg
    zs = ZaggSender()
    zs.add_zabbix_keys({'openshift.node.ovs.ports.count': vswitch_ports_count})
    zs.add_zabbix_keys({'openshift.node.ovs.pids.count': vswitch_pids_count})

    # Finally, send them to zabbix
    zs.send_metrics()
def main(self): """ Main function. """ zag = ZaggSender() config_dict = self.get_config( '/etc/openshift_tools/scanreport_config.yml') logfile = config_dict['opsad_log_file'] result_status = self.search_logfile(logfile) check = 'psad.found.scanner' zag.add_zabbix_keys({check: result_status}) zag.send_metrics() if result_status > 0: self.upload_data(config_dict)
def main(): """ Main function to run the check """ args = parse_args() zagg_sender = ZaggSender(verbose=args.verbose, debug=args.debug) discovery_key_disk = 'disc.disk' interval = 3 pcp_disk_dev_metrics = ['disk.dev.total', 'disk.dev.avactive'] item_prototype_macro_disk = '#OSO_DISK' item_prototype_key_tps = 'disc.disk.tps' item_prototype_key_putil = 'disc.disk.putil' disk_metrics = pminfo.get_sampled_data(pcp_disk_dev_metrics, interval, 2) pcp_metrics_divided = {} for metric in pcp_disk_dev_metrics: pcp_metrics_divided[metric] = {k: v for k, v in disk_metrics.items() if metric in k} # do TPS checks; use disk.dev.total filtered_disk_totals = clean_up_metric_dict(pcp_metrics_divided[pcp_disk_dev_metrics[0]], pcp_disk_dev_metrics[0] + '.') # Add dynamic items zagg_sender.add_zabbix_dynamic_item(discovery_key_disk, item_prototype_macro_disk, filtered_disk_totals.keys()) # calculate the TPS and add them to the ZaggSender for disk, totals in filtered_disk_totals.iteritems(): disk_tps = (totals[1] - totals[0]) / interval zagg_sender.add_zabbix_keys({'%s[%s]' % (item_prototype_key_tps, disk): disk_tps}) # do % Util checks; use disk.dev.avactive filtered_disk_totals = clean_up_metric_dict(pcp_metrics_divided[pcp_disk_dev_metrics[1]], pcp_disk_dev_metrics[1] + '.') # calculate the % Util and add them to the ZaggSender for disk, totals in filtered_disk_totals.iteritems(): total_active = (float)(totals[1] - totals[0]) / 1000.0 putil = 100 * total_active / interval zagg_sender.add_zabbix_keys({'%s[%s]' % (item_prototype_key_putil, disk): putil}) zagg_sender.send_metrics()
def main(): ''' Gather and send details on all visible S3 buckets ''' discovery_key = "disc.gcp" discovery_macro = "#GCS_BUCKET" prototype_bucket_size = "disc.gcp.size" prototype_bucket_count = "disc.gcp.objects" args = parse_args() ocutil = OCUtil() dc_yaml = ocutil.get_dc('docker-registry') registry_config_secret = get_registry_config_secret(dc_yaml) oc_yaml = ocutil.get_secrets(registry_config_secret) bucket = get_gcp_info(oc_yaml) gsutil = GcloudUtil(verbose=args.debug) bucket_list = gsutil.get_bucket_list() bucket_stats = {} for bucket in bucket_list: size, objects = gsutil.get_bucket_info(bucket) bucket_stats[bucket] = {"size": size, "objects": objects} if args.debug: print "Bucket stats: " + str(bucket_stats) if args.test: print "Test-only. Received results: " + str(bucket_stats) else: zgs = ZaggSender(verbose=args.debug) zgs.add_zabbix_dynamic_item(discovery_key, discovery_macro, bucket_list) for bucket in bucket_stats.keys(): zab_key = "{}[{}]".format(prototype_bucket_size, bucket) zgs.add_zabbix_keys({zab_key: int(round(bucket_stats[bucket]["size"]))}) zab_key = "{}[{}]".format(prototype_bucket_count, bucket) zgs.add_zabbix_keys({zab_key: bucket_stats[bucket]["objects"]}) zgs.send_metrics()
def main():
    ''' Gather and examine details about this node within ELBs '''
    args = parse_args()

    aws_access, aws_secret = get_aws_creds('/root/.aws/credentials')
    instance_region = get_instance_region()

    elb = boto.ec2.elb.connect_to_region(instance_region,
                                         aws_access_key_id=aws_access,
                                         aws_secret_access_key=aws_secret)

    instance_name = get_instance_name('/etc/openshift_tools/zagg_client.yaml')

    # Define what instance type this node is; only master/infra nodes are in ELBs
    if "master" in instance_name:
        instance_type = "master"
        if args.verbose:
            print "Instance %s type is master." % instance_name
    elif "infra" in instance_name:
        instance_type = "infra"
        if args.verbose:
            print "Instance %s type is infra." % instance_name
    else:
        print "%s is not an infra or master node. Nothing to do." % instance_name
        exit()

    # Fetch the load balancers and make sure this instance is within them
    elbs = elb.get_all_load_balancers()
    instance_id = get_instance_id()

    instance_missing = 0
    for i in elbs:
        if instance_type in i.name:
            if not filter(lambda x: x.id == instance_id, i.instances):
                instance_missing = 1
                if args.verbose:
                    print "Instance %s is missing from ELB %s!" % (instance_id, i.name)

    # Now that we know if this instance is missing, feed zabbix
    zs = ZaggSender(verbose=args.verbose, debug=args.debug)
    zs.add_zabbix_keys({'openshift.aws.elb.status': instance_missing})
    zs.send_metrics()
def main(): '''Run pminfo against a list of metrics. Sample metrics passed in for an amount of time and report data to zabbix ''' args, parser = parse_args() if not args.metrics: print print 'Please specify metrics with -m.' print parser.print_help() sys.exit(1) metrics = args.metrics interval = int(args.interval) count = int(args.count) # Gather sampled data data = pminfo.get_sampled_data(metrics, interval, count) zab_results = collections.defaultdict(list) for metric_name, val in data.items(): if 'kernel' in metric_name: for sample in range(len(val)): if sample + 1 == len(val): break zab_results[metric_name].append(pminfo.calculate_percent_cpu(val[sample], val[sample+1], interval)) else: print 'NOT SUPPORTED: [%s]' % metric_name if zab_results.get(metric_name, None) != None and (args.verbose or args.debug): print '%s: %.2f' % (metric_name, zab_results[metric_name][-1]) zab_results = get_averages(zab_results) # Send the data to zabbix if not args.test: zgs = ZaggSender(verbose=args.debug) zgs.add_zabbix_keys(zab_results) zgs.send_metrics()
def main():
    ''' Get data from oadm and send to zabbix '''
    ## set oadm config
    oadm_command = "KUBECONFIG=/etc/openshift/master/admin.kubeconfig /usr/bin/oadm"

    ## get list of running pods
    podlist_cmd = oadm_command + " manage-node --list-pods --selector=''"

    # get the output of oadm
    output = subprocess.check_output(podlist_cmd, shell=True)

    # pare down to only lines that contain "Running"
    running_pods_list = [p for p in output.split('\n') if "Running" in p]

    # we now have all the data we want.  Let's send it to Zagg
    zs = ZaggSender()
    zs.add_zabbix_keys({'running_pods_count': len(running_pods_list)})

    # Finally, send them to zabbix
    zs.send_metrics()
def main(): """ Gather and send details on all visible S3 buckets """ discovery_key = "disc.aws" discovery_macro = "#S3_BUCKET" prototype_s3_size = "disc.aws.size" prototype_s3_count = "disc.aws.objects" args = parse_args() ocutil = OCUtil() oc_yaml = ocutil.get_secrets("dockerregistry") aws_access, aws_secret = get_aws_creds(oc_yaml) awsutil = AWSUtil(aws_access, aws_secret, args.debug) bucket_list = awsutil.get_bucket_list(args.debug) bucket_stats = {} for bucket in bucket_list: s3_size, s3_objects = awsutil.get_bucket_info(bucket, args.debug) bucket_stats[bucket] = {"size": s3_size, "objects": s3_objects} if args.debug: print "Bucket stats: " + str(bucket_stats) if args.test: print "Test-only. Received results: " + str(bucket_stats) else: zgs = ZaggSender(verbose=args.debug) zgs.add_zabbix_dynamic_item(discovery_key, discovery_macro, bucket_list) for bucket in bucket_stats.keys(): zab_key = "{}[{}]".format(prototype_s3_size, bucket) zgs.add_zabbix_keys({zab_key: int(round(bucket_stats[bucket]["size"]))}) zab_key = "{}[{}]".format(prototype_s3_count, bucket) zgs.add_zabbix_keys({zab_key: bucket_stats[bucket]["objects"]}) zgs.send_metrics()
def main(): """ Get data from oadm and send to zabbix """ ## set oadm config oadm_command = "KUBECONFIG=/etc/openshift/master/admin.kubeconfig /usr/bin/oadm" ## get list of running pods podlist_cmd = oadm_command + " manage-node --list-pods --selector=''" # get the output of oadm output = subprocess.check_output(podlist_cmd, shell=True) # pare down to only lines that contain "Running" running_pods_list = [p for p in output.split("\n") if "Running" in p] # we now have all the data we want. Let's send it to Zagg zs = ZaggSender() zs.add_zabbix_keys({"running_pods_count": len(running_pods_list)}) # Finally, sent them to zabbix zs.send_metrics()
def main(): """ Main function to run the check """ args = parse_args() zagg_sender = ZaggSender(verbose=args.verbose, debug=args.debug) discovery_key_network = 'disc.network' pcp_network_dev_metrics = ['network.interface.in.bytes', 'network.interface.out.bytes'] item_proto_macro_network = '#OSO_NET_INTERFACE' item_proto_key_in_bytes = 'disc.network.in.bytes' item_proto_key_out_bytes = 'disc.network.out.bytes' network_metrics = pminfo.get_metrics(pcp_network_dev_metrics) pcp_metrics_divided = {} for metric in pcp_network_dev_metrics: pcp_metrics_divided[metric] = {k: v for k, v in network_metrics.items() if metric in k} # do Network In; use network.interface.in.bytes filtered_network_totals = clean_up_metric_dict(pcp_metrics_divided[pcp_network_dev_metrics[0]], pcp_network_dev_metrics[0] + '.') # Add dynamic items zagg_sender.add_zabbix_dynamic_item(discovery_key_network, item_proto_macro_network, filtered_network_totals.keys()) # Report Network IN bytes; them to the ZaggSender for interface, total in filtered_network_totals.iteritems(): zagg_sender.add_zabbix_keys({'%s[%s]' % (item_proto_key_in_bytes, interface): total}) # Report Network OUT Bytes; use network.interface.out.bytes filtered_network_totals = clean_up_metric_dict(pcp_metrics_divided[pcp_network_dev_metrics[1]], pcp_network_dev_metrics[1] + '.') # calculate the % Util and add them to the ZaggSender for interface, total in filtered_network_totals.iteritems(): zagg_sender.add_zabbix_keys({'%s[%s]' % (item_proto_key_out_bytes, interface): total}) zagg_sender.send_metrics()
def main(self): """ Main function. """ zag = ZaggSender() yaml_config = {} config_path = '/etc/openshift_tools/rkhunter_config.yaml' if os.path.isfile(config_path): with open(config_path, 'r') as rkhunter_config: yaml_config = yaml.load(rkhunter_config) logfile = yaml_config["logfile"] checks = { "rkhunter.found.warning": r"\[ warning \]", "rkhunter.found.infection": r"INFECTED$" } for zabbix_key, search_term in checks.iteritems(): scan_status = self.check_rkhunter(search_term, logfile) zag.add_zabbix_keys({zabbix_key: scan_status}) zag.send_metrics()
def main(): """ Main function to run the check """ args = parse_args() zagg_sender = ZaggSender(verbose=args.verbose, debug=args.debug) filesys_full_metric = ['filesys.full'] filesys_inode_derived_metrics = {'filesys.inodes.pused' : 'filesys.usedfiles / (filesys.usedfiles + filesys.freefiles) * 100' } discovery_key_fs = 'disc.filesys' item_prototype_macro_fs = '#OSO_FILESYS' item_prototype_key_full = 'disc.filesys.full' item_prototype_key_inode = 'disc.filesys.inodes.pused' # Get the disk space filesys_full_metrics = pminfo.get_metrics(filesys_full_metric) filtered_filesys_metrics = filter_out_docker_filesystems(filesys_full_metrics, 'filesys.full.') zagg_sender.add_zabbix_dynamic_item(discovery_key_fs, item_prototype_macro_fs, filtered_filesys_metrics.keys()) for filesys_name, filesys_full in filtered_filesys_metrics.iteritems(): zagg_sender.add_zabbix_keys({'%s[%s]' % (item_prototype_key_full, filesys_name): filesys_full}) # Get filesytem inode metrics filesys_inode_metrics = pminfo.get_metrics(derived_metrics=filesys_inode_derived_metrics) filtered_filesys_inode_metrics = filter_out_docker_filesystems(filesys_inode_metrics, 'filesys.inodes.pused.') for filesys_name, filesys_inodes in filtered_filesys_inode_metrics.iteritems(): zagg_sender.add_zabbix_keys({'%s[%s]' % (item_prototype_key_inode, filesys_name): filesys_inodes}) zagg_sender.send_metrics()
def main(): """ Main function to run the check """ argz = parse_args() conn_count = 0 for proc in psutil.process_iter(): try: if proc.name() == argz.proc_to_check: if argz.debug: print proc.connections() for conn in proc.connections(): if conn.status == argz.conn_status and conn.laddr[ 1] == argz.port: conn_count += 1 except psutil.NoSuchProcess: pass if argz.debug: print 'Process ({0}) on port {1} has {2} connections in {3} status'.format( argz.proc_to_check, argz.port, conn_count, argz.conn_status) zgs = ZaggSender(debug=argz.debug) zgs.add_zabbix_keys({'{0}'.format(argz.zabbix_key): conn_count}) zgs.send_metrics()
def main(): ''' Do the application creation ''' proj_name = 'ops-monitor-appbuild' + os.environ['ZAGG_CLIENT_HOSTNAME'] app = 'nodejs-example' verbose = True start_time = time.time() if proj_name in OpenShiftOC.get_projects(verbose): OpenShiftOC.delete_project(proj_name, verbose) OpenShiftOC.new_project(proj_name, verbose) OpenShiftOC.new_app(app, proj_name, verbose) #1 is error create_app = 1 BuildTime = 0 CreateTime = 0 # Now we wait until the pod comes up for _ in range(24): time.sleep(10) #checking the building pod buildPod = OpenShiftOC.get_build_pod(app, proj_name, verbose) if buildPod and buildPod['status']['phase'] == 'Failed': BuildTime = time.time() - start_time print 'fail' break if buildPod and buildPod['status']['phase'] == 'Succeeded': BuildTime = time.time() - start_time for _ in range(24): time.sleep(5) create_app = check_route(app, proj_name, verbose) if create_app == 0: CreateTime = time.time() - start_time print 'success' print 'Time: %s' % CreateTime print 'BuildTime: %s' % BuildTime break if create_app == 0: break else: BuildTime = time.time() - start_time print 'BuildTime: %s' % BuildTime print 'fail' if proj_name in OpenShiftOC.get_projects(verbose): OpenShiftOC.delete_project(proj_name, verbose) zgs = ZaggSender() zgs.add_zabbix_keys({'openshift.master.app.build.create': create_app}) zgs.add_zabbix_keys({'openshift.master.app.create.time': CreateTime}) zgs.add_zabbix_keys({'openshift.master.app.build.time': BuildTime}) zgs.send_metrics()
def report_to_zabbix(self, total_snapshottable_vols, total_snapshots_created, total_snapshot_creation_errors):
    """ Sends the commands exit code to zabbix. """
    zs = ZaggSender(verbose=True)

    # Populate EBS_SNAPSHOTTER_DISC_SCHEDULE_MACRO with the schedule
    zs.add_zabbix_dynamic_item(EBS_SNAPSHOTTER_DISC_KEY, EBS_SNAPSHOTTER_DISC_SCHEDULE_MACRO, \
                               [self.args.with_schedule])

    # Send total_snapshottable_vols prototype item key and value
    zs.add_zabbix_keys({'%s[%s]' % (EBS_SNAPSHOTTER_SNAPSHOTTABLE_VOLUMES_KEY, self.args.with_schedule): \
                        total_snapshottable_vols})

    # Send total_snapshots_created prototype item key and value
    zs.add_zabbix_keys({'%s[%s]' % (EBS_SNAPSHOTTER_SNAPSHOTS_CREATED_KEY, self.args.with_schedule): \
                        total_snapshots_created})

    # Send total_snapshot_creation_errors prototype item key and value
    zs.add_zabbix_keys({'%s[%s]' % (EBS_SNAPSHOTTER_SNAPSHOT_CREATION_ERRORS_KEY, self.args.with_schedule): \
                        total_snapshot_creation_errors})

    # Actually send them
    zs.send_metrics()
def send_zagg_data(build_ran, create_app, http_code, run_time):
    """ send data to Zagg """
    logger.debug("send_zagg_data()")

    zgs_time = time.time()
    zgs = ZaggSender()
    logger.info("Send data to Zagg")

    if build_ran == 1:
        zgs.add_zabbix_keys({'openshift.master.app.build.create': create_app})
        zgs.add_zabbix_keys({'openshift.master.app.build.create.code': http_code})
        zgs.add_zabbix_keys({'openshift.master.app.build.create.time': run_time})
    else:
        zgs.add_zabbix_keys({'openshift.master.app.create': create_app})
        zgs.add_zabbix_keys({'openshift.master.app.create.code': http_code})
        zgs.add_zabbix_keys({'openshift.master.app.create.time': run_time})

    try:
        zgs.send_metrics()
        logger.info("Data sent to Zagg in %s seconds", str(time.time() - zgs_time))
    except:
        logger.error("Error sending data to Zagg: %s \n %s ", sys.exc_info()[0], sys.exc_info()[1])
ZBX_KEY = "openshift.node.ovs.stray.rules"

if __name__ == "__main__":
    ovs_fixer = OVS()
    zgs = ZaggSender()

    # Dev says rules before ports since OpenShift will set up ports, then rules
    ovs_fixer.get_rule_list()
    ovs_ports = ovs_fixer.get_port_list()
    ovs_bad_rules = ovs_fixer.find_bad_rules()

    # Report bad/stray rules count before removing
    zgs.add_zabbix_keys({ZBX_KEY: len(ovs_bad_rules)})
    zgs.send_metrics()

    print "Good ports: {0}".format(str(ovs_ports))
    print "Bad rules: {0}".format(str(ovs_bad_rules))

    ovs_fixer.remove_rules(ovs_bad_rules)

    # Refresh list of rules after the removals
    ovs_fixer.get_rule_list(force_refresh=True)
    ovs_bad_rules = ovs_fixer.find_bad_rules()
    print "Bad rules after removals: {0}".format(str(ovs_bad_rules))

    # Report new bad/stray rule count after removal
    zgs.add_zabbix_keys({ZBX_KEY: len(ovs_bad_rules)})
class OpsZaggClient(object): """ class to send data to zagg """ def __init__(self): self.zagg_sender = None self.args = None self.config = None self.pcp_metrics = [] self.heartbeat = None def run(self): """ main function to run the script """ self.parse_args() self.parse_config(self.args.config_file) self.config_zagg_sender() if self.args.send_pcp_metrics: self.add_pcp_metrics() if self.args.send_heartbeat: self.add_heartbeat() if self.args.key and self.args.value: self.add_zabbix_key() self.zagg_sender.send_metrics() def parse_args(self): """ parse the args from the cli """ parser = argparse.ArgumentParser(description='Zagg metric sender') parser.add_argument('--send-pcp-metrics', help="send pcp metrics to zagg", action="store_true") parser.add_argument('--send-heartbeat', help="send heartbeat metric to zagg", action="store_true") parser.add_argument('-s', '--host', help='specify host name as registered in Zabbix') parser.add_argument('-z', '--zagg-server', help='hostname of IP of Zagg server') parser.add_argument('--zagg-user', help='username of the Zagg server') parser.add_argument('--zagg-pass', help='password of the Zagg server') parser.add_argument('-k', '--key', help='zabbix key') parser.add_argument('-o', '--value', help='zabbix value') parser.add_argument('-c', '--config-file', help='ops-zagg-client config file', default='/etc/openshift_tools/zagg_client.yaml') self.args = parser.parse_args() def parse_config(self, config_file): """ parse config file """ self.config = yaml.load(file(config_file)) def config_zagg_sender(self): """ configure the zagg_sender """ zagg_server = self.args.zagg_server if self.args.zagg_server else self.config['zagg']['host'] zagg_user = self.args.zagg_user if self.args.zagg_user else self.config['zagg']['user'] zagg_password = self.args.zagg_pass if self.args.zagg_pass else self.config['zagg']['pass'] host = self.args.host if self.args.host else self.config['host']['name'] zagg_conn = ZaggConnection(host=zagg_server, user=zagg_user, password=zagg_password, ) self.zagg_sender = ZaggSender(host, zagg_conn) def add_heartbeat(self): """ crate a hearbeat metric """ heartbeat = ZaggHeartbeat(templates=self.config['heartbeat']['templates'], hostgroups=self.config['heartbeat']['hostgroups'], ) self.zagg_sender.add_heartbeat(heartbeat) def add_pcp_metrics(self): """ collect pcp metrics to send to ZaggSender """ self.zagg_sender.add_pcp_metrics(self.config['pcp']['metrics']) def add_zabbix_key(self): """ send zabbix key/value pair to zagg """ self.zagg_sender.add_zabbix_keys({self.args.key : self.args.value})
class OpenshiftMasterZaggClient(object):
    """ Checks for the Openshift Master """

    def __init__(self):
        self.args = None
        self.zagg_sender = None
        self.ora = None
        self.zabbix_api_key = None
        self.zabbix_healthz_key = None

    def run(self):
        """ Main function to run the check """
        self.parse_args()
        self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug)

        if self.args.local:
            self.ora = OpenshiftRestApi()
            self.args.api_ping = True
            self.args.healthz = True
            self.zabbix_api_key = 'openshift.master.local.api.ping'
            self.zabbix_healthz_key = 'openshift.master.local.api.healthz'
        else:
            master_cfg_from_yaml = []
            with open('/etc/origin/master/master-config.yaml', 'r') as yml:
                master_cfg_from_yaml = yaml.load(yml)
            self.ora = OpenshiftRestApi(host=master_cfg_from_yaml['oauthConfig']['masterURL'],
                                        verify_ssl=True)
            self.zabbix_api_key = 'openshift.master.api.ping'
            self.zabbix_healthz_key = 'openshift.master.api.healthz'

        try:
            if self.args.healthz or self.args.all_checks:
                self.healthz_check()
        except Exception as ex:
            print "Problem performing healthz check: %s " % ex.message
            self.zagg_sender.add_zabbix_keys({self.zabbix_healthz_key: 'false'})

        try:
            if self.args.api_ping or self.args.all_checks:
                self.api_ping()

            if self.args.project_count or self.args.all_checks:
                self.project_count()

            if self.args.pod_count or self.args.all_checks:
                self.pod_count()

            if self.args.user_count or self.args.all_checks:
                self.user_count()

            if self.args.pv_info or self.args.all_checks:
                self.pv_info()

            if self.args.nodes_not_ready or self.args.all_checks:
                self.nodes_not_ready()
        except Exception as ex:
            print "Problem performing Openshift API checks: %s " % ex.message
            self.zagg_sender.add_zabbix_keys({self.zabbix_api_key: 0})  # Openshift API is down

        try:
            if self.args.metrics or self.args.all_checks:
                self.metric_check()
        except Exception as ex:
            print "Problem getting Openshift metrics at /metrics: %s " % ex.message
            self.zagg_sender.add_zabbix_keys({'openshift.master.metric.ping': 0})  # Openshift Metrics are down

        self.zagg_sender.send_metrics()

    def parse_args(self):
        """ parse the args from the cli """
        parser = argparse.ArgumentParser(description='Network metric sender')
        parser.add_argument('-v', '--verbose', action='store_true', default=None, help='Verbose?')
        parser.add_argument('--debug', action='store_true', default=None, help='Debug?')
        parser.add_argument('-l', '--local', action='store_true', default=False,
                            help='Run local checks against the local API (https://127.0.0.1)')

        master_check_group = parser.add_argument_group('Different Checks to Perform')
        master_check_group.add_argument('--all-checks', action='store_true', default=None,
                                        help='Do all of the checks')
        master_check_group.add_argument('--api-ping', action='store_true', default=None,
                                        help='Verify the Openshift API is alive')
        master_check_group.add_argument('--healthz', action='store_true', default=None,
                                        help='Query the Openshift Master API /healthz')
        master_check_group.add_argument('--metrics', action='store_true', default=None,
                                        help='Query the Openshift Master Metrics at /metrics')
        master_check_group.add_argument('--project-count', action='store_true', default=None,
                                        help='Query the Openshift Master for Number of Projects')
        master_check_group.add_argument('--pod-count', action='store_true', default=None,
                                        help='Query the Openshift Master for Number of Running Pods')
        master_check_group.add_argument('--user-count', action='store_true', default=None,
                                        help='Query the Openshift Master for Number of Users')
        master_check_group.add_argument('--pv-info', action='store_true', default=None,
                                        help='Query the Openshift Master for Persistent Volumes Info')
        master_check_group.add_argument('--nodes-not-ready', action='store_true', default=None,
                                        help='Query the Openshift Master for number of nodes not in Ready state')

        self.args = parser.parse_args()

    def api_ping(self):
        """ Verify the Openshift API health is responding correctly """
        print "\nPerforming Openshift API ping check..."
        response = self.ora.get('/api/v1/nodes')
        print "\nOpenshift API ping is alive"
        print "Number of nodes in the Openshift cluster: %s" % len(response['items'])
        self.zagg_sender.add_zabbix_keys({self.zabbix_api_key: 1,
                                          'openshift.master.node.count': len(response['items'])})

    def healthz_check(self):
        """ check the /healthz API call """
        print "\nPerforming /healthz check..."
        response = self.ora.get('/healthz', rtype='text')
        print "healthz check returns: %s " % response
        self.zagg_sender.add_zabbix_keys({self.zabbix_healthz_key: str('ok' in response).lower()})

    def metric_check(self):
        """ collect certain metrics from the /metrics API call """
        print "\nPerforming /metrics check..."
        response = self.ora.get('/metrics', rtype='text')

        for metric_type in text_string_to_metric_families(response):

            # Collect the apiserver_request_latencies_summary{resource="pods",verb="LIST",quantiles in /metrics
            # Collect the apiserver_request_latencies_summary{resource="pods",verb="WATCHLIST",quantiles in /metrics
            if metric_type.name == 'apiserver_request_latencies_summary':
                key_str = 'openshift.master.apiserver.latency.summary'
                for sample in metric_type.samples:
                    if (sample[1]['resource'] == 'pods'
                            and sample[1].has_key('quantile')
                            and 'LIST' in sample[1]['verb']):
                        curr_key_str = key_str + ".pods.quantile.%s.%s" % (sample[1]['verb'],
                                                                           sample[1]['quantile'].split('.')[1])
                        if math.isnan(sample[2]):
                            value = 0
                        else:
                            value = sample[2]
                        self.zagg_sender.add_zabbix_keys({curr_key_str.lower(): int(value/1000)})

            # Collect the scheduler_e2e_scheduling_latency_microseconds{quantiles in /metrics
            if metric_type.name == 'scheduler_e2e_scheduling_latency_microseconds':
                for sample in metric_type.samples:
                    if sample[1].has_key('quantile'):
                        key_str = 'openshift.master.scheduler.e2e.scheduling.latency'
                        curr_key_str = key_str + ".quantile.%s" % (sample[1]['quantile'].split('.')[1])
                        if math.isnan(sample[2]):
                            value = 0
                        else:
                            value = sample[2]
                        self.zagg_sender.add_zabbix_keys({curr_key_str.lower(): int(value/1000)})

        self.zagg_sender.add_zabbix_keys({'openshift.master.metric.ping': 1})

    def project_count(self):
        """ check the number of projects in Openshift """
        print "\nPerforming project count check..."
        excluded_names = ['openshift', 'openshift-infra', 'default', 'ops-monitor']
        response = self.ora.get('/oapi/v1/projects')
        project_names = [project['metadata']['name'] for project in response['items']]
        valid_names = set(project_names) - set(excluded_names)
        print "Project count: %s" % len(valid_names)
        self.zagg_sender.add_zabbix_keys({'openshift.project.count': len(valid_names)})

    def pod_count(self):
        """ check the number of pods in Openshift """
        print "\nPerforming pod count check..."
        response = self.ora.get('/api/v1/pods')

        # Get running pod count
        running_pod_count = 0
        for i in response['items']:
            if 'containerStatuses' in i['status']:
                if 'running' in i['status']['containerStatuses'][0]['state']:
                    running_pod_count += 1

        # Get running pod count on compute only nodes (non-infra)
        running_user_pod_count = 0
        for i in response['items']:
            if 'containerStatuses' in i['status']:
                if 'running' in i['status']['containerStatuses'][0]['state']:
                    if 'nodeSelector' in i['spec']:
                        if i['spec']['nodeSelector']['type'] == 'compute':
                            running_user_pod_count += 1

        print "Total pod count: %s" % len(response['items'])
        print "Running pod count: %s" % running_pod_count
        print "User Running pod count: %s" % running_user_pod_count

        self.zagg_sender.add_zabbix_keys({'openshift.master.pod.running.count': running_pod_count,
                                          'openshift.master.pod.user.running.count': running_user_pod_count,
                                          'openshift.master.pod.total.count': len(response['items'])})

    def user_count(self):
        """ check the number of users in Openshift """
        print "\nPerforming user count check..."
        response = self.ora.get('/oapi/v1/users')
        print "Total user count: %s" % len(response['items'])
        self.zagg_sender.add_zabbix_keys({'openshift.master.user.count': len(response['items'])})

    def pv_info(self):
        """ Gather info about the persistent volumes in Openshift """
        print "\nPerforming user persistent volume count...\n"
        response = self.ora.get('/api/v1/persistentvolumes')

        pv_capacity_total = 0
        pv_capacity_available = 0
        pv_types = {'Available': 0, 'Bound': 0, 'Released': 0, 'Failed': 0}

        # Dynamic items variables
        discovery_key_pv = 'disc.pv'
        item_prototype_macro_pv = '#OSO_PV'
        item_prototype_key_count = 'disc.pv.count'
        item_prototype_key_available = 'disc.pv.available'
        dynamic_pv_count = defaultdict(int)
        dynamic_pv_available = defaultdict(int)

        for item in response['items']:
            # gather dynamic pv counts
            dynamic_pv_count[item['spec']['capacity']['storage']] += 1

            # get count of each pv type available
            pv_types[item['status']['phase']] += 1

            # get info for the capacity and capacity available
            capacity = item['spec']['capacity']['storage']
            if item['status']['phase'] == 'Available':
                # get total available capacity
                pv_capacity_available = pv_capacity_available + int(capacity.replace('Gi', ''))

                # gather dynamic pv available counts
                dynamic_pv_available[item['spec']['capacity']['storage']] += 1

            pv_capacity_total = pv_capacity_total + int(capacity.replace('Gi', ''))

        print "Total Persistent Volume Total count: %s" % len(response['items'])
        print 'Total Persistent Volume Capacity: %s' % pv_capacity_total
        print 'Total Persistent Volume Available Capacity: %s' % pv_capacity_available

        self.zagg_sender.add_zabbix_keys({'openshift.master.pv.total.count': len(response['items']),
                                          'openshift.master.pv.space.total': pv_capacity_total,
                                          'openshift.master.pv.space.available': pv_capacity_available})

        for key, value in pv_types.iteritems():
            print "Total Persistent Volume %s count: %s" % (key, value)
            self.zagg_sender.add_zabbix_keys({'openshift.master.pv.%s.count' % key.lower(): value})

        # Add dynamic items
        self.zagg_sender.add_zabbix_dynamic_item(discovery_key_pv, item_prototype_macro_pv, dynamic_pv_count.keys())

        for size, count in dynamic_pv_count.iteritems():
            print
            print "Total Persistent Volume %s count: %s" % (size, count)
            print "Total Persistent Volume available %s count: %s" % (size, dynamic_pv_available[size])
            self.zagg_sender.add_zabbix_keys({"%s[%s]" % (item_prototype_key_count, size): count,
                                              "%s[%s]" % (item_prototype_key_available, size): dynamic_pv_available[size]})

    def nodes_not_ready(self):
        """ check the number of nodes in the cluster that are not ready """
        print "\nPerforming nodes not ready check..."
        response = self.ora.get('/api/v1/nodes')

        nodes_not_schedulable = []
        for n in response['items']:
            if "unschedulable" in n['spec']:
                nodes_not_schedulable.append(n)

        nodes_not_ready = []
        for n in response['items']:
            has_ready_status = False
            for cond in n['status']['conditions']:
                if cond['reason'] == "KubeletReady":
                    has_ready_status = True
                    if cond['status'].lower() != "true":
                        nodes_not_ready.append(n)
            if has_ready_status == False:
                nodes_not_ready.append(n)

        print "Count of nodes not schedulable: %s" % len(nodes_not_schedulable)
        print "Count of nodes not ready: %s" % len(nodes_not_ready)

        self.zagg_sender.add_zabbix_keys({'openshift.master.nodesnotready.count': len(nodes_not_ready)})
        self.zagg_sender.add_zabbix_keys({'openshift.master.nodesnotschedulable.count': len(nodes_not_schedulable)})
class OpsZaggClient(object): """ class to send data to zagg """ def __init__(self): self.zagg_sender = None self.args = None self.config = None self.pcp_metrics = [] self.heartbeat = None def run(self): """ main function to run the script """ self.parse_args() self.parse_config(self.args.config_file) self.config_zagg_sender() if self.args.send_pcp_metrics: self.add_pcp_metrics() if self.args.send_heartbeat: self.add_heartbeat() if self.args.key and self.args.value: self.add_zabbix_key() if self.args.discovery_key and self.args.macro_string and self.args.macro_names: self.add_zabbix_dynamic_item() self.zagg_sender.send_metrics() def parse_args(self): """ parse the args from the cli """ parser = argparse.ArgumentParser(description='Zagg metric sender') parser.add_argument('--send-pcp-metrics', help="send pcp metrics to zagg", action="store_true") parser.add_argument('--send-heartbeat', help="send heartbeat metric to zagg", action="store_true") parser.add_argument('-s', '--host', help='specify host name as registered in Zabbix') parser.add_argument('-z', '--zagg-url', help='url of Zagg server') parser.add_argument('--zagg-user', help='username of the Zagg server') parser.add_argument('--zagg-pass', help='Password of the Zagg server') parser.add_argument('--zagg-ssl-verify', default=None, help='Whether to verify ssl certificates.') parser.add_argument('-v', '--verbose', action='store_true', default=None, help='Verbose?') parser.add_argument('--debug', action='store_true', default=None, help='Debug?') parser.add_argument('-c', '--config-file', help='ops-zagg-client config file', default='/etc/openshift_tools/zagg_client.yaml') key_value_group = parser.add_argument_group('Sending a Key-Value Pair') key_value_group.add_argument('-k', '--key', help='zabbix key') key_value_group.add_argument('-o', '--value', help='zabbix value') low_level_discovery_group = parser.add_argument_group('Sending a Low Level Discovery Item') low_level_discovery_group.add_argument('--discovery-key', help='discovery key') low_level_discovery_group.add_argument('--macro-string', help='macro string') low_level_discovery_group.add_argument('--macro-names', help='comma separated list of macro names') self.args = parser.parse_args() def parse_config(self, config_file): """ parse config file """ self.config = yaml.load(file(config_file)) def config_zagg_sender(self): """ configure the zagg_sender """ zagg_url = self.args.zagg_url if self.args.zagg_url else self.config['zagg']['url'] zagg_user = self.args.zagg_user if self.args.zagg_user else self.config['zagg']['user'] zagg_password = self.args.zagg_pass if self.args.zagg_pass else self.config['zagg']['pass'] zagg_verbose = self.args.verbose if self.args.verbose else self.config['zagg']['verbose'] zagg_debug = self.args.debug if self.args.debug else self.config['zagg']['debug'] zagg_ssl_verify = self.args.zagg_ssl_verify if self.args.zagg_ssl_verify else self.config['zagg']['ssl_verify'] host = self.args.host if self.args.host else self.config['host']['name'] if isinstance(zagg_verbose, str): zagg_verbose = (zagg_verbose == 'True') if isinstance(zagg_debug, str): zagg_debug = (zagg_debug == 'True') if isinstance(zagg_ssl_verify, str): zagg_ssl_verify = (zagg_ssl_verify == 'True') zagg_conn = ZaggConnection(url=zagg_url, user=zagg_user, password=zagg_password, ssl_verify=zagg_ssl_verify, debug=zagg_debug, ) self.zagg_sender = ZaggSender(host, zagg_conn, zagg_verbose, zagg_debug) def add_heartbeat(self): """ crate a hearbeat metric """ heartbeat = 
ZaggHeartbeat(templates=self.config['heartbeat']['templates'], hostgroups=self.config['heartbeat']['hostgroups'], ) self.zagg_sender.add_heartbeat(heartbeat) def add_pcp_metrics(self): """ collect pcp metrics to send to ZaggSender """ self.zagg_sender.add_pcp_metrics(self.config['pcp']['metrics']) def add_zabbix_key(self): """ send zabbix key/value pair to zagg """ self.zagg_sender.add_zabbix_keys({self.args.key : self.args.value}) def add_zabbix_dynamic_item(self): """ send zabbix low level discovery item to zagg """ self.zagg_sender.add_zabbix_dynamic_item(self.args.discovery_key, self.args.macro_string, self.args.macro_names.split(','), )
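The class above is essentially a CLI wrapper around the sender objects it configures. The snippet below is a minimal sketch of driving those objects directly, using the same ZaggConnection/ZaggSender calls seen in config_zagg_sender; the URL, credentials, host name, and keys are placeholders, and the ZaggConnection import path is an assumption (ZaggSender's path matches the imports used elsewhere in these scripts).

# import path for ZaggConnection is assumed; ZaggSender matches the other scripts here
from openshift_tools.monitoring.zagg_common import ZaggConnection
from openshift_tools.monitoring.zagg_sender import ZaggSender

# placeholder connection details; real values normally come from zagg_client.yaml
zagg_conn = ZaggConnection(url='https://zagg.example.com',
                           user='zagg-user',
                           password='zagg-pass',
                           ssl_verify=False,
                           debug=False,
                          )
zagg_sender = ZaggSender('my-host.example.com', zagg_conn, False, False)

# a plain key/value pair, equivalent to: ops-zagg-client -k some.key -o 42
zagg_sender.add_zabbix_keys({'some.key': 42})

# a low level discovery item, equivalent to the --discovery-key/--macro-* options
zagg_sender.add_zabbix_dynamic_item('disc.filesys', '#OSO_FILESYS', ['/var', '/tmp'])

zagg_sender.send_metrics()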
class EtcdStatusZaggSender(object): """ class to gather all metrics from etcd daemons """ def __init__(self): self.api_host = None self.args = None self.parser = None self.config = None self.etcd_ping = 0 self.default_config = '/etc/openshift_tools/etcd_metrics.yml' self.zagg_sender = ZaggSender() def parse_args(self): '''Parse the arguments for this script''' self.parser = argparse.ArgumentParser(description="Script that gathers metrics from etcd") self.parser.add_argument('-d', '--debug', default=False, action="store_true", help="debug mode") self.parser.add_argument('-v', '--verbose', default=False, action="store_true", help="Verbose?") self.parser.add_argument('-t', '--test', default=False, action="store_true", help="Run the script but don't send to zabbix") self.parser.add_argument('-c', '--configfile', default=self.default_config, help="Config file that contains metrics to be collected, defaults to etcd_metrics.yml") self.args = self.parser.parse_args() def call_etcd_api(self, rest_path): '''Makes the API calls to rest endpoints in etcd''' try: response = requests.get(self.api_host + rest_path, cert=(self.config['etcd_info']['files']['ssl_client_cert'], self.config['etcd_info']['files']['ssl_client_key']), verify=False) self.etcd_ping = 1 except requests.exceptions.ConnectionError as ex: print "ERROR talking to etcd API: {0}".format(ex.message) else: return response.content def json_metric(self, met): '''process json data from etcd''' return_data = {} api_response = self.call_etcd_api(met['path']) if api_response: content = json.loads(api_response) for item in met['values']: return_data[met['prefix'] + item['zab_key']] = content[item['src']] return return_data def text_metric(self, met): '''process text value from etcd''' return_data = {} content = self.call_etcd_api(met['path']) if content: for metric in text_string_to_metric_families(content): # skipping histogram and summary types unless we find a good way to add them to zabbix (unlikely) if metric.type in ['histogram', 'summary']: continue elif metric.type in ['counter', 'gauge'] and metric.name in met['values']: zab_metric_name = met['prefix'] + metric.name.replace('_', '.') if len(metric.samples) > 1: if met['values'][metric.name]: sub_key = met['values'][metric.name] for singlemetric in metric.samples: return_data['{0}.{1}'.format(zab_metric_name, singlemetric[1][sub_key])] = singlemetric[2] else: return_data[zab_metric_name] = metric.samples[0][2] else: if self.args.debug: print 'Got unknown type of metric from etcd, skipping it: ({0}) '.format(metric.type) return return_data def run(self): ''' Get data from etcd API ''' self.parse_args() try: with open(self.args.configfile, 'r') as configfile: self.config = yaml.load(configfile) except IOError as ex: print 'There was a problem opening the config file: {0}'.format(ex) print 'Exiting' sys.exit(1) # find out the etcd port try: with open(self.config['etcd_info']['files']['openshift_master_config'], 'r') as f: om_config = yaml.load(f) except IOError as ex: print 'Problem opening openshift master config: {0}'.format(ex) sys.exit(2) else: self.api_host = om_config["etcdClientInfo"]["urls"][0] # let's get the metrics for metric in self.config['etcd_info']['metrics']: if metric['type'] == 'text': self.zagg_sender.add_zabbix_keys(self.text_metric(metric)) elif metric['type'] == 'json': self.zagg_sender.add_zabbix_keys(self.json_metric(metric)) self.send_zagg_data() def send_zagg_data(self): ''' Sending the data to zagg or displaying it in console when test option is used ''' 
self.zagg_sender.add_zabbix_keys({'openshift.master.etcd.ping' : self.etcd_ping}) if not self.args.test: self.zagg_sender.send_metrics() else: self.zagg_sender.print_unique_metrics()
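To make json_metric and text_metric easier to follow, here is the shape of configuration they expect, written out as an illustrative Python dict rather than the real /etc/openshift_tools/etcd_metrics.yml; the paths, prefixes, and metric names below are examples, not the shipped config.

# illustrative only: one 'json' entry and one 'text' entry, covering the keys
# that json_metric() and text_metric() read ('type', 'path', 'prefix', 'values')
EXAMPLE_ETCD_METRICS = {
    'etcd_info': {
        'metrics': [
            {'type': 'json',
             'path': '/v2/stats/self',
             'prefix': 'openshift.master.etcd.',
             # each entry maps a field in the JSON response ('src') to a zabbix key suffix
             'values': [{'src': 'recvAppendRequestCnt', 'zab_key': 'recv.append.request.count'}]},
            {'type': 'text',
             'path': '/metrics',
             'prefix': 'openshift.master.etcd.',
             # for prometheus-style text metrics: metric name -> label used to split
             # multi-sample metrics, or None to take the single sample as-is
             'values': {'etcd_server_has_leader': None}},
        ],
    },
}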
cli = AutoVersionClient(base_url='unix://var/run/docker.sock', timeout=120) bad_dns_count = 0 for ctr in cli.containers(): try: exec_id = cli.exec_create(container=ctr['Id'], cmd="getent hosts redhat.com") results = cli.exec_start(exec_id=exec_id) exit_code = cli.exec_inspect(exec_id)['ExitCode'] except APIError: # could race from getting a container list and the container exiting # before we can exec on it, so just ignore exited containers continue if exit_code == CMD_NOT_FOUND: continue print "Container: " + ctr['Image'] print results print "Exit Code: " + str(exit_code) + "\n" if exit_code != 0: bad_dns_count += 1 zs = ZaggSender() zs.add_zabbix_keys({ZBX_KEY: bad_dns_count}) print "Sending these metrics:" print ZBX_KEY + ": " + str(bad_dns_count) zs.send_metrics() print "\nDone.\n"
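The exec-based loop above treats the exit status of getent as the health signal; CMD_NOT_FOUND itself is defined earlier in the original script and is not shown here. A small sketch of how the exit codes are interpreted, with an assumed value for that constant:

# assumed value: shells conventionally return 126/127 when a command cannot be
# run or found; the original script's CMD_NOT_FOUND constant is not shown here
CMD_NOT_FOUND = 126

def dns_lookup_failed(exit_code):
    ''' True when "getent hosts redhat.com" ran inside the container but failed '''
    if exit_code == CMD_NOT_FOUND:
        # image has no getent binary; skip it rather than count a DNS failure
        return False
    return exit_code != 0

print dns_lookup_failed(0)    # False - name resolved
print dns_lookup_failed(2)    # True  - getent could not resolve the name
print dns_lookup_failed(126)  # False - container image lacks getent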
class OpsZaggClient(object): """ class to send data to zagg """ def __init__(self): self.zagg_sender = None self.args = None self.config = None self.pcp_metrics = [] self.heartbeat = None def run(self): """ main function to run the script """ self.parse_args() self.parse_config(self.args.config_file) self.config_zagg_sender() if self.args.send_pcp_metrics: self.add_pcp_metrics() if self.args.send_heartbeat: self.add_heartbeat() if self.args.key and self.args.value: self.add_zabbix_key() if self.args.discovery_key and self.args.macro_string and self.args.macro_names: self.add_zabbix_dynamic_item() self.zagg_sender.send_metrics() def parse_args(self): """ parse the args from the cli """ parser = argparse.ArgumentParser(description='Zagg metric sender') parser.add_argument('--send-pcp-metrics', help="send pcp metrics to zagg", action="store_true") parser.add_argument('--send-heartbeat', help="send heartbeat metric to zagg", action="store_true") parser.add_argument('-s', '--host', help='specify host name as registered in Zabbix') parser.add_argument('-z', '--zagg-url', help='url of Zagg server') parser.add_argument('--zagg-user', help='username of the Zagg server') parser.add_argument('--zagg-pass', help='Password of the Zagg server') parser.add_argument('--zagg-ssl-verify', default=None, help='Whether to verify ssl certificates.') parser.add_argument('-v', '--verbose', action='store_true', default=None, help='Verbose?') parser.add_argument('--debug', action='store_true', default=None, help='Debug?') parser.add_argument('-c', '--config-file', help='ops-zagg-client config file', default='/etc/openshift_tools/zagg_client.yaml') key_value_group = parser.add_argument_group('Sending a Key-Value Pair') key_value_group.add_argument('-k', '--key', help='zabbix key') key_value_group.add_argument('-o', '--value', help='zabbix value') low_level_discovery_group = parser.add_argument_group( 'Sending a Low Level Discovery Item') low_level_discovery_group.add_argument('--discovery-key', help='discovery key') low_level_discovery_group.add_argument('--macro-string', help='macro string') low_level_discovery_group.add_argument( '--macro-names', help='comma separated list of macro names') self.args = parser.parse_args() def parse_config(self, config_file): """ parse config file """ self.config = yaml.load(file(config_file)) def config_zagg_sender(self): """ configure the zagg_sender """ zagg_url = self.args.zagg_url if self.args.zagg_url else self.config[ 'zagg']['url'] zagg_user = self.args.zagg_user if self.args.zagg_user else self.config[ 'zagg']['user'] zagg_password = self.args.zagg_pass if self.args.zagg_pass else self.config[ 'zagg']['pass'] zagg_verbose = self.args.verbose if self.args.verbose else self.config[ 'zagg']['verbose'] zagg_debug = self.args.debug if self.args.debug else self.config[ 'zagg']['debug'] zagg_ssl_verify = self.args.zagg_ssl_verify if self.args.zagg_ssl_verify else self.config[ 'zagg']['ssl_verify'] host = self.args.host if self.args.host else self.config['host']['name'] if isinstance(zagg_verbose, str): zagg_verbose = (zagg_verbose == 'True') if isinstance(zagg_debug, str): zagg_debug = (zagg_debug == 'True') if isinstance(zagg_ssl_verify, str): zagg_ssl_verify = (zagg_ssl_verify == 'True') zagg_conn = ZaggConnection( url=zagg_url, user=zagg_user, password=zagg_password, ssl_verify=zagg_ssl_verify, debug=zagg_debug, ) self.zagg_sender = ZaggSender(host, zagg_conn, zagg_verbose, zagg_debug) def add_heartbeat(self): """ crate a hearbeat metric """ heartbeat = ZaggHeartbeat( 
templates=self.config['heartbeat']['templates'], hostgroups=self.config['heartbeat']['hostgroups'], ) self.zagg_sender.add_heartbeat(heartbeat) def add_pcp_metrics(self): """ collect pcp metrics to send to ZaggSender """ self.zagg_sender.add_pcp_metrics(self.config['pcp']['metrics']) def add_zabbix_key(self): """ send zabbix key/value pair to zagg """ self.zagg_sender.add_zabbix_keys({self.args.key: self.args.value}) def add_zabbix_dynamic_item(self): """ send zabbix low level discovery item to zagg """ self.zagg_sender.add_zabbix_dynamic_item( self.args.discovery_key, self.args.macro_string, self.args.macro_names.split(','), )
ITEM_PROTOTYPE_KEY_INODE = 'disc.filesys.inodes.pused' def filter_out_docker_filesystems(metric_dict, filesystem_filter): """ Strip the metric name prefix from each key and drop docker-internal filesystems """ filtered_dict = {k.replace(filesystem_filter, ''):v for (k, v) in metric_dict.iteritems() if 'docker' not in k } return filtered_dict # Get the disk space FILESYS_FULL_METRICS = pminfo.get_metrics(FILESYS_FULL_METRIC) FILTERED_FILESYS_METRICS = filter_out_docker_filesystems(FILESYS_FULL_METRICS, 'filesys.full.') ZS.add_zabbix_dynamic_item(DISCOVERY_KEY_FS, ITEM_PROTOTYPE_MACRO_FS, FILTERED_FILESYS_METRICS.keys()) for filesys_name, filesys_full in FILTERED_FILESYS_METRICS.iteritems(): ZS.add_zabbix_keys({'%s[%s]' % (ITEM_PROTOTYPE_KEY_FULL, filesys_name): filesys_full}) # Get filesystem inode metrics FILESYS_INODE_METRICS = pminfo.get_metrics(derived_metrics=FILESYS_INODE_DERIVED_METRICS) FILTERED_FILESYS_INODE_METRICS = filter_out_docker_filesystems(FILESYS_INODE_METRICS, 'filesys.inodes.pused.') for filesys_name, filesys_inodes in FILTERED_FILESYS_INODE_METRICS.iteritems(): ZS.add_zabbix_keys({'%s[%s]' % (ITEM_PROTOTYPE_KEY_INODE, filesys_name): filesys_inodes}) ZS.send_metrics()
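As a quick illustration of the transformation above (the sample pminfo output is made up, and ITEM_PROTOTYPE_KEY_FULL is assumed to be 'disc.filesys.full'), filter_out_docker_filesystems strips the metric prefix and drops docker-internal devices, and each surviving name becomes both a discovery macro value and a per-filesystem item key:

# made-up pminfo.get_metrics() style output for the 'filesys.full' metric
sample_metrics = {
    'filesys.full./dev/mapper/rootvg-var': 61.2,
    'filesys.full./dev/mapper/docker-253:1-1234-pool': 80.0,
}

# uses the filter defined above
filtered = filter_out_docker_filesystems(sample_metrics, 'filesys.full.')
print filtered
# {'/dev/mapper/rootvg-var': 61.2}

# that name would then be reported under a key such as
# 'disc.filesys.full[/dev/mapper/rootvg-var]'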
def main(): ''' Get data from etcd API ''' SSL_CLIENT_CERT = '/etc/openshift/master/master.etcd-client.crt' SSL_CLIENT_KEY = '/etc/openshift/master/master.etcd-client.key' OPENSHIFT_MASTER_CONFIG = '/etc/openshift/master/master-config.yaml' # find out the etcd port with open(OPENSHIFT_MASTER_CONFIG, 'r') as f: config = yaml.load(f) API_HOST = config["etcdClientInfo"]["urls"][0] # define the store API URL API_URL = API_HOST + "/v2/stats/store" zs = ZaggSender() # Fetch the store statics from API try: request = requests.get(API_URL, cert=(SSL_CLIENT_CERT, SSL_CLIENT_KEY), verify=False) content = json.loads(request.content) etcd_ping = 1 # parse the items and add it as metrics zs.add_zabbix_keys({'openshift.master.etcd.create.success' : content['createSuccess']}) zs.add_zabbix_keys({'openshift.master.etcd.create.fail' : content['createFail']}) zs.add_zabbix_keys({'openshift.master.etcd.delete.success' : content['deleteSuccess']}) zs.add_zabbix_keys({'openshift.master.etcd.delete.fail' : content['deleteFail']}) zs.add_zabbix_keys({'openshift.master.etcd.get.success' : content['getsSuccess']}) zs.add_zabbix_keys({'openshift.master.etcd.get.fail' : content['getsFail']}) zs.add_zabbix_keys({'openshift.master.etcd.set.success' : content['setsSuccess']}) zs.add_zabbix_keys({'openshift.master.etcd.set.fail' : content['setsFail']}) zs.add_zabbix_keys({'openshift.master.etcd.update.success' : content['updateSuccess']}) zs.add_zabbix_keys({'openshift.master.etcd.update.fail' : content['updateFail']}) zs.add_zabbix_keys({'openshift.master.etcd.watchers' : content['watchers']}) except requests.exceptions.ConnectionError as ex: print "ERROR talking to etcd API: %s" % ex.message etcd_ping = 0 zs.add_zabbix_keys({'openshift.master.etcd.ping' : etcd_ping}) # Finally, sent them to zabbix zs.send_metrics()
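For context, the fields read above come straight from etcd's /v2/stats/store endpoint; the sketch below shows the response shape with illustrative numbers, not real output.

# illustrative /v2/stats/store payload; only the fields the check reads are shown
example_store_stats = {
    'createSuccess': 120, 'createFail': 3,
    'deleteSuccess': 45,  'deleteFail': 0,
    'getsSuccess': 98765, 'getsFail': 12,
    'setsSuccess': 5432,  'setsFail': 1,
    'updateSuccess': 678, 'updateFail': 2,
    'watchers': 17,
}
# e.g. example_store_stats['getsSuccess'] is sent as 'openshift.master.etcd.get.success'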
class OpenshiftWebServiceChecker(object): """ Checks for Openshift Pods """ def __init__(self): self.args = None self.ora = None self.zagg_sender = None self.service_ip = None self.service_port = '443' def run(self): """ Main function to run the check """ self.parse_args() self.ora = OpenshiftRestApi() self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug) try: self.get_service() status = self.check_service() except Exception as ex: print "Problem retreiving data: %s " % ex.message self.zagg_sender.add_zabbix_keys({ "openshift.webservice.{}.status".format(self.args.pod) : status}) self.zagg_sender.send_metrics() def get_service(self): """ Gets the service for a pod """ print "\nLooking up services for pod\n" api_url = "/api/v1/services" if (str(self.args.namespace) != "None") & \ (str(self.args.namespace) != "all"): api_url = '/api/v1/namespaces/{}/services'.format(self.args.namespace) print "using api url {}".format(api_url) api_yaml = self.ora.get(api_url, rtype='text') services = yaml.safe_load(api_yaml) for service in services["items"]: if self.args.pod and \ self.args.pod in service["metadata"]["name"]: print "service IP is {}".format(service["spec"]["clusterIP"]) self.service_ip = service["spec"]["clusterIP"] if self.args.portname != None: for port in service["spec"]["ports"]: if port["name"] == self.args.portname: self.service_port = port["port"] else: self.service_port = service["spec"]["ports"][0]["port"] else: pass def check_service(self): """ Checks the web service """ print "\nChecking web service\n" if self.args.insecure: proto = 'http' else: proto = 'https' url = '{}://{}:{}/{}'.format( proto, self.service_ip, self.service_port, self.args.url, ) try: print "Performing check on URL: {}".format(url) response = urllib2.urlopen(url, timeout=30) if str(response.getcode()) == self.args.status: if self.args.content == None \ or self.args.content in response.read(): return True except urllib2.URLError: print "Received error accessing URL: {}".format(url) except socket.timeout: print "Timed out accessing URL: {}".format(url) return False def parse_args(self): """ parse the args from the cli """ parser = argparse.ArgumentParser(description='Openshift pod sender') parser.add_argument('-p', '--pod', default=None, help='Check for pod with this specific name') parser.add_argument('-n', '--namespace', default=None, help='Check for pods in this namespace - "all" for all') parser.add_argument('-P', '--portname', default=None, help='name of the port to check') parser.add_argument('-u', '--url', default="/", help='URL to check. Defaults to "/".') parser.add_argument('-s', '--status', default="200", help='HTTP status code to expect. Defaults to 200') parser.add_argument('-c', '--content', default=None, help='Looks for a string in the content of the response.') parser.add_argument('-i', '--insecure', help='Use insecure http connection') parser.add_argument('-S', '--secure', help='Use secure https connection (default)') parser.add_argument('-v', '--verbose', action='store_true', default=None, help='Verbose?') parser.add_argument('--debug', action='store_true', default=None, help='Debug?') self.args = parser.parse_args()
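Stripped of the class plumbing, check_service boils down to one guarded HTTP request. This is a minimal standalone sketch of that flow; the service IP, port, and expected content are placeholders, since the real values come from get_service and the CLI arguments.

import socket
import urllib2

def url_matches(url, expected_status='200', expected_content=None):
    ''' True when the URL answers with the expected status (and content, if given) '''
    try:
        response = urllib2.urlopen(url, timeout=30)
        if str(response.getcode()) == expected_status:
            return expected_content is None or expected_content in response.read()
    except urllib2.URLError:
        print "Received error accessing URL: {}".format(url)
    except socket.timeout:
        print "Timed out accessing URL: {}".format(url)
    return False

# placeholder cluster IP and port for the service being probed
print url_matches('https://172.30.0.10:443/', expected_content='ok')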
dw_dds = dw.get_disk_usage() keys = { 'docker.storage.data.space.used': dw_dds.data_space_used, 'docker.storage.data.space.available': dw_dds.data_space_available, 'docker.storage.data.space.percent_available': dw_dds.data_space_percent_available, 'docker.storage.data.space.total': dw_dds.data_space_total, 'docker.storage.metadata.space.used': dw_dds.metadata_space_used, 'docker.storage.metadata.space.available': dw_dds.metadata_space_available, 'docker.storage.metadata.space.percent_available': dw_dds.metadata_space_percent_available, 'docker.storage.metadata.space.total': dw_dds.metadata_space_total, 'docker.storage.is_loopback': int(dw_dds.is_loopback), 'docker.ping': 1, # Docker is up } except DockerException as ex: print "\nERROR talking to docker: %s\n" % ex.message keys = { 'docker.ping': 0, # Docker is down } exit_code = 10 zs.add_zabbix_keys(keys) print "Sending these metrics:" print json.dumps(keys, indent=4) zs.send_metrics() print "\nDone.\n" sys.exit(exit_code)
class OpsZaggClient(object): """ class to send data to zagg """ def __init__(self): self.zagg_sender = None self.args = None self.config = None self.heartbeat = None def run(self): """ main function to run the script """ self.parse_args() self.parse_config(self.args.config_file) self.config_zagg_sender() if self.args.send_heartbeat: self.add_heartbeat() if self.args.key and self.args.value: self.add_zabbix_key() if self.args.discovery_key and self.args.macro_string and self.args.macro_names: self.add_zabbix_dynamic_item() self.zagg_sender.send_metrics() def parse_args(self): """ parse the args from the cli """ parser = argparse.ArgumentParser(description="Zagg metric sender") parser.add_argument("--send-heartbeat", help="send heartbeat metric to zagg", action="store_true") parser.add_argument("-s", "--host", help="specify host name as registered in Zabbix") parser.add_argument("-z", "--zagg-url", help="url of Zagg server") parser.add_argument("--zagg-user", help="username of the Zagg server") parser.add_argument("--zagg-pass", help="Password of the Zagg server") parser.add_argument("--zagg-ssl-verify", default=None, help="Whether to verify ssl certificates.") parser.add_argument("-v", "--verbose", action="store_true", default=None, help="Verbose?") parser.add_argument("--debug", action="store_true", default=None, help="Debug?") parser.add_argument( "-c", "--config-file", help="ops-zagg-client config file", default="/etc/openshift_tools/zagg_client.yaml" ) key_value_group = parser.add_argument_group("Sending a Key-Value Pair") key_value_group.add_argument("-k", "--key", help="zabbix key") key_value_group.add_argument("-o", "--value", help="zabbix value") low_level_discovery_group = parser.add_argument_group("Sending a Low Level Discovery Item") low_level_discovery_group.add_argument("--discovery-key", help="discovery key") low_level_discovery_group.add_argument("--macro-string", help="macro string") low_level_discovery_group.add_argument("--macro-names", help="comma separated list of macro names") self.args = parser.parse_args() def parse_config(self, config_file): """ parse config file """ self.config = yaml.load(file(config_file)) def config_zagg_sender(self): """ configure the zagg_sender """ zagg_url = self.args.zagg_url if self.args.zagg_url else self.config["zagg"]["url"] zagg_user = self.args.zagg_user if self.args.zagg_user else self.config["zagg"]["user"] zagg_password = self.args.zagg_pass if self.args.zagg_pass else self.config["zagg"]["pass"] zagg_verbose = self.args.verbose if self.args.verbose else self.config["zagg"]["verbose"] zagg_debug = self.args.debug if self.args.debug else self.config["zagg"]["debug"] zagg_ssl_verify = self.args.zagg_ssl_verify if self.args.zagg_ssl_verify else self.config["zagg"]["ssl_verify"] host = self.args.host if self.args.host else self.config["host"]["name"] if isinstance(zagg_verbose, str): zagg_verbose = zagg_verbose == "True" if isinstance(zagg_debug, str): zagg_debug = zagg_debug == "True" if isinstance(zagg_ssl_verify, str): zagg_ssl_verify = zagg_ssl_verify == "True" zagg_conn = ZaggConnection( url=zagg_url, user=zagg_user, password=zagg_password, ssl_verify=zagg_ssl_verify, debug=zagg_debug ) self.zagg_sender = ZaggSender(host, zagg_conn, zagg_verbose, zagg_debug) def add_heartbeat(self): """ crate a hearbeat metric """ heartbeat = ZaggHeartbeat( templates=self.config["heartbeat"]["templates"], hostgroups=self.config["heartbeat"]["hostgroups"] ) self.zagg_sender.add_heartbeat(heartbeat) def add_zabbix_key(self): """ send zabbix key/value 
pair to zagg """ self.zagg_sender.add_zabbix_keys({self.args.key: self.args.value}) def add_zabbix_dynamic_item(self): """ send zabbix low level discovery item to zagg """ self.zagg_sender.add_zabbix_dynamic_item( self.args.discovery_key, self.args.macro_string, self.args.macro_names.split(",") )
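config_zagg_sender, add_heartbeat, and the pcp handling in the client classes above all index into self.config with a fixed set of keys. For reference, this is the overall shape that the loaded /etc/openshift_tools/zagg_client.yaml must have, sketched as an illustrative Python dict; every value here is a placeholder.

# placeholder values; only the keys that the client code above actually reads
EXAMPLE_ZAGG_CLIENT_CONFIG = {
    'host': {'name': 'my-host.example.com'},
    'zagg': {
        'url': 'https://zagg.example.com',
        'user': 'zagg-user',
        'pass': 'zagg-pass',
        'verbose': False,
        'debug': False,
        'ssl_verify': False,
    },
    'heartbeat': {
        'templates': ['Template Heartbeat'],
        'hostgroups': ['example hostgroup'],
    },
    'pcp': {'metrics': ['kernel.all.load', 'mem.util.free']},
}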
class OpenshiftDockerRegigtryChecker(object): """ Checks for the Openshift Cluster Docker Registry """ def __init__(self): self.args = None self.zagg_sender = None self.docker_hosts = [] self.docker_port = None # Assume secure registry self.docker_protocol = 'https' self.docker_service_ip = None self.kubeconfig = None def get_kubeconfig(self): ''' Find kubeconfig to use for OCUtil ''' # Default master kubeconfig kubeconfig = '/etc/origin/master/admin.kubeconfig' non_master_kube_dir = '/etc/origin/node' if os.path.isdir(non_master_kube_dir): for my_file in os.listdir(non_master_kube_dir): if my_file.endswith(".kubeconfig"): kubeconfig = os.path.join(non_master_kube_dir, my_file) if self.args.debug: print "Using kubeconfig: {}".format(kubeconfig) self.kubeconfig = kubeconfig def run(self): """ Main function to run the check """ self.parse_args() self.get_kubeconfig() ocutil = OCUtil(config_file=self.kubeconfig, verbose=self.args.verbose) self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug) try: oc_yaml = ocutil.get_service('docker-registry') self.get_registry_service(oc_yaml) oc_yaml = ocutil.get_endpoint('docker-registry') self.get_registry_endpoints(oc_yaml) except Exception as ex: print "Problem retreiving registry IPs: %s " % ex.message self.registry_service_check() self.registry_health_check() self.zagg_sender.send_metrics() def parse_args(self): """ parse the args from the cli """ parser = argparse.ArgumentParser(description='Openshift Cluster Docker Registry sender') parser.add_argument('-v', '--verbose', action='store_true', default=None, help='Verbose?') parser.add_argument('--debug', action='store_true', default=None, help='Debug?') self.args = parser.parse_args() def get_registry_service(self, service_yaml): ''' This will get the service IP of the docker registry ''' print "\nGetting Docker Registry service IP..." service = yaml.safe_load(service_yaml) self.docker_service_ip = str(service['spec']['clusterIP']) def get_registry_endpoints(self, endpoint_yaml): """ This will return the docker registry endpoint IPs that are being served inside of kubernetes. """ print "\nFinding the Docker Registry pods via Openshift API calls..." endpoints = yaml.safe_load(endpoint_yaml) self.docker_port = str(endpoints['subsets'][0]['ports'][0]['port']) for address in endpoints['subsets'][0]['addresses']: self.docker_hosts.append(address['ip']) def healthy_registry(self, ip_addr, port, secure=True): ''' Test a specific registry URL In v3.0.2.0, http://registry.url/healthz worked. The '/healthz' was something added by openshift to the docker registry. This should return a http status code of 200 and text of {} (empty json). In 3.1.1 and on, '/' should work and return a 200 to indicate that the registry is up and running. Please see the following url for more info. 
Look under load balancer health checks: https://github.com/docker/distribution/blob/master/docs/deploying.md#running-a-domain-registry ''' proto = self.docker_protocol if not secure: proto = 'http' url = '{}://{}:{}/'.format(proto, ip_addr, port) try: print "Performing Docker Registry check on URL: {}".format(url) response = urllib2.urlopen(url, timeout=20) if response.getcode() == 200: return True except urllib2.URLError: print "Received error accessing URL: {}".format(url) except socket.timeout: print "Timed out accessing URL: {}".format(url) # Try with /healthz try: url = url + 'healthz' print "Performing Docker Registry check on URL: {}".format(url) response = urllib2.urlopen(url, timeout=20) if response.getcode() == 200: return True except urllib2.URLError: print "Received error accessing URL: {}".format(url) except socket.timeout: print "Timed out accessing URL: {}".format(url) # We tried regular and 'healthz' URLs. Registry inaccessible. return False def registry_service_check(self): ''' Test and report on health of Docker Registry service ''' status = '0' # Skip if we failed to fetch a valid service IP if self.docker_service_ip is not None: if self.healthy_registry(self.docker_service_ip, self.docker_port): status = '1' elif self.healthy_registry(self.docker_service_ip, self.docker_port, secure=False): status = '1' print "\nDocker Registry service status: {}".format(status) self.zagg_sender.add_zabbix_keys({'openshift.node.registry.service.ping' : status}) def registry_health_check(self): """ Check each registry endpoint and report the percentage that are healthy """ healthy_registries = 0 for host in self.docker_hosts: if self.healthy_registry(host, self.docker_port): healthy_registries += 1 elif self.healthy_registry(host, self.docker_port, secure=False): healthy_registries += 1 healthy_pct = 0 if len(self.docker_hosts) > 0: # use float arithmetic so a partially healthy set does not truncate to 0 healthy_pct = (100.0 * healthy_registries / len(self.docker_hosts)) print "\n%s of %s registry PODs are healthy\n" % (healthy_registries, len(self.docker_hosts)) self.zagg_sender.add_zabbix_keys({'openshift.node.registry-pods.healthy_pct' : healthy_pct})
class OpenshiftMasterZaggClient(object): """ Checks for the Openshift Master """ def __init__(self): self.args = None self.zagg_sender = None self.ora = OpenshiftRestApi() def run(self): """ Main function to run the check """ self.parse_args() self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug) try: if self.args.healthz or self.args.all_checks: self.healthz_check() except Exception as ex: print "Problem performing healthz check: %s " % ex.message self.zagg_sender.add_zabbix_keys({'openshift.master.api.healthz' : 'false'}) try: if self.args.api_ping or self.args.all_checks: self.api_ping() if self.args.project_count or self.args.all_checks: self.project_count() if self.args.pod_count or self.args.all_checks: self.pod_count() if self.args.user_count or self.args.all_checks: self.user_count() except Exception as ex: print "Problem Openshift API checks: %s " % ex.message self.zagg_sender.add_zabbix_keys({'openshift.master.api.ping' : 0}) # Openshift API is down try: if self.args.metrics or self.args.all_checks: self.metric_check() except Exception as ex: print "Problem getting Openshift metrics at /metrics: %s " % ex.message self.zagg_sender.add_zabbix_keys({'openshift.master.metric.ping' : 0}) # Openshift Metrics are down self.zagg_sender.send_metrics() def parse_args(self): """ parse the args from the cli """ parser = argparse.ArgumentParser(description='Network metric sender') parser.add_argument('-v', '--verbose', action='store_true', default=None, help='Verbose?') parser.add_argument('--debug', action='store_true', default=None, help='Debug?') master_check_group = parser.add_argument_group('Different Checks to Perform') master_check_group.add_argument('--all-checks', action='store_true', default=None, help='Do all of the checks') master_check_group.add_argument('--api-ping', action='store_true', default=None, help='Verify the Openshift API is alive') master_check_group.add_argument('--healthz', action='store_true', default=None, help='Query the Openshift Master API /healthz') master_check_group.add_argument('--metrics', action='store_true', default=None, help='Query the Openshift Master Metrics at /metrics') master_check_group.add_argument('--project-count', action='store_true', default=None, help='Query the Openshift Master for Number of Pods') master_check_group.add_argument('--pod-count', action='store_true', default=None, help='Query the Openshift Master for Number of Running Pods') master_check_group.add_argument('--user-count', action='store_true', default=None, help='Query the Openshift Master for Number of Users') self.args = parser.parse_args() def api_ping(self): """ Verify the Openshift API health is responding correctly """ print "\nPerforming Openshift API ping check..." response = self.ora.get('/api/v1/nodes') print "\nOpenshift API ping is alive" print "Number of nodes in the Openshift cluster: %s" % len(response['items']) self.zagg_sender.add_zabbix_keys({'openshift.master.api.ping' : 1, 'openshift.master.node.count': len(response['items'])}) def healthz_check(self): """ check the /healthz API call """ print "\nPerforming /healthz check..." response = self.ora.get('/healthz', rtype='text') print "healthz check returns: %s " %response self.zagg_sender.add_zabbix_keys({'openshift.master.api.healthz' : str('ok' in response).lower()}) def metric_check(self): """ collect certain metrics from the /metrics API call """ print "\nPerforming /metrics check..." 
response = self.ora.get('/metrics', rtype='text') for metric_type in text_string_to_metric_families(response): # Collect the apiserver_request_latencies_summary{resource="pods",verb="LIST",quantiles in /metrics # Collect the apiserver_request_latencies_summary{resource="pods",verb="WATCHLIST",quantiles in /metrics if metric_type.name == 'apiserver_request_latencies_summary': key_str = 'openshift.master.apiserver.latency.summary' for sample in metric_type.samples: if (sample[1]['resource'] == 'pods' and sample[1].has_key('quantile') and 'LIST' in sample[1]['verb']): curr_key_str = key_str + ".pods.quantile.%s.%s" % (sample[1]['verb'], sample[1]['quantile'].split('.')[1]) if math.isnan(sample[2]): value = 0 else: value = sample[2] self.zagg_sender.add_zabbix_keys({curr_key_str.lower(): int(value/1000)}) # Collect the scheduler_e2e_scheduling_latency_microseconds{quantiles in /metrics if metric_type.name == 'scheduler_e2e_scheduling_latency_microseconds': for sample in metric_type.samples: if sample[1].has_key('quantile'): key_str = 'openshift.master.scheduler.e2e.scheduling.latency' curr_key_str = key_str + ".quantile.%s" % (sample[1]['quantile'].split('.')[1]) if math.isnan(sample[2]): value = 0 else: value = sample[2] self.zagg_sender.add_zabbix_keys({curr_key_str.lower(): int(value/1000)}) self.zagg_sender.add_zabbix_keys({'openshift.master.metric.ping' : 1}) # def project_count(self): """ check the number of projects in Openshift """ print "\nPerforming project count check..." excluded_names = ['openshift', 'openshift-infra', 'default', 'ops-monitor'] response = self.ora.get('/oapi/v1/projects') project_names = [project['metadata']['name'] for project in response['items']] valid_names = set(project_names) - set(excluded_names) print "Project count: %s" % len(valid_names) self.zagg_sender.add_zabbix_keys({'openshift.project.count' : len(valid_names)}) def pod_count(self): """ check the number of pods in Openshift """ print "\nPerforming pod count check..." response = self.ora.get('/api/v1/pods') # Get running pod count running_pod_count = 0 for i in response['items']: if 'containerStatuses' in i['status']: if 'running' in i['status']['containerStatuses'][0]['state']: running_pod_count += 1 # Get running pod count on compute only nodes (non-infra) running_user_pod_count = 0 for i in response['items']: if 'containerStatuses' in i['status']: if 'running' in i['status']['containerStatuses'][0]['state']: if 'nodeSelector' in i['spec']: if i['spec']['nodeSelector']['type'] == 'compute': running_user_pod_count += 1 print "Total pod count: %s" % len(response['items']) print "Running pod count: %s" % running_pod_count print "User Running pod count: %s" % running_user_pod_count self.zagg_sender.add_zabbix_keys({'openshift.master.pod.running.count' : running_pod_count, 'openshift.master.pod.user.running.count' : running_user_pod_count, 'openshift.master.pod.total.count' : len(response['items'])}) def user_count(self): """ check the number of users in Openshift """ print "\nPerforming user count check..." response = self.ora.get('/oapi/v1/users') print "Total user count: %s" % len(response['items']) self.zagg_sender.add_zabbix_keys({'openshift.master.user.count' : len(response['items'])})
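metric_check above converts prometheus summary samples into Zabbix keys; the sketch below shows that conversion on a single made-up sample tuple (text_string_to_metric_families yields samples in the (name, labels, value) form that the code above indexes).

import math

# made-up sample in the (name, labels, value) form produced by
# prometheus_client.parser.text_string_to_metric_families
sample = ('apiserver_request_latencies_summary',
          {'resource': 'pods', 'verb': 'LIST', 'quantile': '0.99'},
          124000.0)

key_str = 'openshift.master.apiserver.latency.summary'
curr_key_str = key_str + ".pods.quantile.%s.%s" % (sample[1]['verb'],
                                                   sample[1]['quantile'].split('.')[1])
value = 0 if math.isnan(sample[2]) else sample[2]

# latencies arrive in microseconds, so the stored value is milliseconds
print {curr_key_str.lower(): int(value / 1000)}
# {'openshift.master.apiserver.latency.summary.pods.quantile.list.99': 124}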
""" docker container DNS tester """ # Adding the ignore because it does not like the naming of the script # to be different than the class name # pylint: disable=invalid-name from docker import AutoVersionClient from openshift_tools.monitoring.zagg_sender import ZaggSender ZBX_KEY = "docker.container.dns.resolution" if __name__ == "__main__": cli = AutoVersionClient(base_url="unix://var/run/docker.sock") container = cli.create_container( image="docker-registry.ops.rhcloud.com/ops/oso-rhel7-host-monitoring", command="getent hosts redhat.com" ) cli.start(container=container.get("Id")) exit_code = cli.wait(container) cli.remove_container(container.get("Id")) zs = ZaggSender() zs.add_zabbix_keys({ZBX_KEY: exit_code}) print "Sending these metrics:" print ZBX_KEY + ": " + str(exit_code) zs.send_metrics() print "\nDone.\n"
class OpsZaggPCPClient(object): """ class to send data to zagg """ def __init__(self): self.zagg_sender = None self.args = None self.config = None self.pcp_metrics = [] self.heartbeat = None def run(self): """ main function to run the script """ self.parse_args() self.parse_config(self.args.config_file) self.config_zagg_sender() if self.args.metrics: self.add_metrics() self.add_metrics_from_config() self.zagg_sender.send_metrics() def parse_args(self): """ parse the args from the cli """ parser = argparse.ArgumentParser(description='Zagg PCP metric sender') parser.add_argument('--send-pcp-metrics', help="send pcp metrics to zagg", action="store_true") parser.add_argument('-m', '--metrics', help="send PCP metrics to zagg") parser.add_argument('-s', '--host', help='specify host name as registered in Zabbix') parser.add_argument('-z', '--zagg-url', help='url of Zagg server') parser.add_argument('--zagg-user', help='username of the Zagg server') parser.add_argument('--zagg-pass', help='Password of the Zagg server') parser.add_argument('--zagg-ssl-verify', default=None, help='Whether to verify ssl certificates.') parser.add_argument('-v', '--verbose', action='store_true', default=None, help='Verbose?') parser.add_argument('--debug', action='store_true', default=None, help='Debug?') parser.add_argument('-c', '--config-file', help='ops-zagg-client config file', default='/etc/openshift_tools/zagg_client.yaml') self.args = parser.parse_args() def parse_config(self, config_file): """ parse config file """ self.config = yaml.load(file(config_file)) def config_zagg_sender(self): """ configure the zagg_sender """ zagg_url = self.args.zagg_url if self.args.zagg_url else self.config['zagg']['url'] zagg_user = self.args.zagg_user if self.args.zagg_user else self.config['zagg']['user'] zagg_password = self.args.zagg_pass if self.args.zagg_pass else self.config['zagg']['pass'] zagg_verbose = self.args.verbose if self.args.verbose else self.config['zagg']['verbose'] zagg_debug = self.args.debug if self.args.debug else self.config['zagg']['debug'] zagg_ssl_verify = self.args.zagg_ssl_verify if self.args.zagg_ssl_verify else self.config['zagg']['ssl_verify'] host = self.args.host if self.args.host else self.config['host']['name'] if isinstance(zagg_verbose, str): zagg_verbose = (zagg_verbose == 'True') if isinstance(zagg_debug, str): zagg_debug = (zagg_debug == 'True') if isinstance(zagg_ssl_verify, str): zagg_ssl_verify = (zagg_ssl_verify == 'True') zagg_conn = ZaggConnection(url=zagg_url, user=zagg_user, password=zagg_password, ssl_verify=zagg_ssl_verify, debug=zagg_debug, ) self.zagg_sender = ZaggSender(host, zagg_conn, zagg_verbose, zagg_debug) def add_metrics_from_config(self): """ collect pcp metrics from a config file. Add to send to ZaggSender """ self.add_pcp_to_zagg_sender(self.config['pcp']['metrics']) def add_metrics(self): """ collect pcp metrics to send to ZaggSender """ metric_list = self.args.metrics.split(',') self.add_pcp_to_zagg_sender(metric_list) def add_pcp_to_zagg_sender(self, pcp_metrics): """ something pcp yada yada """ pcp_metric_dict = pminfo.get_metrics(metrics=pcp_metrics, derived_metrics=None) self.zagg_sender.add_zabbix_keys(pcp_metric_dict)
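add_pcp_to_zagg_sender above simply hands pminfo's result to add_zabbix_keys, so the dict it expects is a flat metric-name-to-value mapping, roughly like the illustrative sketch below; the metric names and values are invented, and the exact key naming pminfo produces is an assumption.

# illustrative pminfo.get_metrics() result for two requested metrics
example_pcp_metrics = {
    'kernel.all.pswitch': 14321,
    'mem.util.free': 2048356,
}
# zagg_sender.add_zabbix_keys(example_pcp_metrics) would send these as-is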