def run(self):
        """  Main function to run the check """

        self.parse_args()
        self.ora = OpenshiftRestApi()
        self.metric_sender = MetricSender(verbose=self.args.verbose,
                                          debug=self.args.debug)

        status = None
        try:
            self.get_service()
            if not self.args.service_count:
                status = self.check_service()

        except Exception as ex:
            print "Problem retreiving data: %s " % ex.message

        if status:
            self.metric_sender.add_metric({
                "openshift.webservice.{}.status".format(self.args.pod):
                status
            })

        self.metric_sender.add_metric(
            {'openshift.cluster.service.count': self.servicecount},
            synthetic=True)
        self.metric_sender.send_metrics()
    def run(self):
        """  Main function to run the check """

        self.parse_args()
        self.ora = OpenshiftRestApi()
        self.metric_sender = MetricSender(verbose=self.args.verbose,
                                          debug=self.args.debug)

        try:
            self.get_pods()

        except Exception as ex:
            print "Problem retreiving pod data: %s " % ex.message

        self.metric_sender.send_metrics()
    def __init__(self):
        self.args = None
        self.metric_sender = None
        self.ora = OpenshiftRestApi()
        self.dns_host = ''
        self.dns_port = 53
        self.openshift_services = []
    def run(self):
        '''  Main function to run the check '''

        self.parse_args()
        self.zagg_sender = ZaggSender(verbose=self.args.verbose,
                                      debug=self.args.debug)

        master_cfg = []
        with open(self.args.master_config, 'r') as yml:
            master_cfg = yaml.load(yml)
        self.ora = OpenshiftRestApi(
            host=master_cfg['oauthConfig']['masterURL'], verify_ssl=True)

        self.cluster_capacity()

        if not self.args.dry_run:
            self.zagg_sender.send_metrics()
    def run(self):
        """  Main function to run the check """

        self.parse_args()
        self.ora = OpenshiftRestApi()
        self.metric_sender = MetricSender(verbose=self.args.verbose, debug=self.args.debug)

        status = False
        try:
            self.get_service()
            status = self.check_service()

        except Exception as ex:
            print "Problem retrieving data: %s " % ex.message

        self.metric_sender.add_metric({
            "openshift.webservice.{}.status".format(self.args.pod) : status})

        self.metric_sender.send_metrics()
class OpenshiftPodChecker(object):
    """ Checks for Openshift Pods """

    def __init__(self):
        self.args = None
        self.ora = None
        self.zagg_sender = None

    def run(self):
        """  Main function to run the check """

        self.parse_args()
        self.ora = OpenshiftRestApi()
        self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug)

        try:
            self.get_pods()

        except Exception as ex:
            print "Problem retreiving pod data: %s " % ex.message

        self.zagg_sender.send_metrics()

    def get_pods(self):
        """ Gets pod data """

        print "\nPerforming pod check ...\n"

        api_url = "/api/v1/pods"
        if str(self.args.namespace) != "None" and str(self.args.namespace) != "all":
            api_url = "/api/v1/namespaces/{}/pods".format(self.args.namespace)

        api_yaml = self.ora.get(api_url, rtype="text")
        pods = yaml.safe_load(api_yaml)

        pod_count = 0
        for pod in pods["items"]:
            if self.args.pod and self.args.pod in pod["metadata"]["name"]:
                print "status of {} is {}".format(pod["metadata"]["name"], pod["status"]["phase"])
                if pod["status"]["phase"] == "Running":
                    pod_count += 1

        self.zagg_sender.add_zabbix_keys({"service.pod.{}.count".format(self.args.pod): pod_count})

    def parse_args(self):
        """ parse the args from the cli """

        parser = argparse.ArgumentParser(description="Openshift pod sender")
        parser.add_argument("-p", "--pod", default=None, help="Check for pod with this specific name")
        parser.add_argument("-n", "--namespace", default=None, help='Check for pods in this namespace - "all" for all')
        parser.add_argument("-v", "--verbose", action="store_true", default=None, help="Verbose?")
        parser.add_argument("--debug", action="store_true", default=None, help="Debug?")

        self.args = parser.parse_args()
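
# A minimal entry-point sketch (an assumption; the surrounding tooling may
# instantiate and run the checker differently):
if __name__ == '__main__':
    OPC = OpenshiftPodChecker()
    OPC.run()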
    def run(self):
        """  Main function to run the check """

        self.parse_args()
        self.ora = OpenshiftRestApi()
        self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug)

        try:
            self.get_pods()

        except Exception as ex:
            print "Problem retreiving pod data: %s " % ex.message

        self.zagg_sender.send_metrics()
    def run(self):
        """  Main function to run the check """

        self.parse_args()
        self.ora = OpenshiftRestApi()
        self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug)

        status = False
        try:
            self.get_service()
            status = self.check_service()

        except Exception as ex:
            print "Problem retrieving data: %s " % ex.message

        self.zagg_sender.add_zabbix_keys({
            "openshift.webservice.{}.status".format(self.args.pod) : status})

        self.zagg_sender.send_metrics()
class OpenshiftMasterZaggClient(object):
    """ Checks for the Openshift Master """

    def __init__(self):
        self.args = None
        self.metric_sender = None
        self.ora = None
        self.zabbix_api_key = None
        self.zabbix_healthz_key = None

    def run(self):
        """  Main function to run the check """

        self.parse_args()
        self.metric_sender = MetricSender(verbose=self.args.verbose, debug=self.args.debug)

        if self.args.local:
            self.ora = OpenshiftRestApi()
            self.args.api_ping = True
            self.args.healthz = True
            self.zabbix_api_key = 'openshift.master.local.api.ping'
            self.zabbix_healthz_key = 'openshift.master.local.api.healthz'
        else:
            master_cfg_from_yaml = []
            with open('/etc/origin/master/master-config.yaml', 'r') as yml:
                master_cfg_from_yaml = yaml.load(yml)
            self.ora = OpenshiftRestApi(host=master_cfg_from_yaml['oauthConfig']['masterURL'],
                                        verify_ssl=True)

            self.zabbix_api_key = 'openshift.master.api.ping'
            self.zabbix_healthz_key = 'openshift.master.api.healthz'

        try:
            if self.args.healthz or self.args.all_checks:
                self.healthz_check()

        except Exception as ex:
            print "Problem performing healthz check: %s " % ex.message
            self.metric_sender.add_metric({self.zabbix_healthz_key: 'false'})

        try:
            if self.args.api_ping or self.args.all_checks:
                self.api_ping()

            if self.args.project_count or self.args.all_checks:
                self.project_count()

            if self.args.pod_count or self.args.all_checks:
                self.pod_count()

            if self.args.user_count or self.args.all_checks:
                self.user_count()

            if self.args.pv_info or self.args.all_checks:
                self.pv_info()

            if self.args.node_checks or self.args.all_checks:
                self.nodes_not_schedulable()
                self.nodes_not_ready()
                self.nodes_not_labeled()

        except Exception as ex:
            print "Problem Openshift API checks: %s " % ex.message
            self.metric_sender.add_metric({self.zabbix_api_key: 0}) # Openshift API is down

        try:
            if self.args.metrics or self.args.all_checks:
                self.metric_check()

        except Exception as ex:
            print "Problem getting Openshift metrics at /metrics: %s " % ex.message
            self.metric_sender.add_metric({'openshift.master.metric.ping' : 0}) # Openshift Metrics are down

        self.metric_sender.send_metrics()

    def parse_args(self):
        """ parse the args from the cli """

        parser = argparse.ArgumentParser(description='Network metric sender')
        parser.add_argument('-v', '--verbose', action='store_true', default=None, help='Verbose?')
        parser.add_argument('--debug', action='store_true', default=None, help='Debug?')
        parser.add_argument('-l', '--local', action='store_true', default=False,
                            help='Run local checks against the local API (https://127.0.0.1)')

        master_check_group = parser.add_argument_group('Different Checks to Perform')
        master_check_group.add_argument('--all-checks', action='store_true', default=None,
                                        help='Do all of the checks')

        master_check_group.add_argument('--api-ping', action='store_true', default=None,
                                        help='Verify the Openshift API is alive')

        master_check_group.add_argument('--healthz', action='store_true', default=None,
                                        help='Query the Openshift Master API /healthz')

        master_check_group.add_argument('--metrics', action='store_true', default=None,
                                        help='Query the Openshift Master Metrics at /metrics')

        master_check_group.add_argument('--project-count', action='store_true', default=None,
                                        help='Query the Openshift Master for Number of Projects')

        master_check_group.add_argument('--pod-count', action='store_true', default=None,
                                        help='Query the Openshift Master for Number of Running Pods')

        master_check_group.add_argument('--user-count', action='store_true', default=None,
                                        help='Query the Openshift Master for Number of Users')

        master_check_group.add_argument('--pv-info', action='store_true', default=None,
                                        help='Query the Openshift Master for Persistent Volumes Info')

        master_check_group.add_argument('--node-checks', action='store_true', default=None,
                                        help='Query the Openshift Master for node checks')

        self.args = parser.parse_args()
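
        # Hypothetical example invocations (script name is an assumption):
        #   ./check-master.py --all-checks -v
        #   ./check-master.py --local --healthz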

    def api_ping(self):
        """ Verify the Openshift API health is responding correctly """

        print "\nPerforming Openshift API ping check..."

        response = self.ora.get('/api/v1/nodes')
        print "\nOpenshift API ping is alive"
        print "Number of nodes in the Openshift cluster: %s" % len(response['items'])

        self.metric_sender.add_metric({self.zabbix_api_key: 1, 'openshift.master.node.count': len(response['items'])})

    def healthz_check(self):
        """ check the /healthz API call """

        print "\nPerforming /healthz check..."

        response = self.ora.get('/healthz', rtype='text')
        print "healthz check returns: %s " %response

        self.metric_sender.add_metric({self.zabbix_healthz_key: str('ok' in response).lower()})

    def metric_check(self):
        """ collect certain metrics from the /metrics API call """

        print "\nPerforming /metrics check..."
        response = self.ora.get('/metrics', rtype='text')

        for metric_type in text_string_to_metric_families(response):

            # Collect the apiserver_request_latencies_summary{resource="pods",verb="LIST",quantiles in /metrics
            # Collect the apiserver_request_latencies_summary{resource="pods",verb="WATCHLIST",quantiles in /metrics
            if metric_type.name == 'apiserver_request_latencies_summary':
                key_str = 'openshift.master.apiserver.latency.summary'
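                # e.g. verb='LIST', quantile='0.99' produces the key
                # 'openshift.master.apiserver.latency.summary.pods.quantile.list.99'
                # (samples arrive in microseconds; int(value/1000) reports ms)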
                for sample in metric_type.samples:
                    if (sample[1]['resource'] == 'pods'
                            and 'quantile' in sample[1]
                            and 'LIST' in sample[1]['verb']):
                        curr_key_str = key_str + ".pods.quantile.%s.%s" % (sample[1]['verb'],
                                                                           sample[1]['quantile'].split('.')[1])

                        if math.isnan(sample[2]):
                            value = 0
                        else:
                            value = sample[2]

                        self.metric_sender.add_metric({curr_key_str.lower(): int(value/1000)})

            # Collect the scheduler_e2e_scheduling_latency_microseconds{quantiles in /metrics
            if metric_type.name == 'scheduler_e2e_scheduling_latency_microseconds':
                for sample in metric_type.samples:
                    if 'quantile' in sample[1]:
                        key_str = 'openshift.master.scheduler.e2e.scheduling.latency'
                        curr_key_str = key_str + ".quantile.%s" % (sample[1]['quantile'].split('.')[1])

                        if math.isnan(sample[2]):
                            value = 0
                        else:
                            value = sample[2]

                        self.metric_sender.add_metric({curr_key_str.lower(): int(value/1000)})

        self.metric_sender.add_metric({'openshift.master.metric.ping' : 1}) # Openshift Metrics are up

    def project_count(self):
        """ check the number of projects in Openshift """

        print "\nPerforming project count check..."

        excluded_names = ['openshift', 'openshift-infra', 'default', 'ops-monitor']
        response = self.ora.get('/oapi/v1/projects')

        project_names = [project['metadata']['name'] for project in response['items']]
        valid_names = set(project_names) - set(excluded_names)

        print "Project count: %s" % len(valid_names)

        self.metric_sender.add_metric({'openshift.project.count' : len(valid_names)})

    def pod_count(self):
        """ check the number of pods in Openshift """

        print "\nPerforming pod count check..."

        response = self.ora.get('/api/v1/pods')

        # Get running pod count
        running_pod_count = 0
        for i in response['items']:
            if 'containerStatuses' in i['status']:
                if 'running' in i['status']['containerStatuses'][0]['state']:
                    running_pod_count += 1

        # Get running pod count on compute only nodes (non-infra)
        running_user_pod_count = 0
        for i in response['items']:
            if 'containerStatuses' in i['status']:
                if 'running' in i['status']['containerStatuses'][0]['state']:
                    if 'nodeSelector' in i['spec']:
                        # logging pods don't have selector on 'type'
                        if 'type' in i['spec']['nodeSelector'] \
                           and i['spec']['nodeSelector']['type'] == 'compute':
                            running_user_pod_count += 1


        print "Total pod count: %s" % len(response['items'])
        print "Running pod count: %s" % running_pod_count
        print "User Running pod count: %s" % running_user_pod_count

        self.metric_sender.add_metric({'openshift.master.pod.running.count' : running_pod_count,
                                       'openshift.master.pod.user.running.count' : running_user_pod_count,
                                       'openshift.master.pod.total.count' : len(response['items'])})

    def user_count(self):
        """ check the number of users in Openshift """

        print "\nPerforming user count check..."

        response = self.ora.get('/oapi/v1/users')

        print "Total user count: %s" % len(response['items'])
        self.metric_sender.add_metric({'openshift.master.user.count' : len(response['items'])})

    @staticmethod
    def convert_to_GiB(value):
        """ take units as 'Gi', 'Ti', etc and return as int GiB """

        if 'G' in value:
            return int(value.strip('GIgi'))
        elif 'Ti' in value:
            return 1024 * int(value.replace('Ti', ''))  # 1 TiB == 1024 GiB
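        # e.g. convert_to_GiB('10Gi') == 10; convert_to_GiB('2Ti') == 2048
        # (other suffixes such as 'Mi' fall through and return None)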

    def pv_info(self):
        """ Gather info about the persistent volumes in Openshift """

        print "\nPerforming user persistent volume count...\n"

        response = self.ora.get('/api/v1/persistentvolumes')

        pv_capacity_total = 0
        pv_capacity_available = 0
        pv_types = {'Available': 0,
                    'Bound': 0,
                    'Released': 0,
                    'Failed': 0}

        # Dynamic items variables
        discovery_key_pv = 'disc.pv'
        item_prototype_macro_pv = '#OSO_PV'
        item_prototype_key_count = 'disc.pv.count'
        item_prototype_key_available = 'disc.pv.available'
        dynamic_pv_count = defaultdict(int)
        dynamic_pv_available = defaultdict(int)

        for item in response['items']:
            # gather dynamic pv counts
            dynamic_pv_count[item['spec']['capacity']['storage']] += 1

            #get count of each pv type available
            pv_types[item['status']['phase']] += 1

            #get info for the capacity and capacity available
            capacity = item['spec']['capacity']['storage']
            if item['status']['phase'] == 'Available':
                # get total available capacity
                pv_capacity_available = pv_capacity_available + self.convert_to_GiB(capacity)

                # gather dynamic pv available counts
                dynamic_pv_available[item['spec']['capacity']['storage']] += 1

            pv_capacity_total = pv_capacity_total + self.convert_to_GiB(capacity)

        print "Total Persistent Volume Total count: %s" % len(response['items'])
        print 'Total Persistent Volume Capacity: %s' % pv_capacity_total
        print 'Total Persistent Volume Available Capacity: %s' % pv_capacity_available

        self.metric_sender.add_metric(
            {'openshift.master.pv.total.count' : len(response['items']),
             'openshift.master.pv.space.total': pv_capacity_total,
             'openshift.master.pv.space.available': pv_capacity_available})

        for key, value in pv_types.iteritems():
            print "Total Persistent Volume %s count: %s" % (key, value)
            self.metric_sender.add_metric(
                {'openshift.master.pv.%s.count' %key.lower() : value})

        # Add dynamic items
        self.metric_sender.add_dynamic_metric(discovery_key_pv, item_prototype_macro_pv, dynamic_pv_count.keys())

        for size, count in dynamic_pv_count.iteritems():
            print
            print "Total Persistent Volume %s count: %s" % (size, count)
            print "Total Persistent Volume available %s count: %s" % (size, dynamic_pv_available[size])

            self.metric_sender.add_metric({"%s[%s]" %(item_prototype_key_count, size) : count,
                                           "%s[%s]" %(item_prototype_key_available, size) : dynamic_pv_available[size]})


    def nodes_not_schedulable(self):
        """check the number of nodes in the cluster that are not schedulable"""

        print "\nPerforming nodes not schedulable check..."

        response = self.ora.get('/api/v1/nodes')

        nodes_not_schedulable = []

        for n in response['items']:
            if n['metadata']['labels'].get('type') == 'master':
                if self.args.verbose:
                    print "Node: %s is a master\n" % n['metadata']['name']
            else:
                if "unschedulable" in n['spec']:
                    nodes_not_schedulable.append(n['metadata']['name'])

        print "Count of nodes not schedulable: %s" % len(nodes_not_schedulable)
        print "Nodes not schedulable: %s\n" % nodes_not_schedulable

        self.metric_sender.add_metric(
            {'openshift.master.nodesnotschedulable.count' : len(nodes_not_schedulable)})


    def nodes_not_ready(self):
        """ check the number of nodes in the cluster that are not ready"""

        print "\nPerforming nodes not ready check..."

        response = self.ora.get('/api/v1/nodes')

        nodes_not_ready = []

        for n in response['items']:
            has_ready_status = False
            for cond in n['status']['conditions']:
                if self.args.verbose:
                    print "Get ready status of %s" % n['metadata']['name']
                if cond['type'] == "Ready":
                    has_ready_status = True
                    if cond['status'].lower() != "true":
                        if self.args.verbose:
                            print "Non-true ready status of %s : %s" % (n['metadata']['name'], cond['status'])
                        nodes_not_ready.append(n['metadata']['name'])
            if not has_ready_status:
                if self.args.verbose:
                    print "Did not find ready status for %s" % n['metadata']['name']
                nodes_not_ready.append(n['metadata']['name'])

        print "Count of nodes not ready: %s" % len(nodes_not_ready)

        self.metric_sender.add_metric(
            {'openshift.master.nodesnotready.count' : len(nodes_not_ready)})


    def nodes_not_labeled(self):
        """ check the nodes in the cluster that are not labeled
            Note: This check only searches for nodes with no label keys set"""

        print "\nPerforming nodes not labeled check..."

        response = self.ora.get('/api/v1/nodes')

        nodes_not_labeled = []
        nodes_labeled = []

        for n in response['items']:
            if 'labels' in n['metadata']:
                nodes_labeled.append(n['metadata']['name'])
            else:
                nodes_not_labeled.append(n['metadata']['name'])

        print "Nodes not labeled: %s\nNodes labeled: %s \n" % (nodes_not_labeled, nodes_labeled)
        self.metric_sender.add_metric(
            {'openshift.master.nodesnotlabeled.count' : len(nodes_not_labeled)})
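

# A minimal entry-point sketch (an assumption; the real script may differ):
if __name__ == '__main__':
    OMZC = OpenshiftMasterZaggClient()
    OMZC.run()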
class OpenshiftMasterZaggClient(object):
    """ Checks for the Openshift Master """
    def __init__(self):
        self.args = None
        self.zagg_sender = None
        self.ora = OpenshiftRestApi()

    def run(self):
        """  Main function to run the check """

        self.parse_args()
        self.zagg_sender = ZaggSender(verbose=self.args.verbose,
                                      debug=self.args.debug)

        try:
            if self.args.healthz or self.args.all_checks:
                self.healthz_check()
        except Exception as ex:
            print "Problem performing healthz check: %s " % ex.message
            self.zagg_sender.add_zabbix_keys(
                {'openshift.master.api.healthz': 'false'})

        try:
            if self.args.api_ping or self.args.all_checks:
                self.api_ping()

            if self.args.project_count or self.args.all_checks:
                self.project_count()

            if self.args.pod_count or self.args.all_checks:
                self.pod_count()

            if self.args.user_count or self.args.all_checks:
                self.user_count()
        except Exception as ex:
            print "Problem Openshift API checks: %s " % ex.message
            self.zagg_sender.add_zabbix_keys({'openshift.master.api.ping':
                                              0})  # Openshift API is down

        try:
            if self.args.metrics or self.args.all_checks:
                self.metric_check()
        except Exception as ex:
            print "Problem getting Openshift metrics at /metrics: %s " % ex.message
            self.zagg_sender.add_zabbix_keys(
                {'openshift.master.metric.ping':
                 0})  # Openshift Metrics are down

        self.zagg_sender.send_metrics()

    def parse_args(self):
        """ parse the args from the cli """

        parser = argparse.ArgumentParser(description='Network metric sender')
        parser.add_argument('-v',
                            '--verbose',
                            action='store_true',
                            default=None,
                            help='Verbose?')
        parser.add_argument('--debug',
                            action='store_true',
                            default=None,
                            help='Debug?')

        master_check_group = parser.add_argument_group(
            'Different Checks to Perform')
        master_check_group.add_argument('--all-checks',
                                        action='store_true',
                                        default=None,
                                        help='Do all of the checks')

        master_check_group.add_argument(
            '--api-ping',
            action='store_true',
            default=None,
            help='Verify the Openshift API is alive')

        master_check_group.add_argument(
            '--healthz',
            action='store_true',
            default=None,
            help='Query the Openshift Master API /healthz')

        master_check_group.add_argument(
            '--metrics',
            action='store_true',
            default=None,
            help='Query the Openshift Master Metrics at /metrics')

        master_check_group.add_argument(
            '--project-count',
            action='store_true',
            default=None,
            help='Query the Openshift Master for Number of Projects')

        master_check_group.add_argument(
            '--pod-count',
            action='store_true',
            default=None,
            help='Query the Openshift Master for Number of Running Pods')

        master_check_group.add_argument(
            '--user-count',
            action='store_true',
            default=None,
            help='Query the Openshift Master for Number of Users')

        self.args = parser.parse_args()

    def api_ping(self):
        """ Verify the Openshift API health is responding correctly """

        print "\nPerforming Openshift API ping check..."

        response = self.ora.get('/api/v1/nodes')
        print "\nOpenshift API ping is alive"
        print "Number of nodes in the Openshift cluster: %s" % len(
            response['items'])

        self.zagg_sender.add_zabbix_keys({
            'openshift.master.api.ping':
            1,
            'openshift.master.node.count':
            len(response['items'])
        })

    def healthz_check(self):
        """ check the /healthz API call """

        print "\nPerforming /healthz check..."

        response = self.ora.get('/healthz', rtype='text')
        print "healthz check returns: %s " % response

        self.zagg_sender.add_zabbix_keys(
            {'openshift.master.api.healthz': str('ok' in response).lower()})

    def metric_check(self):
        """ collect certain metrics from the /metrics API call """

        print "\nPerforming /metrics check..."
        response = self.ora.get('/metrics', rtype='text')

        for metric_type in text_string_to_metric_families(response):

            # Collect the apiserver_request_latencies_summary{resource="pods",verb="LIST",quantiles in /metrics
            # Collect the apiserver_request_latencies_summary{resource="pods",verb="WATCHLIST",quantiles in /metrics
            if metric_type.name == 'apiserver_request_latencies_summary':
                key_str = 'openshift.master.apiserver.latency.summary'
                for sample in metric_type.samples:
                    if (sample[1]['resource'] == 'pods'
                            and 'quantile' in sample[1]
                            and 'LIST' in sample[1]['verb']):
                        curr_key_str = key_str + ".pods.quantile.%s.%s" % (
                            sample[1]['verb'],
                            sample[1]['quantile'].split('.')[1])

                        if math.isnan(sample[2]):
                            value = 0
                        else:
                            value = sample[2]

                        self.zagg_sender.add_zabbix_keys(
                            {curr_key_str.lower(): int(value / 1000)})

            # Collect the scheduler_e2e_scheduling_latency_microseconds{quantiles in /metrics
            if metric_type.name == 'scheduler_e2e_scheduling_latency_microseconds':
                for sample in metric_type.samples:
                    if 'quantile' in sample[1]:
                        key_str = 'openshift.master.scheduler.e2e.scheduling.latency'
                        curr_key_str = key_str + ".quantile.%s" % (
                            sample[1]['quantile'].split('.')[1])

                        if math.isnan(sample[2]):
                            value = 0
                        else:
                            value = sample[2]

                        self.zagg_sender.add_zabbix_keys(
                            {curr_key_str.lower(): int(value / 1000)})

        self.zagg_sender.add_zabbix_keys({'openshift.master.metric.ping':
                                          1})  # Openshift Metrics are up

    def project_count(self):
        """ check the number of projects in Openshift """

        print "\nPerforming project count check..."

        excluded_names = [
            'openshift', 'openshift-infra', 'default', 'ops-monitor'
        ]
        response = self.ora.get('/oapi/v1/projects')

        project_names = [
            project['metadata']['name'] for project in response['items']
        ]
        valid_names = set(project_names) - set(excluded_names)

        print "Project count: %s" % len(valid_names)

        self.zagg_sender.add_zabbix_keys(
            {'openshift.project.count': len(valid_names)})

    def pod_count(self):
        """ check the number of pods in Openshift """

        print "\nPerforming pod count check..."

        response = self.ora.get('/api/v1/pods')

        # Get running pod count
        running_pod_count = 0
        for i in response['items']:
            if 'containerStatuses' in i['status']:
                if 'running' in i['status']['containerStatuses'][0]['state']:
                    running_pod_count += 1

        # Get running pod count on compute only nodes (non-infra)
        running_user_pod_count = 0
        for i in response['items']:
            if 'containerStatuses' in i['status']:
                if 'running' in i['status']['containerStatuses'][0]['state']:
                    if 'nodeSelector' in i['spec']:
                        # logging pods don't have a selector on 'type'
                        if 'type' in i['spec']['nodeSelector'] \
                           and i['spec']['nodeSelector']['type'] == 'compute':
                            running_user_pod_count += 1

        print "Total pod count: %s" % len(response['items'])
        print "Running pod count: %s" % running_pod_count
        print "User Running pod count: %s" % running_user_pod_count

        self.zagg_sender.add_zabbix_keys({
            'openshift.master.pod.running.count':
            running_pod_count,
            'openshift.master.pod.user.running.count':
            running_user_pod_count,
            'openshift.master.pod.total.count':
            len(response['items'])
        })

    def user_count(self):
        """ check the number of users in Openshift """

        print "\nPerforming user count check..."

        response = self.ora.get('/oapi/v1/users')

        print "Total user count: %s" % len(response['items'])
        self.zagg_sender.add_zabbix_keys(
            {'openshift.master.user.count': len(response['items'])})
class OpenshiftWebServiceChecker(object):
    """ Checks for Openshift Pods """

    def __init__(self):
        self.args = None
        self.ora = None
        self.zagg_sender = None
        self.service_ip = None
        self.service_port = '443'

    def run(self):
        """  Main function to run the check """

        self.parse_args()
        self.ora = OpenshiftRestApi()
        self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug)

        status = False
        try:
            self.get_service()
            status = self.check_service()

        except Exception as ex:
            print "Problem retrieving data: %s " % ex.message

        self.zagg_sender.add_zabbix_keys({
            "openshift.webservice.{}.status".format(self.args.pod) : status})

        self.zagg_sender.send_metrics()

    def get_service(self):
        """ Gets the service for a pod """

        print "\nLooking up services for pod\n"

        api_url = "/api/v1/services"
        if (str(self.args.namespace) != "None") & \
            (str(self.args.namespace) != "all"):
            api_url = '/api/v1/namespaces/{}/services'.format(self.args.namespace)

        print "using api url {}".format(api_url)

        api_yaml = self.ora.get(api_url, rtype='text')
        services = yaml.safe_load(api_yaml)

        for service in services["items"]:
            if self.args.pod and \
                self.args.pod in service["metadata"]["name"]:
                print "service IP is {}".format(service["spec"]["clusterIP"])
                self.service_ip = service["spec"]["clusterIP"]
                if self.args.portname is not None:
                    for port in service["spec"]["ports"]:
                        if port["name"] == self.args.portname:
                            self.service_port = port["port"]
                else:
                    self.service_port = service["spec"]["ports"][0]["port"]

    def check_service(self):
        """ Checks the web service """

        print "\nChecking web service\n"

        if self.args.insecure:
            proto = 'http'
        else:
            proto = 'https'

        url = '{}://{}:{}/{}'.format(
            proto,
            self.service_ip,
            self.service_port,
            self.args.url,
        )

        try:
            print "Performing check on URL: {}".format(url)
            response = urllib2.urlopen(url, timeout=30)

            if str(response.getcode()) == self.args.status:
                if self.args.content is None \
                    or self.args.content in response.read():
                    return True

        except urllib2.URLError:
            print "Received error accessing URL: {}".format(url)
        except socket.timeout:
            print "Timed out accessing URL: {}".format(url)

        return False


    def parse_args(self):
        """ parse the args from the cli """

        parser = argparse.ArgumentParser(description='Openshift pod sender')
        parser.add_argument('-p', '--pod', default=None, help='Check for pod with this specific name')
        parser.add_argument('-n', '--namespace', default=None, help='Check for pods in this namespace - "all" for all')
        parser.add_argument('-P', '--portname', default=None, help='name of the port to check')
        parser.add_argument('-u', '--url', default="/", help='URL to check. Defaults to "/".')
        parser.add_argument('-s', '--status', default="200", help='HTTP status code to expect. Defaults to 200')
        parser.add_argument('-c', '--content', default=None, help='Looks for a string in the content of the response.')
        parser.add_argument('-i', '--insecure', action='store_true', default=False, help='Use insecure http connection')
        parser.add_argument('-S', '--secure', action='store_true', default=False, help='Use secure https connection (default)')
        parser.add_argument('-v', '--verbose', action='store_true', default=None, help='Verbose?')
        parser.add_argument('--debug', action='store_true', default=None, help='Debug?')

        self.args = parser.parse_args()
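

# A minimal entry-point sketch (an assumption; the real script may differ):
if __name__ == '__main__':
    OWSC = OpenshiftWebServiceChecker()
    OWSC.run()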
    def __init__(self):
        self.args = None
        self.zagg_sender = None
        self.ora = OpenshiftRestApi()
class OpenshiftClusterCapacity(object):
    ''' Checks for cluster capacity '''
    def __init__(self):
        self.args = None
        self.zagg_sender = None
        self.ora = None
        self.sql_conn = None
        self.zbx_key_prefix = "openshift.master.cluster.compute_nodes."

    def run(self):
        '''  Main function to run the check '''

        self.parse_args()
        self.zagg_sender = ZaggSender(verbose=self.args.verbose,
                                      debug=self.args.debug)

        master_cfg = []
        with open(self.args.master_config, 'r') as yml:
            master_cfg = yaml.load(yml)
        self.ora = OpenshiftRestApi(
            host=master_cfg['oauthConfig']['masterURL'], verify_ssl=True)

        self.cluster_capacity()

        if not self.args.dry_run:
            self.zagg_sender.send_metrics()

    def parse_args(self):
        ''' parse the args from the cli '''

        parser = argparse.ArgumentParser(description='Cluster capacity sender')
        parser.add_argument(
            '--master-config',
            default='/etc/origin/master/master-config.yaml',
            help='Location of OpenShift master-config.yaml file')
        parser.add_argument('-v',
                            '--verbose',
                            action='store_true',
                            default=None,
                            help='Verbose?')
        parser.add_argument('--debug',
                            action='store_true',
                            default=None,
                            help='Debug?')
        parser.add_argument('--dry-run',
                            action='store_true',
                            default=False,
                            help='Do not send results to Zabbix')

        self.args = parser.parse_args()
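
        # Hypothetical example invocation (script name is an assumption):
        #   ./cluster-capacity.py --master-config /etc/origin/master/master-config.yaml --dry-run -v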

    def load_nodes(self):
        ''' load nodes into SQL '''

        self.sql_conn.execute('''CREATE TABLE nodes
                                 (name text, type text, api text,
                                  max_cpu integer, max_memory integer,
                                  max_pods integer)''')
        response = self.ora.get('/api/v1/nodes')

        for new_node in response['items']:
            # Skip nodes not in 'Ready' state
            node_ready = False
            for condition in new_node['status']['conditions']:
                if condition['type'] == 'Ready' and \
                   condition['status'] == 'True':
                    node_ready = True
            if not node_ready:
                continue

            # Skip unschedulable nodes
            if new_node['spec'].get('unschedulable'):
                continue

            node = {}
            node['name'] = new_node['metadata']['name']
            node['type'] = new_node['metadata']['labels'].get(
                'type', 'unknown')
            node['api'] = new_node['metadata']['selfLink']

            if 'allocatable' in new_node['status']:
                cpu = new_node['status']['allocatable']['cpu']
                mem = new_node['status']['allocatable']['memory']
                node['max_pods'] = int(
                    new_node['status']['allocatable']['pods'])
            else:
                cpu = new_node['status']['capacity']['cpu']
                mem = new_node['status']['capacity']['memory']
                node['max_pods'] = int(new_node['status']['capacity']['pods'])

            node['max_cpu'] = to_milicores(cpu)
            node['max_memory'] = to_bytes(mem)

            if self.args.debug:
                print "Adding node: {}".format(str(node))

            self.sql_conn.execute(
                'INSERT INTO nodes VALUES (?,?,?,?,?,?)',
                (node['name'], node['type'], node['api'], node['max_cpu'],
                 node['max_memory'], node['max_pods']))

    @staticmethod
    def load_container_limits(pod, containers):
        ''' process/store container limits data '''
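
        # Example 'resources' shape this expects (hypothetical values):
        #   {'limits':   {'cpu': '500m', 'memory': '512Mi'},
        #    'requests': {'cpu': '100m', 'memory': '256Mi'}}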

        for container in containers:
            if 'limits' in container['resources']:
                pod['cpu_limits'] = int(pod.get('cpu_limits', 0)) \
                    + int(to_milicores(container['resources']['limits'].get('cpu', '0')))

                pod['memory_limits'] = int(pod.get('memory_limits', 0)) \
                    + int(to_bytes(container['resources']['limits'].get('memory', '0')))

            if 'requests' in container['resources']:
                pod['cpu_requests'] = int(pod.get('cpu_requests', 0)) \
                    + int(to_milicores(container['resources']['requests'].get('cpu', '0')))

                pod['memory_requests'] = int(pod.get('memory_requests', 0)) \
                    + int(to_bytes(container['resources']['requests'].get('memory', '0')))

    def load_pods(self):
        ''' put pod details into db '''

        self.sql_conn.execute('''CREATE TABLE pods
                                 (name text, namespace text, api text,
                                  cpu_limits integer, cpu_requests integer,
                                  memory_limits integer,
                                  memory_requests integer, node text)''')
        response = self.ora.get('/api/v1/pods')

        for new_pod in response['items']:
            if new_pod['status']['phase'] != 'Running':
                continue

            pod = {}
            pod['name'] = new_pod['metadata']['name']
            pod['namespace'] = new_pod['metadata']['namespace']
            pod['api'] = new_pod['metadata']['selfLink']
            pod['node'] = new_pod['spec']['nodeName']
            self.load_container_limits(pod, new_pod['spec']['containers'])

            self.sql_conn.execute(
                'INSERT INTO pods VALUES (?,?,?,?,?,?,?,?)',
                (pod['name'], pod['namespace'], pod['api'],
                 pod.get('cpu_limits'), pod.get('cpu_requests'),
                 pod.get('memory_limits'), pod.get('memory_requests'),
                 pod['node']))

    def get_largest_pod(self):
        ''' return single largest memory request number for all running pods '''

        max_pod = 0
        for row in self.sql_conn.execute('''SELECT MAX(memory_requests)
                                            FROM pods, nodes
                                            WHERE pods.node=nodes.name
                                              AND nodes.type="compute"'''):
            max_pod = row[0]

        return max_pod

    def how_many_schedulable(self, node_size):
        ''' return how many pods with memory request 'node_size' can be scheduled '''

        nodes = {}

        # get max mem for each compute node
        for row in self.sql_conn.execute('''SELECT nodes.name, nodes.max_memory
                                            FROM nodes
                                            WHERE nodes.type="compute"'''):
            nodes[row[0]] = {
                'max_memory': row[1],
                # set memory_scheduled to 0 because the node may have
                # no pods running, and the next SQL query below would
                # leave this field unpopulated
            }

        # get memory requests for all pods on all compute nodes
        for row in self.sql_conn.execute('''SELECT nodes.name,
                                                   SUM(pods.memory_requests)
                                            FROM pods, nodes
                                            WHERE pods.node=nodes.name
                                              AND nodes.type="compute"
                                            GROUP BY nodes.name'''):
            nodes[row[0]]['memory_scheduled'] = row[1]

        schedulable = 0
        for node in nodes.keys():
            # TODO: Some containers from `oc get pods --all-namespaces -o json`
            # don't have resources scheduled, causing memory_scheduled == 0
            available = nodes[node]['max_memory'] - \
                        nodes[node]['memory_scheduled']
            num = available / node_size
            # ignore negative number (overcommitted nodes)
            if num > 0:
                schedulable += num

        return schedulable

    def get_compute_nodes_max_schedulable_cpu(self):
        ''' calculate total schedulable CPU (in millicores) for all compute nodes '''
        max_cpu = 0
        for row in self.sql_conn.execute('''SELECT SUM(nodes.max_cpu)
                                            FROM nodes
                                            WHERE nodes.type="compute" '''):
            max_cpu = row[0]
        return max_cpu

    def get_compute_nodes_max_schedulable_mem(self):
        ''' calculate total schedulable memory for all compute nodes '''
        max_mem = 0
        for row in self.sql_conn.execute('''SELECT SUM(nodes.max_memory)
                                            FROM nodes
                                            WHERE nodes.type="compute" '''):
            max_mem = row[0]
        return max_mem

    def get_compute_nodes_scheduled_cpu(self):
        ''' calculate cpu scheduled to pods
            (total requested and percentage of cluster-wide total) '''
        max_cpu = self.get_compute_nodes_max_schedulable_cpu()
        cpu_requests_for_all_pods = 0
        for row in self.sql_conn.execute('''SELECT SUM(pods.cpu_requests)
                                            FROM pods, nodes
                                            WHERE pods.node = nodes.name
                                              AND nodes.type = "compute" '''):
            cpu_requests_for_all_pods = row[0]

        cpu_scheduled_as_pct = 100.0 * cpu_requests_for_all_pods / max_cpu

        cpu_unscheduled = max_cpu - cpu_requests_for_all_pods
        cpu_unscheduled_as_pct = 100.0 * cpu_unscheduled / max_cpu

        return (cpu_requests_for_all_pods, cpu_scheduled_as_pct,
                cpu_unscheduled, cpu_unscheduled_as_pct)

    def get_compute_nodes_scheduled_mem(self):
        ''' calculate mem allocated to pods
            (total requested and percentage of cluster-wide total) '''
        max_mem = self.get_compute_nodes_max_schedulable_mem()
        mem_requests_for_all_pods = 0
        for row in self.sql_conn.execute('''SELECT SUM(pods.memory_requests)
                                            FROM pods, nodes
                                            WHERE pods.node = nodes.name
                                              AND nodes.type = "compute" '''):
            mem_requests_for_all_pods = row[0]

        mem_scheduled_as_pct = 100.0 * mem_requests_for_all_pods / max_mem

        mem_unscheduled = max_mem - mem_requests_for_all_pods
        mem_unscheduled_as_pct = 100.0 * mem_unscheduled / max_mem

        return (mem_requests_for_all_pods, mem_scheduled_as_pct,
                mem_unscheduled, mem_unscheduled_as_pct)

    def get_oversub_cpu(self):
        ''' return percentage oversubscribed based on CPU limits on running pods '''
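        # e.g. (hypothetical numbers): pod CPU limits totalling 15000
        # millicores on 10000 millicores of schedulable CPU gives
        # ((15000 / 10000) * 100) - 100 = 50% oversubscribed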
        max_cpu = self.get_compute_nodes_max_schedulable_cpu()
        pod_cpu_limits = 0

        # get cpu limits for all running pods
        for row in self.sql_conn.execute('''SELECT SUM(pods.cpu_limits)
                                            FROM pods, nodes
                                            WHERE pods.node = nodes.name
                                              AND nodes.type = "compute" '''):
            pod_cpu_limits = row[0]

        return ((float(pod_cpu_limits) / max_cpu) * 100.0) - 100

    def get_oversub_mem(self):
        ''' return percentage oversubscribed based on memory limits on running pods '''
        max_mem = self.get_compute_nodes_max_schedulable_mem()
        pod_mem_limits = 0

        # get mem limits for all running pods
        for row in self.sql_conn.execute('''SELECT SUM(pods.memory_limits)
                                            FROM pods, nodes
                                            WHERE pods.node = nodes.name
                                              AND nodes.type = "compute" '''):
            pod_mem_limits = row[0]

        return ((float(pod_mem_limits) / max_mem) * 100.0) - 100

    def do_cpu_stats(self):
        ''' gather and report CPU statistics '''
        # CPU items
        zbx_key_max_schedulable_cpu = self.zbx_key_prefix + "max_schedulable.cpu"
        zbx_key_scheduled_cpu = self.zbx_key_prefix + "scheduled.cpu"
        zbx_key_scheduled_cpu_pct = self.zbx_key_prefix + "scheduled.cpu_pct"
        zbx_key_unscheduled_cpu = self.zbx_key_prefix + "unscheduled.cpu"
        zbx_key_unscheduled_cpu_pct = self.zbx_key_prefix + "unscheduled.cpu_pct"
        zbx_key_oversub_cpu_pct = self.zbx_key_prefix + "oversubscribed.cpu_pct"

        print "CPU Stats:"
        max_schedulable_cpu = self.get_compute_nodes_max_schedulable_cpu()
        self.zagg_sender.add_zabbix_keys(
            {zbx_key_max_schedulable_cpu: max_schedulable_cpu})

        (scheduled_cpu, scheduled_cpu_pct, unscheduled_cpu,
         unscheduled_cpu_pct) = self.get_compute_nodes_scheduled_cpu()
        oversub_cpu_pct = self.get_oversub_cpu()

        print "  Scheduled CPU for compute nodes:\t\t\t" + \
              "{:>15} milicores".format(scheduled_cpu)
        print "  Unscheduled CPU for compute nodes:\t\t\t" + \
              "{:>15} milicores".format(unscheduled_cpu)
        print "  Maximum (total) schedulable CPU for compute " + \
              "nodes:\t{:>15} milicores".format(max_schedulable_cpu)
        print "  Percent scheduled CPU for compute nodes:\t\t\t" + \
              "{:.2f}%".format(scheduled_cpu_pct)
        print "  Percent unscheduled CPU for compute nodes:\t\t\t" + \
              "{:.2f}%".format(unscheduled_cpu_pct)
        print "  Percent oversubscribed CPU for compute nodes: \t\t" + \
              "{:.2f}%".format(oversub_cpu_pct)
        self.zagg_sender.add_zabbix_keys(
            {zbx_key_scheduled_cpu: scheduled_cpu})
        self.zagg_sender.add_zabbix_keys(
            {zbx_key_scheduled_cpu_pct: int(scheduled_cpu_pct)})
        self.zagg_sender.add_zabbix_keys(
            {zbx_key_unscheduled_cpu: unscheduled_cpu})
        self.zagg_sender.add_zabbix_keys(
            {zbx_key_unscheduled_cpu_pct: int(unscheduled_cpu_pct)})
        self.zagg_sender.add_zabbix_keys(
            {zbx_key_oversub_cpu_pct: int(oversub_cpu_pct)})

    def do_mem_stats(self):
        ''' gather and report memory statistics '''
        # Memory items
        zbx_key_max_schedulable_mem = self.zbx_key_prefix + "max_schedulable.mem"
        zbx_key_scheduled_mem = self.zbx_key_prefix + "scheduled.mem"
        zbx_key_scheduled_mem_pct = self.zbx_key_prefix + "scheduled.mem_pct"
        zbx_key_unscheduled_mem = self.zbx_key_prefix + "unscheduled.mem"
        zbx_key_unscheduled_mem_pct = self.zbx_key_prefix + "unscheduled.mem_pct"
        zbx_key_oversub_mem_pct = self.zbx_key_prefix + "oversubscribed.mem_pct"

        print "\nMemory Stats:"
        max_schedulable_mem = self.get_compute_nodes_max_schedulable_mem()
        self.zagg_sender.add_zabbix_keys(
            {zbx_key_max_schedulable_mem: max_schedulable_mem})

        (scheduled_mem, scheduled_mem_pct, unscheduled_mem,
         unscheduled_mem_pct) = self.get_compute_nodes_scheduled_mem()
        oversub_mem_pct = self.get_oversub_mem()
        print "  Scheduled mem for compute nodes:\t\t\t" + \
              "{:>20} bytes".format(scheduled_mem)
        print "  Unscheduled mem for compute nodes:\t\t\t" + \
              "{:>20} bytes".format(unscheduled_mem)
        print "  Maximum (total) schedulable memory for compute nodes:\t" + \
              "{:>20} bytes".format(max_schedulable_mem)
        print "  Percent scheduled mem for compute nodes:\t\t\t" + \
              "{:.2f}%".format(scheduled_mem_pct)
        print "  Percent unscheduled mem for compute nodes:\t\t\t" + \
              "{:.2f}%".format(unscheduled_mem_pct)
        print "  Percent oversubscribed mem for compute nodes: \t\t" + \
              "{:.2f}%".format(oversub_mem_pct)
        self.zagg_sender.add_zabbix_keys(
            {zbx_key_scheduled_mem: scheduled_mem})
        self.zagg_sender.add_zabbix_keys(
            {zbx_key_scheduled_mem_pct: int(scheduled_mem_pct)})
        self.zagg_sender.add_zabbix_keys(
            {zbx_key_unscheduled_mem: unscheduled_mem})
        self.zagg_sender.add_zabbix_keys(
            {zbx_key_unscheduled_mem_pct: int(unscheduled_mem_pct)})
        self.zagg_sender.add_zabbix_keys(
            {zbx_key_oversub_mem_pct: int(oversub_mem_pct)})

    def cluster_capacity(self):
        ''' check capacity of compute nodes on cluster'''

        # Other zabbix items
        zbx_key_max_pods = "openshift.master.cluster.max_mem_pods_schedulable"

        self.sql_conn = sqlite3.connect(':memory:')

        self.load_nodes()
        self.load_pods()

        self.do_cpu_stats()
        self.do_mem_stats()

        print "\nOther stats:"
        largest = self.get_largest_pod()
        if self.args.debug:
            print "  Largest memory pod: {}".format(largest)

        schedulable = self.how_many_schedulable(largest)
        print "  Number of max-size nodes schedulable:\t\t\t\t{}".format(
            schedulable)
        self.zagg_sender.add_zabbix_keys({zbx_key_max_pods: schedulable})
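

# Hedged sketch of the unit helpers this class relies on; the real
# openshift_tools implementations of to_milicores/to_bytes may differ:
def to_milicores(cpu):
    ''' convert a k8s CPU quantity ('500m' or '2') to millicores '''
    cpu = str(cpu)
    if cpu.endswith('m'):
        return int(cpu[:-1])
    return int(cpu) * 1000

def to_bytes(mem):
    ''' convert a k8s memory quantity ('512Mi', '4Gi', '1G', ...) to bytes '''
    mem = str(mem)
    # binary suffixes first so '1Ki' is not matched by the decimal 'K'
    suffixes = [('Ki', 2**10), ('Mi', 2**20), ('Gi', 2**30), ('Ti', 2**40),
                ('K', 10**3), ('M', 10**6), ('G', 10**9), ('T', 10**12)]
    for suffix, factor in suffixes:
        if mem.endswith(suffix):
            return int(mem[:-len(suffix)]) * factor
    return int(mem)

# A minimal entry-point sketch (an assumption):
if __name__ == '__main__':
    OCC = OpenshiftClusterCapacity()
    OCC.run()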
# Our jenkins server does not include these rpms.
# In the future we might move this to a container where these
# libs might exist
#pylint: disable=import-error
from openshift_tools.web.openshift_rest_api import OpenshiftRestApi
from openshift_tools.monitoring.metric_sender import MetricSender

import argparse
import logging
logging.basicConfig(
    format='%(asctime)s - %(relativeCreated)6d - %(levelname)-8s - %(message)s',
)
logger = logging.getLogger()
logger.setLevel(logging.INFO)

ora = OpenshiftRestApi()

valid_node_types = ["master", "infra", "compute"]

def parse_args():
    """ parse the args from the cli """
    logger.debug("parse_args()")

    parser = argparse.ArgumentParser(description='OpenShift node counts')
    parser.add_argument('-v', '--verbose', action='store_true', default=None, help='Verbose?')
    return parser.parse_args()

def send_metrics(expected, actual):
    """ send data to MetricSender """
    logger.debug("send_metrics()")
class OpenshiftWebServiceChecker(object):
    """ Checks for Openshift Pods """

    def __init__(self):
        self.args = None
        self.ora = None
        self.metric_sender = None
        self.service_ip = None
        self.service_port = '443'

    def run(self):
        """  Main function to run the check """

        self.parse_args()
        self.ora = OpenshiftRestApi()
        self.metric_sender = MetricSender(verbose=self.args.verbose, debug=self.args.debug)

        status = False
        try:
            self.get_service()
            status = self.check_service()

        except Exception as ex:
            print "Problem retrieving data: %s " % ex.message

        self.metric_sender.add_metric({
            "openshift.webservice.{}.status".format(self.args.pod) : status})

        self.metric_sender.send_metrics()

    def get_service(self):
        """ Gets the service for a pod """

        print "\nLooking up services for pod\n"

        api_url = "/api/v1/services"
        if (str(self.args.namespace) != "None") & \
            (str(self.args.namespace) != "all"):
            api_url = '/api/v1/namespaces/{}/services'.format(self.args.namespace)

        print "using api url {}".format(api_url)

        api_yaml = self.ora.get(api_url, rtype='text')
        services = yaml.safe_load(api_yaml)

        for service in services["items"]:
            if self.args.pod and \
                self.args.pod in service["metadata"]["name"]:
                print "service IP is {}".format(service["spec"]["clusterIP"])
                self.service_ip = service["spec"]["clusterIP"]
                if self.args.portname is not None:
                    for port in service["spec"]["ports"]:
                        if port["name"] == self.args.portname:
                            self.service_port = port["port"]
                else:
                    self.service_port = service["spec"]["ports"][0]["port"]

    def check_service(self):
        """ Checks the web service """

        print "\nChecking web service\n"

        if self.args.insecure:
            proto = 'http'
        else:
            proto = 'https'

        url = '{}://{}:{}/{}'.format(
            proto,
            self.service_ip,
            self.service_port,
            self.args.url,
        )

        try:
            print "Performing check on URL: {}".format(url)
            response = urllib2.urlopen(url, timeout=30)

            if str(response.getcode()) == self.args.status:
                if self.args.content is None \
                    or self.args.content in response.read():
                    return True

        except urllib2.URLError:
            print "Received error accessing URL: {}".format(url)
        except socket.timeout:
            print "Timed out accessing URL: {}".format(url)

        return False


    def parse_args(self):
        """ parse the args from the cli """

        parser = argparse.ArgumentParser(description='Openshift web service checker')
        parser.add_argument('-p', '--pod', default=None, help='Check for pod with this specific name')
        parser.add_argument('-n', '--namespace', default=None, help='Check for pods in this namespace - "all" for all')
        parser.add_argument('-P', '--portname', default=None, help='name of the port to check')
        parser.add_argument('-u', '--url', default="/", help='URL to check. Defaults to "/".')
        parser.add_argument('-s', '--status', default="200", help='HTTP status code to expect. Defaults to 200')
        parser.add_argument('-c', '--content', default=None, help='Looks for a string in the content of the response.')
        parser.add_argument('-i', '--insecure', action='store_true', default=False,
                            help='Use insecure http connection')
        parser.add_argument('-S', '--secure', action='store_true', default=False,
                            help='Use secure https connection (default)')
        parser.add_argument('-v', '--verbose', action='store_true', default=None, help='Verbose?')
        parser.add_argument('--debug', action='store_true', default=None, help='Debug?')

        self.args = parser.parse_args()
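
# Hedged usage sketch: an assumed entry point, mirroring how checks like this
# are normally invoked.
if __name__ == '__main__':
    OWSC = OpenshiftWebServiceChecker()
    OWSC.run()
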
class OpenshiftMasterZaggClient(object):
    """ Checks for the Openshift Master """

    def __init__(self):
        self.args = None
        self.zagg_sender = None
        self.ora = None
        self.zabbix_api_key = None
        self.zabbix_healthz_key = None

    def run(self):
        """  Main function to run the check """

        self.parse_args()
        self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug)

        if self.args.local:
            self.ora = OpenshiftRestApi()
            self.args.api_ping = True
            self.args.healthz = True
            self.zabbix_api_key = 'openshift.master.local.api.ping'
            self.zabbix_healthz_key = 'openshift.master.local.api.healthz'
        else:
            master_cfg_from_yaml = []
            with open('/etc/origin/master/master-config.yaml', 'r') as yml:
                master_cfg_from_yaml = yaml.safe_load(yml)
            self.ora = OpenshiftRestApi(host=master_cfg_from_yaml['oauthConfig']['masterURL'],
                                        verify_ssl=True)

            self.zabbix_api_key = 'openshift.master.api.ping'
            self.zabbix_healthz_key = 'openshift.master.api.healthz'

        try:
            if self.args.healthz or self.args.all_checks:
                self.healthz_check()

        except Exception as ex:
            print "Problem performing healthz check: %s " % ex.message
            self.zagg_sender.add_zabbix_keys({self.zabbix_healthz_key: 'false'})

        try:
            if self.args.api_ping or self.args.all_checks:
                self.api_ping()

            if self.args.project_count or self.args.all_checks:
                self.project_count()

            if self.args.pod_count or self.args.all_checks:
                self.pod_count()

            if self.args.user_count or self.args.all_checks:
                self.user_count()

            if self.args.pv_info or self.args.all_checks:
                self.pv_info()

            if self.args.nodes_not_ready or self.args.all_checks:
                self.nodes_not_ready()

        except Exception as ex:
            print "Problem performing Openshift API checks: %s " % ex.message
            self.zagg_sender.add_zabbix_keys({self.zabbix_api_key: 0}) # Openshift API is down

        try:
            if self.args.metrics or self.args.all_checks:
                self.metric_check()

        except Exception as ex:
            print "Problem getting Openshift metrics at /metrics: %s " % ex.message
            self.zagg_sender.add_zabbix_keys({'openshift.master.metric.ping' : 0}) # Openshift Metrics are down

        self.zagg_sender.send_metrics()

    def parse_args(self):
        """ parse the args from the cli """

        parser = argparse.ArgumentParser(description='Network metric sender')
        parser.add_argument('-v', '--verbose', action='store_true', default=None, help='Verbose?')
        parser.add_argument('--debug', action='store_true', default=None, help='Debug?')
        parser.add_argument('-l', '--local', action='store_true', default=False,
                            help='Run local checks against the local API (https://127.0.0.1)')

        master_check_group = parser.add_argument_group('Different Checks to Perform')
        master_check_group.add_argument('--all-checks', action='store_true', default=None,
                                        help='Do all of the checks')

        master_check_group.add_argument('--api-ping', action='store_true', default=None,
                                        help='Verify the Openshift API is alive')

        master_check_group.add_argument('--healthz', action='store_true', default=None,
                                        help='Query the Openshift Master API /healthz')

        master_check_group.add_argument('--metrics', action='store_true', default=None,
                                        help='Query the Openshift Master Metrics at /metrics')

        master_check_group.add_argument('--project-count', action='store_true', default=None,
                                        help='Query the Openshift Master for Number of Projects')

        master_check_group.add_argument('--pod-count', action='store_true', default=None,
                                        help='Query the Openshift Master for Number of Running Pods')

        master_check_group.add_argument('--user-count', action='store_true', default=None,
                                        help='Query the Openshift Master for Number of Users')

        master_check_group.add_argument('--pv-info', action='store_true', default=None,
                                        help='Query the Openshift Master for Persistent Volumes Info')

        master_check_group.add_argument('--nodes-not-ready', action='store_true', default=None,
                                        help='Query the Openshift Master for number of nodes not in Ready state')

        self.args = parser.parse_args()

    def api_ping(self):
        """ Verify the Openshift API health is responding correctly """

        print "\nPerforming Openshift API ping check..."

        response = self.ora.get('/api/v1/nodes')
        print "\nOpenshift API ping is alive"
        print "Number of nodes in the Openshift cluster: %s" % len(response['items'])

        self.zagg_sender.add_zabbix_keys({self.zabbix_api_key: 1,
                                          'openshift.master.node.count': len(response['items'])})

    def healthz_check(self):
        """ check the /healthz API call """

        print "\nPerforming /healthz check..."

        response = self.ora.get('/healthz', rtype='text')
        print "healthz check returns: %s " % response

        self.zagg_sender.add_zabbix_keys({self.zabbix_healthz_key: str('ok' in response).lower()})

    def metric_check(self):
        """ collect certain metrics from the /metrics API call """

        print "\nPerforming /metrics check..."
        response = self.ora.get('/metrics', rtype='text')

        for metric_type in text_string_to_metric_families(response):

            # Collect the apiserver_request_latencies_summary{resource="pods",verb="LIST",...} quantiles
            # and the apiserver_request_latencies_summary{resource="pods",verb="WATCHLIST",...} quantiles from /metrics
            if metric_type.name == 'apiserver_request_latencies_summary':
                key_str = 'openshift.master.apiserver.latency.summary'
                for sample in metric_type.samples:
                    if (sample[1]['resource'] == 'pods'
                            and 'quantile' in sample[1]
                            and 'LIST' in sample[1]['verb']):
                        # a quantile label such as "0.99" becomes the "99" key suffix
                        curr_key_str = key_str + ".pods.quantile.%s.%s" % (sample[1]['verb'],
                                                                           sample[1]['quantile'].split('.')[1])

                        if math.isnan(sample[2]):
                            value = 0
                        else:
                            value = sample[2]

                        # latency summaries are reported in microseconds; send milliseconds
                        self.zagg_sender.add_zabbix_keys({curr_key_str.lower(): int(value/1000)})

            # Collect the scheduler_e2e_scheduling_latency_microseconds{quantiles in /metrics
            if metric_type.name == 'scheduler_e2e_scheduling_latency_microseconds':
                for sample in metric_type.samples:
                    if 'quantile' in sample[1]:
                        key_str = 'openshift.master.scheduler.e2e.scheduling.latency'
                        curr_key_str = key_str + ".quantile.%s" % (sample[1]['quantile'].split('.')[1])

                        if math.isnan(sample[2]):
                            value = 0
                        else:
                            value = sample[2]

                        self.zagg_sender.add_zabbix_keys({curr_key_str.lower(): int(value/1000)})

        self.zagg_sender.add_zabbix_keys({'openshift.master.metric.ping' : 1}) # Openshift Metrics are up

    def project_count(self):
        """ check the number of projects in Openshift """

        print "\nPerforming project count check..."

        excluded_names = ['openshift', 'openshift-infra', 'default', 'ops-monitor']
        response = self.ora.get('/oapi/v1/projects')

        project_names = [project['metadata']['name'] for project in response['items']]
        valid_names = set(project_names) - set(excluded_names)

        print "Project count: %s" % len(valid_names)

        self.zagg_sender.add_zabbix_keys({'openshift.project.count' : len(valid_names)})

    def pod_count(self):
        """ check the number of pods in Openshift """

        print "\nPerforming pod count check..."

        response = self.ora.get('/api/v1/pods')

        # Get running pod count (only the first container's state is inspected)
        running_pod_count = 0
        for i in response['items']:
            if 'containerStatuses' in i['status']:
                if 'running' in i['status']['containerStatuses'][0]['state']:
                    running_pod_count += 1

        # Get running pod count on compute only nodes (non-infra)
        running_user_pod_count = 0
        for i in response['items']:
            if 'containerStatuses' in i['status']:
                if 'running' in i['status']['containerStatuses'][0]['state']:
                    if 'nodeSelector' in i['spec']:
                        if i['spec']['nodeSelector'].get('type') == 'compute':
                            running_user_pod_count += 1


        print "Total pod count: %s" % len(response['items'])
        print "Running pod count: %s" % running_pod_count
        print "User Running pod count: %s" % running_user_pod_count

        self.zagg_sender.add_zabbix_keys({'openshift.master.pod.running.count' : running_pod_count,
                                          'openshift.master.pod.user.running.count' : running_user_pod_count,
                                          'openshift.master.pod.total.count' : len(response['items'])})

    def user_count(self):
        """ check the number of users in Openshift """

        print "\nPerforming user count check..."

        response = self.ora.get('/oapi/v1/users')

        print "Total user count: %s" % len(response['items'])
        self.zagg_sender.add_zabbix_keys({'openshift.master.user.count' : len(response['items'])})

    def pv_info(self):
        """ Gather info about the persistent volumes in Openshift """

        print "\nPerforming persistent volume count...\n"

        response = self.ora.get('/api/v1/persistentvolumes')

        pv_capacity_total = 0
        pv_capacity_available = 0
        pv_types = {'Available': 0,
                    'Bound': 0,
                    'Released': 0,
                    'Failed': 0}

        # Dynamic items variables
        discovery_key_pv = 'disc.pv'
        item_prototype_macro_pv = '#OSO_PV'
        item_prototype_key_count = 'disc.pv.count'
        item_prototype_key_available = 'disc.pv.available'
        dynamic_pv_count = defaultdict(int)
        dynamic_pv_available = defaultdict(int)

        for item in response['items']:
            # gather dynamic pv counts
            dynamic_pv_count[item['spec']['capacity']['storage']] += 1

            # get count of each pv type available
            pv_types[item['status']['phase']] += 1

            # get info for the capacity and capacity available
            capacity = item['spec']['capacity']['storage']
            if item['status']['phase'] == 'Available':
                # get total available capacity
                pv_capacity_available = pv_capacity_available + int(capacity.replace('Gi', ''))

                # gather dynamic pv available counts
                dynamic_pv_available[item['spec']['capacity']['storage']] += 1

            pv_capacity_total = pv_capacity_total + int(capacity.replace('Gi', ''))

        print "Total Persistent Volume count: %s" % len(response['items'])
        print 'Total Persistent Volume Capacity: %s' % pv_capacity_total
        print 'Total Persistent Volume Available Capacity: %s' % pv_capacity_available

        self.zagg_sender.add_zabbix_keys(
            {'openshift.master.pv.total.count' : len(response['items']),
             'openshift.master.pv.space.total': pv_capacity_total,
             'openshift.master.pv.space.available': pv_capacity_available})

        for key, value in pv_types.iteritems():
            print "Total Persistent Volume %s count: %s" % (key, value)
            self.zagg_sender.add_zabbix_keys(
                {'openshift.master.pv.%s.count' % key.lower() : value})

        # Add dynamic items
        self.zagg_sender.add_zabbix_dynamic_item(discovery_key_pv, item_prototype_macro_pv, dynamic_pv_count.keys())

        for size, count in dynamic_pv_count.iteritems():
            print
            print "Total Persistent Volume %s count: %s" % (size, count)
            print "Total Persistent Volume available %s count: %s" % (size, dynamic_pv_available[size])

            self.zagg_sender.add_zabbix_keys({"%s[%s]" % (item_prototype_key_count, size) : count,
                                              "%s[%s]" % (item_prototype_key_available, size) : dynamic_pv_available[size]})

    def nodes_not_ready(self):
        """ check the number of nodes in the cluster that are not ready"""

        print "\nPerforming nodes not ready check..."

        response = self.ora.get('/api/v1/nodes')

        nodes_not_schedulable = []

        for n in response['items']:
            if "unschedulable" in n['spec']:
                nodes_not_schedulable.append(n)

        nodes_not_ready = []

        for n in response['items']:
            has_ready_status = False
            for cond in n['status']['conditions']:
                if cond['reason'] == "KubeletReady":
                    has_ready_status = True
                    if cond['status'].lower() != "true":
                        nodes_not_ready.append(n)
            if not has_ready_status:
                nodes_not_ready.append(n)


        print "Count of nodes not schedulable: %s" % len(nodes_not_schedulable)
        print "Count of nodes not ready: %s" % len(nodes_not_ready)

        self.zagg_sender.add_zabbix_keys(
            {'openshift.master.nodesnotready.count' : len(nodes_not_ready)})

        self.zagg_sender.add_zabbix_keys(
            {'openshift.master.nodesnotschedulable.count' : len(nodes_not_schedulable)})
    def run(self):
        """  Main function to run the check """

        self.parse_args()
        self.metric_sender = MetricSender(verbose=self.args.verbose, debug=self.args.debug)

        if self.args.local:
            self.ora = OpenshiftRestApi()
            self.args.api_ping = True
            self.args.healthz = True
            self.zabbix_api_key = 'openshift.master.local.api.ping'
            self.zabbix_healthz_key = 'openshift.master.local.api.healthz'
        else:
            master_cfg_from_yaml = []
            with open('/etc/origin/master/master-config.yaml', 'r') as yml:
                master_cfg_from_yaml = yaml.safe_load(yml)
            self.ora = OpenshiftRestApi(host=master_cfg_from_yaml['oauthConfig']['masterURL'],
                                        verify_ssl=True)

            self.zabbix_api_key = 'openshift.master.api.ping'
            self.zabbix_healthz_key = 'openshift.master.api.healthz'

        try:
            if self.args.healthz or self.args.all_checks:
                self.healthz_check()

        except Exception as ex:
            print "Problem performing healthz check: %s " % ex.message
            self.metric_sender.add_metric({self.zabbix_healthz_key: 'false'})

        try:
            if self.args.api_ping or self.args.all_checks:
                self.api_ping()

            if self.args.project_count or self.args.all_checks:
                self.project_count()

            if self.args.pod_count or self.args.all_checks:
                self.pod_count()

            if self.args.user_count or self.args.all_checks:
                self.user_count()

            if self.args.pv_info or self.args.all_checks:
                self.pv_info()

            if self.args.node_checks or self.args.all_checks:
                self.nodes_not_schedulable()
                self.nodes_not_ready()
                self.nodes_not_labeled()

        except Exception as ex:
            print "Problem performing Openshift API checks: %s " % ex.message
            self.metric_sender.add_metric({self.zabbix_api_key: 0}) # Openshift API is down

        try:
            if self.args.metrics or self.args.all_checks:
                self.metric_check()

        except Exception as ex:
            print "Problem getting Openshift metrics at /metrics: %s " % ex.message
            self.metric_sender.add_metric({'openshift.master.metric.ping' : 0}) # Openshift Metrics are down

        self.metric_sender.send_metrics()
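
    # nodes_not_schedulable() and nodes_not_labeled() are invoked by the run()
    # above but are not shown in this snippet. The sketches below are hedged
    # assumptions modeled on the nodes_not_ready() check earlier in this file;
    # the 'nodesnotlabeled' key name and the 'type' label are assumptions.
    def nodes_not_schedulable(self):
        """ count nodes marked unschedulable """

        response = self.ora.get('/api/v1/nodes')
        not_schedulable = [n for n in response['items']
                           if 'unschedulable' in n['spec']]

        print "Count of nodes not schedulable: %s" % len(not_schedulable)
        self.metric_sender.add_metric(
            {'openshift.master.nodesnotschedulable.count': len(not_schedulable)})

    def nodes_not_labeled(self):
        """ count nodes missing a node-type label """

        response = self.ora.get('/api/v1/nodes')
        not_labeled = [n for n in response['items']
                       if 'type' not in n['metadata'].get('labels', {})]

        print "Count of nodes not labeled: %s" % len(not_labeled)
        self.metric_sender.add_metric(
            {'openshift.master.nodesnotlabeled.count': len(not_labeled)})
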
class OpenshiftMasterZaggClient(object):
    """ Checks for the Openshift Master """

    def __init__(self):
        self.args = None
        self.zagg_sender = None
        self.ora = OpenshiftRestApi()

    def run(self):
        """  Main function to run the check """

        self.parse_args()
        self.zagg_sender = ZaggSender(verbose=self.args.verbose, debug=self.args.debug)

        try:
            if self.args.healthz or self.args.all_checks:
                self.healthz_check()
        except Exception as ex:
            print "Problem performing healthz check: %s " % ex.message
            self.zagg_sender.add_zabbix_keys({'openshift.master.api.healthz' : 'false'})

        try:
            if self.args.api_ping or self.args.all_checks:
                self.api_ping()

            if self.args.project_count or self.args.all_checks:
                self.project_count()

            if self.args.pod_count or self.args.all_checks:
                self.pod_count()

            if self.args.user_count or self.args.all_checks:
                self.user_count()
        except Exception as ex:
            print "Problem performing Openshift API checks: %s " % ex.message
            self.zagg_sender.add_zabbix_keys({'openshift.master.api.ping' : 0}) # Openshift API is down

        try:
            if self.args.metrics or self.args.all_checks:
                self.metric_check()
        except Exception as ex:
            print "Problem getting Openshift metrics at /metrics: %s " % ex.message
            self.zagg_sender.add_zabbix_keys({'openshift.master.metric.ping' : 0}) # Openshift Metrics are down

        self.zagg_sender.send_metrics()

    def parse_args(self):
        """ parse the args from the cli """

        parser = argparse.ArgumentParser(description='Openshift master metric sender')
        parser.add_argument('-v', '--verbose', action='store_true', default=None, help='Verbose?')
        parser.add_argument('--debug', action='store_true', default=None, help='Debug?')

        master_check_group = parser.add_argument_group('Different Checks to Perform')
        master_check_group.add_argument('--all-checks', action='store_true', default=None,
                                        help='Do all of the checks')

        master_check_group.add_argument('--api-ping', action='store_true', default=None,
                                        help='Verify the Openshift API is alive')

        master_check_group.add_argument('--healthz', action='store_true', default=None,
                                        help='Query the Openshift Master API /healthz')

        master_check_group.add_argument('--metrics', action='store_true', default=None,
                                        help='Query the Openshift Master Metrics at /metrics')

        master_check_group.add_argument('--project-count', action='store_true', default=None,
                                        help='Query the Openshift Master for Number of Projects')

        master_check_group.add_argument('--pod-count', action='store_true', default=None,
                                        help='Query the Openshift Master for Number of Running Pods')

        master_check_group.add_argument('--user-count', action='store_true', default=None,
                                        help='Query the Openshift Master for Number of Users')

        self.args = parser.parse_args()

    def api_ping(self):
        """ Verify the Openshift API health is responding correctly """

        print "\nPerforming Openshift API ping check..."

        response = self.ora.get('/api/v1/nodes')
        print "\nOpenshift API ping is alive"
        print "Number of nodes in the Openshift cluster: %s" % len(response['items'])

        self.zagg_sender.add_zabbix_keys({'openshift.master.api.ping' : 1,
                                          'openshift.master.node.count': len(response['items'])})

    def healthz_check(self):
        """ check the /healthz API call """

        print "\nPerforming /healthz check..."

        response = self.ora.get('/healthz', rtype='text')
        print "healthz check returns: %s " % response

        self.zagg_sender.add_zabbix_keys({'openshift.master.api.healthz' : str('ok' in response).lower()})

    def metric_check(self):
        """ collect certain metrics from the /metrics API call """

        print "\nPerforming /metrics check..."
        response = self.ora.get('/metrics', rtype='text')

        for metric_type in text_string_to_metric_families(response):

            # Collect the apiserver_request_latencies_summary{resource="pods",verb="LIST",...} quantiles
            # and the apiserver_request_latencies_summary{resource="pods",verb="WATCHLIST",...} quantiles from /metrics
            if metric_type.name == 'apiserver_request_latencies_summary':
                key_str = 'openshift.master.apiserver.latency.summary'
                for sample in metric_type.samples:
                    if (sample[1]['resource'] == 'pods'
                            and 'quantile' in sample[1]
                            and 'LIST' in sample[1]['verb']):
                        # a quantile label such as "0.99" becomes the "99" key suffix
                        curr_key_str = key_str + ".pods.quantile.%s.%s" % (sample[1]['verb'],
                                                                           sample[1]['quantile'].split('.')[1])

                        if math.isnan(sample[2]):
                            value = 0
                        else:
                            value = sample[2]

                        # latency summaries are reported in microseconds; send milliseconds
                        self.zagg_sender.add_zabbix_keys({curr_key_str.lower(): int(value/1000)})

            # Collect the scheduler_e2e_scheduling_latency_microseconds{quantiles in /metrics
            if metric_type.name == 'scheduler_e2e_scheduling_latency_microseconds':
                for sample in metric_type.samples:
                    if 'quantile' in sample[1]:
                        key_str = 'openshift.master.scheduler.e2e.scheduling.latency'
                        curr_key_str = key_str + ".quantile.%s" % (sample[1]['quantile'].split('.')[1])

                        if math.isnan(sample[2]):
                            value = 0
                        else:
                            value = sample[2]

                        self.zagg_sender.add_zabbix_keys({curr_key_str.lower(): int(value/1000)})

        self.zagg_sender.add_zabbix_keys({'openshift.master.metric.ping' : 1}) # Openshift Metrics are up

    def project_count(self):
        """ check the number of projects in Openshift """

        print "\nPerforming project count check..."

        excluded_names = ['openshift', 'openshift-infra', 'default', 'ops-monitor']
        response = self.ora.get('/oapi/v1/projects')

        project_names = [project['metadata']['name'] for project in response['items']]
        valid_names = set(project_names) - set(excluded_names)

        print "Project count: %s" % len(valid_names)

        self.zagg_sender.add_zabbix_keys({'openshift.project.count' : len(valid_names)})

    def pod_count(self):
        """ check the number of pods in Openshift """

        print "\nPerforming pod count check..."

        response = self.ora.get('/api/v1/pods')

        # Get running pod count (only the first container's state is inspected)
        running_pod_count = 0
        for i in response['items']:
            if 'containerStatuses' in i['status']:
                if 'running' in i['status']['containerStatuses'][0]['state']:
                    running_pod_count += 1

        # Get running pod count on compute only nodes (non-infra)
        running_user_pod_count = 0
        for i in response['items']:
            if 'containerStatuses' in i['status']:
                if 'running' in i['status']['containerStatuses'][0]['state']:
                    if 'nodeSelector' in i['spec']:
                        if i['spec']['nodeSelector'].get('type') == 'compute':
                            running_user_pod_count += 1


        print "Total pod count: %s" % len(response['items'])
        print "Running pod count: %s" % running_pod_count
        print "User Running pod count: %s" % running_user_pod_count

        self.zagg_sender.add_zabbix_keys({'openshift.master.pod.running.count' : running_pod_count,
                                          'openshift.master.pod.user.running.count' : running_user_pod_count,
                                          'openshift.master.pod.total.count' : len(response['items'])})

    def user_count(self):
        """ check the number of users in Openshift """

        print "\nPerforming user count check..."

        response = self.ora.get('/oapi/v1/users')

        print "Total user count: %s" % len(response['items'])
        self.zagg_sender.add_zabbix_keys({'openshift.master.user.count' : len(response['items'])})
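# Imports assumed by this snippet (the original excerpt omits its header).
# OpenshiftRestApi and ZaggSender come from the openshift_tools monitoring
# helpers; their exact module paths are not shown here, so they are omitted.
import argparse
import sqlite3

import yaml
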
class OpenshiftClusterCapacity(object):
    ''' Checks for cluster capacity '''
    def __init__(self):
        self.args = None
        self.zagg_sender = None
        self.ora = None
        self.sql_conn = None

    def run(self):
        '''  Main function to run the check '''

        self.parse_args()
        self.zagg_sender = ZaggSender(verbose=self.args.verbose,
                                      debug=self.args.debug)

        master_cfg = []
        with open(self.args.master_config, 'r') as yml:
            master_cfg = yaml.safe_load(yml)
        self.ora = OpenshiftRestApi(
            host=master_cfg['oauthConfig']['masterURL'], verify_ssl=True)

        self.node_capacity()

        if not self.args.dry_run:
            self.zagg_sender.send_metrics()

    def parse_args(self):
        ''' parse the args from the cli '''

        parser = argparse.ArgumentParser(description='Cluster capacity sender')
        parser.add_argument(
            '--master-config',
            default='/etc/origin/master/master-config.yaml',
            help='Location of OpenShift master-config.yml file')
        parser.add_argument('-v',
                            '--verbose',
                            action='store_true',
                            default=None,
                            help='Verbose?')
        parser.add_argument('--debug',
                            action='store_true',
                            default=None,
                            help='Debug?')
        parser.add_argument('--dry-run',
                            action='store_true',
                            default=False,
                            help='Do not send results to Zabbix')

        self.args = parser.parse_args()

    def load_nodes(self):
        ''' load nodes into SQL '''

        self.sql_conn.execute('''CREATE TABLE nodes
                                 (name text, type text, api text,
                                  max_cpu integer, max_memory integer,
                                  max_pods integer)''')
        response = self.ora.get('/api/v1/nodes')

        for new_node in response['items']:
            # Skip nodes not in 'Ready' state
            node_ready = False
            for condition in new_node['status']['conditions']:
                if condition['type'] == 'Ready' and \
                   condition['status'] == 'True':
                    node_ready = True
            if not node_ready:
                continue

            node = {}
            node['name'] = new_node['metadata']['name']
            node['type'] = new_node['metadata']['labels']['type']
            node['api'] = new_node['metadata']['selfLink']

            if 'allocatable' in new_node['status']:
                cpu = new_node['status']['allocatable']['cpu']
                mem = new_node['status']['allocatable']['memory']
                node['max_pods'] = int(
                    new_node['status']['allocatable']['pods'])
            else:
                cpu = new_node['status']['capacity']['cpu']
                mem = new_node['status']['capacity']['memory']
                node['max_pods'] = int(new_node['status']['capacity']['pods'])

            node['max_cpu'] = to_milicores(cpu)
            node['max_memory'] = to_bytes(mem)

            if self.args.debug:
                print "Adding node: {}".format(str(node))

            self.sql_conn.execute(
                'INSERT INTO nodes VALUES (?,?,?,?,?,?)',
                (node['name'], node['type'], node['api'], node['max_cpu'],
                 node['max_memory'], node['max_pods']))

    @staticmethod
    def load_container_limits(pod, containers):
        ''' process/store container limits data '''

        for container in containers:
            if 'limits' in container['resources']:
                cpu = container['resources']['limits'].get('cpu')
                if cpu:
                    pod['cpu_limits'] = pod.get('cpu_limits', 0) + \
                                        to_milicores(cpu)

                mem = container['resources']['limits'].get('memory')
                if mem:
                    pod['memory_limits'] = pod.get('memory_limits', 0) + \
                                           to_bytes(mem)

            if 'requests' in container['resources']:
                cpu = container['resources']['requests'].get('cpu')
                if cpu:
                    pod['cpu_requests'] = pod.get('cpu_requests', 0) + \
                                          to_milicores(cpu)

                mem = container['resources']['requests'].get('memory')
                if mem:
                    pod['memory_requests'] = pod.get('memory_requests', 0) + \
                                             to_bytes(mem)

    def load_pods(self):
        ''' put pod details into db '''

        self.sql_conn.execute('''CREATE TABLE pods
                                 (name text, namespace text, api text,
                                  cpu_limits integer, cpu_requests integer,
                                  memory_limits integer,
                                  memory_requests integer, node text)''')
        response = self.ora.get('/api/v1/pods')

        for new_pod in response['items']:
            if new_pod['status']['phase'] != 'Running':
                continue

            pod = {}
            pod['name'] = new_pod['metadata']['name']
            pod['namespace'] = new_pod['metadata']['namespace']
            pod['api'] = new_pod['metadata']['selfLink']
            pod['node'] = new_pod['spec']['nodeName']
            self.load_container_limits(pod, new_pod['spec']['containers'])

            self.sql_conn.execute(
                'INSERT INTO pods VALUES (?,?,?,?,?,?,?,?)',
                (pod['name'], pod['namespace'], pod['api'],
                 pod.get('cpu_limits'), pod.get('cpu_requests'),
                 pod.get('memory_limits'), pod.get('memory_requests'),
                 pod['node']))

    def get_memory_percentage(self):
        ''' calculate pod memory limits as a percentage
            of cluster (compute-node) memory capacity '''

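        # Note: SQLite's SUM() yields NULL (None in Python) when no rows match,
        # so row[0] below can be None on a cluster with no compute nodes or no
        # running pods; the snippet assumes at least one of each.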
        node_mem = 0
        pod_mem = 0

        for row in self.sql_conn.execute('''SELECT SUM(nodes.max_memory)
                                            FROM nodes
                                            WHERE nodes.type="compute"'''):
            node_mem = row[0]

        for row in self.sql_conn.execute('''SELECT SUM(pods.memory_limits)
                                            FROM pods, nodes
                                            WHERE pods.node=nodes.name
                                              AND nodes.type="compute"'''):
            pod_mem = row[0]

        return 100.0 * pod_mem / node_mem

    def get_largest_pod(self):
        ''' return memory limit for largest pod '''

        max_pod = 0
        for row in self.sql_conn.execute('''SELECT MAX(memory_limits)
                                            FROM pods, nodes
                                            WHERE pods.node=nodes.name
                                              AND nodes.type="compute"'''):
            max_pod = row[0]

        return max_pod

    def how_many_schedulable(self, size):
        ''' return how many pods with memory 'size' can be scheduled '''

        nodes = {}

        # get max mem for each compute node
        for row in self.sql_conn.execute('''SELECT nodes.name, nodes.max_memory
                                            FROM nodes
                                            WHERE nodes.type="compute"'''):
            nodes[row[0]] = {'max_memory': row[1]}

        # get memory allocated/granted for each compute node
        for row in self.sql_conn.execute('''SELECT nodes.name,
                                                   SUM(pods.memory_limits)
                                            FROM pods, nodes
                                            WHERE pods.node=nodes.name
                                              AND nodes.type="compute"
                                            GROUP BY nodes.name'''):
            nodes[row[0]]['memory_allocated'] = row[1]

        schedulable = 0
        for node in nodes.keys():
            # nodes with no pods never appear in the GROUP BY result above
            available = nodes[node]['max_memory'] - \
                        nodes[node].get('memory_allocated', 0)
            num = available / size
            # ignore negative number (overcommitted nodes)
            if num > 0:
                schedulable += num

        return schedulable

    def node_capacity(self):
        ''' check capacity of compute nodes '''

        zbx_key_mem_alloc = "openshift.master.cluster.memory_allocation"
        zbx_key_max_pods = "openshift.master.cluster.max_mem_pods_schedulable"

        self.sql_conn = sqlite3.connect(':memory:')

        self.load_nodes()
        self.load_pods()

        memory_percentage = self.get_memory_percentage()

        largest = self.get_largest_pod()
        if self.args.debug:
            print "Largest memory pod: {}".format(largest)

        schedulable = self.how_many_schedulable(largest)

        print "Percentage of memory allocated: {}".format(memory_percentage)
        print "Number of max-size nodes schedulable: {}".format(schedulable)

        self.zagg_sender.add_zabbix_keys(
            {zbx_key_mem_alloc: int(memory_percentage)})
        self.zagg_sender.add_zabbix_keys({zbx_key_max_pods: schedulable})
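
# to_milicores() and to_bytes() are called by load_nodes() and
# load_container_limits() above but are not defined in this snippet. The
# helpers below are a hedged sketch of what they likely do: Kubernetes reports
# CPU as whole cores ("2") or millicores ("500m"), and memory either as plain
# bytes or with binary suffixes such as "512Mi" or "16Gi".
def to_milicores(cpu):
    ''' convert a Kubernetes CPU quantity string to millicores '''
    if cpu.endswith('m'):
        return int(cpu[:-1])
    return int(cpu) * 1000


def to_bytes(memory):
    ''' convert a Kubernetes memory quantity string to bytes '''
    multipliers = {'Ki': 1024, 'Mi': 1024 ** 2,
                   'Gi': 1024 ** 3, 'Ti': 1024 ** 4}
    for suffix, multiplier in multipliers.items():
        if memory.endswith(suffix):
            return int(memory[:-2]) * multiplier
    return int(memory)


# Hedged usage sketch: an assumed entry point for the capacity check.
if __name__ == '__main__':
    OCC = OpenshiftClusterCapacity()
    OCC.run()
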
class OpenshiftPodChecker(object):
    """ Checks for Openshift Pods """
    def __init__(self):
        self.args = None
        self.ora = None
        self.metric_sender = None

    def run(self):
        """  Main function to run the check """

        self.parse_args()
        self.ora = OpenshiftRestApi()
        self.metric_sender = MetricSender(verbose=self.args.verbose,
                                          debug=self.args.debug)

        try:
            self.get_pods()

        except Exception as ex:
            print "Problem retrieving pod data: %s " % ex.message

        self.metric_sender.send_metrics()

    def get_pods(self):
        """ Gets pod data """

        print "\nPerforming pod check ...\n"

        api_url = '/api/v1/pods'
        if str(self.args.namespace) != "None" and \
            str(self.args.namespace) != "all":
            api_url = '/api/v1/namespaces/{}/pods'.format(self.args.namespace)

        api_yaml = self.ora.get(api_url, rtype='text')
        pods = yaml.safe_load(api_yaml)

        pod_count = 0
        for pod in pods["items"]:
            if self.args.pod and \
                self.args.pod in pod["metadata"]["name"]:
                print "status of {} is {}".format(
                    pod["metadata"]["name"],
                    pod["status"]["phase"],
                )
                if pod["status"]["phase"] == "Running":
                    pod_count += 1

        self.metric_sender.add_metric(
            {"service.pod.{}.count".format(self.args.pod): pod_count})

    def parse_args(self):
        """ parse the args from the cli """

        parser = argparse.ArgumentParser(description='Openshift pod sender')
        parser.add_argument('-p',
                            '--pod',
                            default=None,
                            help='Check for pod with this specific name')
        parser.add_argument(
            '-n',
            '--namespace',
            default=None,
            help='Check for pods in this namespace - "all" for all')
        parser.add_argument('-v',
                            '--verbose',
                            action='store_true',
                            default=None,
                            help='Verbose?')
        parser.add_argument('--debug',
                            action='store_true',
                            default=None,
                            help='Debug?')

        self.args = parser.parse_args()
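
# Hedged usage sketch: an assumed entry point for the pod checker.
if __name__ == '__main__':
    OPC = OpenshiftPodChecker()
    OPC.run()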