Example 1
def main():
    """  Main function to run the check """

    args = parse_args()
    metric_sender = MetricSender(verbose=args.verbose, debug=args.debug)

    filesys_full_metric = ['filesys.full']
    filesys_inode_derived_metrics = {
        'filesys.inodes.pused':
        'filesys.usedfiles / (filesys.usedfiles + filesys.freefiles) * 100'
    }

    discovery_key_fs = 'disc.filesys'
    item_prototype_macro_fs = '#OSO_FILESYS'
    item_prototype_key_full = 'disc.filesys.full'
    item_prototype_key_inode = 'disc.filesys.inodes.pused'

    # Get the disk space
    filesys_full_metrics = pminfo.get_metrics(filesys_full_metric)

    filtered_filesys_metrics = filter_out_docker_filesystems(
        filesys_full_metrics, 'filesys.full.')

    if args.filter_pod_pv:
        filtered_filesys_metrics = filter_out_customer_pv_filesystems(
            filtered_filesys_metrics)

    if args.force_send_zeros:
        filtered_filesys_metrics = zero_mount_percentages(
            filtered_filesys_metrics)

    metric_sender.add_dynamic_metric(discovery_key_fs, item_prototype_macro_fs,
                                     filtered_filesys_metrics.keys())
    for filesys_name, filesys_full in filtered_filesys_metrics.iteritems():
        metric_sender.add_metric(
            {'%s[%s]' % (item_prototype_key_full, filesys_name): filesys_full})

    # Get filesystem inode metrics
    filesys_inode_metrics = pminfo.get_metrics(
        derived_metrics=filesys_inode_derived_metrics)

    filtered_filesys_inode_metrics = filter_out_docker_filesystems(
        filesys_inode_metrics, 'filesys.inodes.pused.')

    if args.filter_pod_pv:
        filtered_filesys_inode_metrics = filter_out_customer_pv_filesystems(
            filtered_filesys_inode_metrics)

    if args.force_send_zeros:
        filtered_filesys_inode_metrics = zero_mount_percentages(
            filtered_filesys_inode_metrics)

    for filesys_name, filesys_inodes in filtered_filesys_inode_metrics.iteritems():
        metric_sender.add_metric({
            '%s[%s]' % (item_prototype_key_inode, filesys_name):
            filesys_inodes
        })

    metric_sender.send_metrics()
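
The helpers filter_out_docker_filesystems, filter_out_customer_pv_filesystems and zero_mount_percentages are defined elsewhere in the script and are not part of this excerpt. As orientation only, a minimal sketch of the first one might look like the following; the actual filtering rules are not shown above, so the prefix stripping and the /var/lib/docker match are assumptions:

def filter_out_docker_filesystems(metric_dict, metric_prefix):
    ''' Hypothetical sketch: strip the PCP metric prefix and drop docker-internal mounts '''
    filtered = {}
    for key, value in metric_dict.iteritems():
        mount = key.replace(metric_prefix, '')
        # Assumption: docker's own storage mounts live under /var/lib/docker
        if not mount.startswith('/var/lib/docker'):
            filtered[mount] = value
    return filtered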
Example 2
def main():
    """  Main function to run the check """

    args = parse_args()
    metric_sender = MetricSender(verbose=args.verbose, debug=args.debug)

    discovery_key_disk = 'disc.disk'
    interval = 3
    pcp_disk_dev_metrics = ['disk.dev.total', 'disk.dev.avactive']
    item_prototype_macro_disk = '#OSO_DISK'
    item_prototype_key_tps = 'disc.disk.tps'
    item_prototype_key_putil = 'disc.disk.putil'

    disk_metrics = pminfo.get_sampled_data(pcp_disk_dev_metrics, interval, 2)

    pcp_metrics_divided = {}
    for metric in pcp_disk_dev_metrics:
        pcp_metrics_divided[metric] = {
            k: v
            for k, v in disk_metrics.items() if metric in k
        }

    # do TPS checks; use disk.dev.total
    filtered_disk_totals = clean_up_metric_dict(
        pcp_metrics_divided[pcp_disk_dev_metrics[0]],
        pcp_disk_dev_metrics[0] + '.')

    # Add dynamic items
    metric_sender.add_dynamic_metric(discovery_key_disk,
                                     item_prototype_macro_disk,
                                     filtered_disk_totals.keys())

    # calculate the TPS and add them to the MetricSender
    for disk, totals in filtered_disk_totals.iteritems():
        disk_tps = (totals[1] - totals[0]) / interval
        metric_sender.add_metric(
            {'%s[%s]' % (item_prototype_key_tps, disk): disk_tps})

    # do % Util checks; use disk.dev.avactive
    filtered_disk_totals = clean_up_metric_dict(
        pcp_metrics_divided[pcp_disk_dev_metrics[1]],
        pcp_disk_dev_metrics[1] + '.')

    # calculate the % Util and add them to the MetricSender
    for disk, totals in filtered_disk_totals.iteritems():
        total_active = float(totals[1] - totals[0]) / 1000.0
        putil = 100 * total_active / interval

        metric_sender.add_metric(
            {'%s[%s]' % (item_prototype_key_putil, disk): putil})

    metric_sender.send_metrics()
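
To make the two formulas above concrete, here is the same arithmetic with invented sample values; disk.dev.total counts I/O operations and disk.dev.avactive accumulates milliseconds of device activity, per the /1000.0 conversion in the code:

# Worked example of the TPS and % Util math (numbers are illustrative only)
interval = 3
totals = [120400, 120640]        # two disk.dev.total samples, 3 seconds apart
disk_tps = (totals[1] - totals[0]) / interval              # 240 ops / 3 s = 80 TPS

avactive = [9800500, 9801700]    # two disk.dev.avactive samples (milliseconds busy)
total_active = float(avactive[1] - avactive[0]) / 1000.0   # 1.2 seconds of device activity
putil = 100 * total_active / interval                      # 100 * 1.2 / 3 = 40.0 % utilization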
Example 3
def send_metric_data(bucket_list, bucket_stats, args):
    '''send data to zabbix '''
    discovery_key = "disc.aws"
    discovery_macro = "#S3_BUCKET"
    prototype_s3_size = "disc.aws.size"
    prototype_s3_count = "disc.aws.objects"

    mts = MetricSender(verbose=args.debug)
    mts.add_dynamic_metric(discovery_key, discovery_macro, bucket_list)
    for bucket in bucket_stats.keys():
        zab_key = "{}[{}]".format(prototype_s3_size, bucket)
        mts.add_metric({zab_key: int(round(bucket_stats[bucket]["size"]))})

        zab_key = "{}[{}]".format(prototype_s3_count, bucket)
        mts.add_metric({zab_key: bucket_stats[bucket]["objects"]})
    mts.send_metrics()
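
The shape of bucket_list and bucket_stats is implied by how they are indexed above; a hypothetical call could look like this (bucket name and numbers are invented, and args only needs a .debug attribute here):

bucket_list = ['example-registry-bucket']
bucket_stats = {'example-registry-bucket': {'size': 52428800.0, 'objects': 1473}}
send_metric_data(bucket_list, bucket_stats, args)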
Example 4
    def report_to_zabbix(self, disc_key, disc_macro, item_proto_key, value):
        """ Sends the commands exit code to zabbix. """
        mts = MetricSender()

        # Add the dynamic item
        self.verbose_print("Adding the dynamic item to Zabbix - %s, %s, [%s]" % \
                           (disc_key, disc_macro, self.args.name))
        mts.add_dynamic_metric(disc_key, disc_macro, [self.args.name])

        # Send the value for the dynamic item
        self.verbose_print("Sending metric to Zabbix - %s[%s]: %s" % \
                           (item_proto_key, self.args.name, value))
        mts.add_metric({'%s[%s]' % (item_proto_key, self.args.name): value})

        # Actually send them
        mts.send_metrics()
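
A hypothetical invocation, with invented key and macro names, shows how the three Zabbix identifiers fit together; if checker.args.name were 'nightly-backup', this would register the discovery macro and then send disc.ops.runner.exitcode[nightly-backup] with value 0:

# Illustrative call only; the discovery key, macro and item prototype key are invented.
checker.report_to_zabbix('disc.ops.runner', '#OSO_COMMAND', 'disc.ops.runner.exitcode', 0)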
Example 5
def main():
    """  Main function to run the check """

    args = parse_args()
    metric_sender = MetricSender(verbose=args.verbose, debug=args.debug)

    discovery_key_network = 'disc.network'
    pcp_network_dev_metrics = ['network.interface.in.bytes', 'network.interface.out.bytes']
    item_proto_macro_network = '#OSO_NET_INTERFACE'
    item_proto_key_in_bytes = 'disc.network.in.bytes'
    item_proto_key_out_bytes = 'disc.network.out.bytes'

    network_metrics = pminfo.get_metrics(pcp_network_dev_metrics)

    pcp_metrics_divided = {}
    for metric in pcp_network_dev_metrics:
        pcp_metrics_divided[metric] = {k: v for k, v in network_metrics.items() if metric in k}

    # do Network In; use network.interface.in.bytes
    filtered_network_totals = clean_up_metric_dict(pcp_metrics_divided[pcp_network_dev_metrics[0]],
                                                   pcp_network_dev_metrics[0] + '.')

    # Add dynamic items
    metric_sender.add_dynamic_metric(discovery_key_network, item_proto_macro_network, filtered_network_totals.keys())

    # Report Network IN bytes; add them to the MetricSender
    for interface, total in filtered_network_totals.iteritems():
        metric_sender.add_metric({'%s[%s]' % (item_proto_key_in_bytes, interface): total})

    # Report Network OUT Bytes;  use network.interface.out.bytes
    filtered_network_totals = clean_up_metric_dict(pcp_metrics_divided[pcp_network_dev_metrics[1]],
                                                   pcp_network_dev_metrics[1] + '.')

    # Add the Network OUT bytes to the MetricSender
    for interface, total in filtered_network_totals.iteritems():

        metric_sender.add_metric({'%s[%s]' % (item_proto_key_out_bytes, interface): total})

    metric_sender.send_metrics()
Example 6
    def report_to_zabbix(self, total_snapshottable_vols, total_snapshots_created, total_snapshot_creation_errors):
        """ Sends the commands exit code to zabbix. """
        mts = MetricSender(verbose=True)


        # Populate EBS_SNAPSHOTTER_DISC_SCHEDULE_MACRO with the schedule
        mts.add_dynamic_metric(EBS_SNAPSHOTTER_DISC_KEY, EBS_SNAPSHOTTER_DISC_SCHEDULE_MACRO, \
                                   [self.args.with_schedule])

        # Send total_snapshottable_vols prototype item key and value
        mts.add_metric({'%s[%s]' % (EBS_SNAPSHOTTER_SNAPSHOTTABLE_VOLUMES_KEY, self.args.with_schedule): \
                           total_snapshottable_vols})

        # Send total_snapshots_created prototype item key and value
        mts.add_metric({'%s[%s]' % (EBS_SNAPSHOTTER_SNAPSHOTS_CREATED_KEY, self.args.with_schedule): \
                           total_snapshots_created})

        # Send total_snapshot_creation_errors prototype item key and value
        mts.add_metric({'%s[%s]' % (EBS_SNAPSHOTTER_SNAPSHOT_CREATION_ERRORS_KEY, self.args.with_schedule): \
                           total_snapshot_creation_errors})


        # Actually send them
        mts.send_metrics()
Example 7
class EBSStuckVolumesCheck(object):
    """
       This class houses a check that looks for EBS volumes that are stuck in a
       transition state (attaching, detaching, busy, etc).
    """
    def __init__(self):
        """ initialize EBSStuckVolumesCheck class """
        self.args = None
        self.vol_state_data = None

        self.parse_args()

        # Make sure we're using the profile they've requested.
        if self.args.aws_creds_profile:
            os.environ['AWS_PROFILE'] = self.args.aws_creds_profile

        self.eu = EbsUtil(self.args.region, verbose=self.args.verbose)
        self.mts = MetricSender(verbose=self.args.verbose)

    def parse_args(self):
        ''' Parse arguments passed to the script '''
        parser = argparse.ArgumentParser(
            description='OpenShift Cluster Metrics Checker')
        parser.add_argument('-v',
                            '--verbose',
                            action='store_true',
                            default=None,
                            help='Verbose output')
        parser.add_argument('--region',
                            required=True,
                            help='AWS EC2 Region to check')
        parser.add_argument('--stuck-after', default=120, type=int,
                            help='Amount of time in seconds after which the volume is ' + \
                                 'determined to be "stuck".')
        parser.add_argument('--aws-creds-profile',
                            required=False,
                            help='The AWS credentials profile to use.')

        self.args = parser.parse_args()

    @staticmethod
    def read_raw_volume_state_data():
        """ Reads in the raw string the volume state data from disk """
        if not os.path.isfile(STATE_DATA_FILE):
            return ""  # Act like the file is blank

        with open(STATE_DATA_FILE, 'r') as stream:
            return stream.read()

    def load_volume_state_data(self):
        """ Loads the volume state data from disk """
        if os.path.isfile(STATE_DATA_FILE):
            with open(STATE_DATA_FILE, 'r') as stream:
                self.vol_state_data = yaml.load(stream)
        else:
            self.vol_state_data = {}

    def save_volume_state_data(self):
        """ Saves the volume state data to disk """
        with open(STATE_DATA_FILE, 'w') as outfile:
            yaml.dump(self.vol_state_data,
                      outfile,
                      default_flow_style=False,
                      allow_unicode=True)

    def add_new_transitioning_volumes(self, trans_vols):
        """ Adds volumes that we haven't seen before that are in a transitioning state. """
        for vol in trans_vols:
            vol_uri = self.eu.generate_volume_uri(vol)

            if vol_uri not in self.vol_state_data.keys():
                # This is the first time we've seen this volume, add it.
                self.vol_state_data[vol_uri] = {}
                self.vol_state_data[vol_uri][STUCK_AFTER_KEY] = datetime.now() + \
                    timedelta(seconds=self.args.stuck_after)
                self.vol_state_data[vol_uri][VOLUME_ID_KEY] = str(vol.id)
                self.vol_state_data[vol_uri][STATE_KEY] = TRANSITION_STATE

            self.vol_state_data[vol_uri][ATTACH_STATUS_KEY] = str(
                vol.attach_data.status)

    def set_stuck_volumes(self):
        """ Sets volumes to state 'stuck' if they've passed their transition state deadline. """
        for item in self.vol_state_data.itervalues():
            # We don't want to set unstuck volumes back to stuck.
            if item[STATE_KEY] != UNSTUCK_STATE:
                if datetime.now() > item[STUCK_AFTER_KEY]:
                    item[STATE_KEY] = STUCK_STATE

    def set_unstuck_volumes(self, trans_vols):
        """
            Change volumes that were in state 'stuck' that are no longer in transition,
            to state 'unstuck'.
        """

        trans_vol_ids = [str(vol.id) for vol in trans_vols]

        for vol_uri, cache_data in self.vol_state_data.iteritems():
            if cache_data[STATE_KEY] == STUCK_STATE and \
               cache_data[VOLUME_ID_KEY] not in trans_vol_ids:
                # This volume was stuck, but isn't any longer
                self.vol_state_data[vol_uri][STATE_KEY] = UNSTUCK_STATE

    def report_stuck_volumes(self):
        """ sends data to monitoring that these volumes are stuck. """
        for vol_uri, cache_data in self.vol_state_data.iteritems():
            if cache_data[STATE_KEY] == STUCK_STATE:
                self.mts.add_dynamic_metric(EBS_VOLUME_URI_DISC_KEY,
                                            EBS_VOLUME_URI_DISC_MACRO,
                                            [vol_uri])

                item_name = '%s[%s]' % (EBS_VOLUME_ATTACH_STATE_KEY, vol_uri)
                self.mts.add_metric({item_name: MONITORING_STUCK_VALUE})

        # Actually send them
        self.mts.send_metrics()

    def report_unstuck_volumes(self):
        """ sends data to monitoring that these volumes have become unstuck. """
        for vol_uri, cache_data in self.vol_state_data.iteritems():
            if cache_data[STATE_KEY] == UNSTUCK_STATE:
                self.mts.add_dynamic_metric(EBS_VOLUME_URI_DISC_KEY,
                                            EBS_VOLUME_URI_DISC_MACRO,
                                            [vol_uri])

                item_name = '%s[%s]' % (EBS_VOLUME_ATTACH_STATE_KEY, vol_uri)
                self.mts.add_metric({item_name: MONITORING_UNSTUCK_VALUE})

        # Actually send them
        self.mts.send_metrics()

    def remove_unstuck_volumes_from_state_data(self):
        """ Removes state 'unstuck' volumes from the state data (no longer need to track them) """
        for vol_uri in self.vol_state_data.keys():
            cache_data = self.vol_state_data[vol_uri]
            if cache_data[STATE_KEY] == UNSTUCK_STATE:
                # This volume was stuck, but isn't any longer
                del self.vol_state_data[vol_uri]

    def remove_no_longer_transitioning_volumes(self, trans_vols):
        """ Remove volumes that were transitioning, but are no longer in the trans_vols list """

        trans_vol_ids = [str(vol.id) for vol in trans_vols]

        for vol_uri in self.vol_state_data.keys():
            cache_data = self.vol_state_data[vol_uri]
            if cache_data[STATE_KEY] == TRANSITION_STATE and \
               cache_data[VOLUME_ID_KEY] not in trans_vol_ids:
                # This volume was transitioning, but isn't any longer
                del self.vol_state_data[vol_uri]

    def run(self):
        """ Run the main logic of this check """

        # Load the state machine data
        self.load_volume_state_data()

        # Get the volumes that are currently in a transitioning state
        trans_vols = self.eu.get_trans_attach_status_vols()

        # Based on that list, weed out the volumes that used to be transitioning,
        # that are no longer in the transitioning volumes list. This means that
        # it was a normal volume transition, probably from attaching to attached
        # or detaching to detached (aka None).
        self.remove_no_longer_transitioning_volumes(trans_vols)

        # Check on the volumes that were in the stuck state that are no longer
        # in the transitioning volumes list. This means that they went from stuck
        # to unstuck. We need to track these so that we can report that they've become
        # unstuck to monitoring.
        self.set_unstuck_volumes(trans_vols)

        # Add any volumes that are transitioning that we haven't seen before to our data
        self.add_new_transitioning_volumes(trans_vols)

        # Change volumes that are still transitioning and have hit their deadline to
        # finish that transition to a state of "stuck"
        self.set_stuck_volumes()

        # Report to monitoring the stuck volumes
        self.report_stuck_volumes()

        # Report to monitoring the volumes that were stuck, but are now unstuck (no
        # longer transitioning)
        self.report_unstuck_volumes()

        # Since the unstuck volumes have been reported, they can safely be removed from
        # our tracking now.
        self.remove_unstuck_volumes_from_state_data()

        # Make sure we save state for the next run.
        self.save_volume_state_data()

        self.eu.verbose_print("\nTracking Volumes")
        self.eu.verbose_print("----------------\n")

        # Cat out the state file
        raw_state_file = self.read_raw_volume_state_data()
        self.eu.verbose_print(raw_state_file)
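
The EBS_*, *_KEY and *_STATE names used above are module-level constants that are not part of this excerpt. To make the state machine easier to follow, here is a hypothetical snapshot of self.vol_state_data for a single tracked volume; the literal key strings and the URI format are assumptions, only the field roles come from add_new_transitioning_volumes():

from datetime import datetime, timedelta

# Hypothetical shape of the persisted state data (key strings and URI format assumed)
vol_state_data = {
    'us-east-1a/vol-0123456789abcdef0': {
        'stuck_after': datetime.now() + timedelta(seconds=120),  # deadline from --stuck-after
        'volume_id': 'vol-0123456789abcdef0',
        'state': 'transitioning',   # flipped to 'stuck' past the deadline, 'unstuck' afterwards
        'attach_status': 'attaching',
    },
}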
Example 8
class OpenshiftMasterZaggClient(object):
    """ Checks for the Openshift Master """

    def __init__(self):
        self.args = None
        self.metric_sender = None
        self.ora = None
        self.zabbix_api_key = None
        self.zabbix_healthz_key = None

    def run(self):
        """  Main function to run the check """

        self.parse_args()
        self.metric_sender = MetricSender(verbose=self.args.verbose, debug=self.args.debug)

        if self.args.local:
            self.ora = OpenshiftRestApi()
            self.args.api_ping = True
            self.args.healthz = True
            self.zabbix_api_key = 'openshift.master.local.api.ping'
            self.zabbix_healthz_key = 'openshift.master.local.api.healthz'
        else:
            master_cfg_from_yaml = []
            with open('/etc/origin/master/master-config.yaml', 'r') as yml:
                master_cfg_from_yaml = yaml.load(yml)
            self.ora = OpenshiftRestApi(host=master_cfg_from_yaml['oauthConfig']['masterURL'],
                                        verify_ssl=True)

            self.zabbix_api_key = 'openshift.master.api.ping'
            self.zabbix_healthz_key = 'openshift.master.api.healthz'

        try:
            if self.args.healthz or self.args.all_checks:
                self.healthz_check()

        except Exception as ex:
            print "Problem performing healthz check: %s " % ex.message
            self.metric_sender.add_metric({self.zabbix_healthz_key: 'false'})

        try:
            if self.args.api_ping or self.args.all_checks:
                self.api_ping()

            if self.args.project_count or self.args.all_checks:
                self.project_count()

            if self.args.pod_count or self.args.all_checks:
                self.pod_count()

            if self.args.user_count or self.args.all_checks:
                self.user_count()

            if self.args.pv_info or self.args.all_checks:
                self.pv_info()

            if self.args.node_checks or self.args.all_checks:
                self.nodes_not_schedulable()
                self.nodes_not_ready()
                self.nodes_not_labeled()

        except Exception as ex:
            print "Problem Openshift API checks: %s " % ex.message
            self.metric_sender.add_metric({self.zabbix_api_key: 0}) # Openshift API is down

        try:
            if self.args.metrics or self.args.all_checks:
                self.metric_check()

        except Exception as ex:
            print "Problem getting Openshift metrics at /metrics: %s " % ex.message
            self.metric_sender.add_metric({'openshift.master.metric.ping' : 0}) # Openshift Metrics are down

        self.metric_sender.send_metrics()

    def parse_args(self):
        """ parse the args from the cli """

        parser = argparse.ArgumentParser(description='Network metric sender')
        parser.add_argument('-v', '--verbose', action='store_true', default=None, help='Verbose?')
        parser.add_argument('--debug', action='store_true', default=None, help='Debug?')
        parser.add_argument('-l', '--local', action='store_true', default=False,
                            help='Run local checks against the local API (https://127.0.0.1)')

        master_check_group = parser.add_argument_group('Different Checks to Perform')
        master_check_group.add_argument('--all-checks', action='store_true', default=None,
                                        help='Do all of the checks')

        master_check_group.add_argument('--api-ping', action='store_true', default=None,
                                        help='Verify the Openshift API is alive')

        master_check_group.add_argument('--healthz', action='store_true', default=None,
                                        help='Query the Openshift Master API /healthz')

        master_check_group.add_argument('--metrics', action='store_true', default=None,
                                        help='Query the Openshift Master Metrics at /metrics')

        master_check_group.add_argument('--project-count', action='store_true', default=None,
                                        help='Query the Openshift Master for Number of Pods')

        master_check_group.add_argument('--pod-count', action='store_true', default=None,
                                        help='Query the Openshift Master for Number of Running Pods')

        master_check_group.add_argument('--user-count', action='store_true', default=None,
                                        help='Query the Openshift Master for Number of Users')

        master_check_group.add_argument('--pv-info', action='store_true', default=None,
                                        help='Query the Openshift Master for Persistent Volumes Info')

        master_check_group.add_argument('--node-checks', action='store_true', default=None,
                                        help='Query the Openshift Master for node checks')

        self.args = parser.parse_args()

    def api_ping(self):
        """ Verify the Openshift API health is responding correctly """

        print "\nPerforming Openshift API ping check..."

        response = self.ora.get('/api/v1/nodes')
        print "\nOpenshift API ping is alive"
        print "Number of nodes in the Openshift cluster: %s" % len(response['items'])

        self.metric_sender.add_metric({self.zabbix_api_key: 1, 'openshift.master.node.count': len(response['items'])})

    def healthz_check(self):
        """ check the /healthz API call """

        print "\nPerforming /healthz check..."

        response = self.ora.get('/healthz', rtype='text')
        print "healthz check returns: %s " %response

        self.metric_sender.add_metric({self.zabbix_healthz_key: str('ok' in response).lower()})

    def metric_check(self):
        """ collect certain metrics from the /metrics API call """

        print "\nPerforming /metrics check..."
        response = self.ora.get('/metrics', rtype='text')

        for metric_type in text_string_to_metric_families(response):

            # Collect the apiserver_request_latencies_summary{resource="pods",verb="LIST",quantiles in /metrics
            # Collect the apiserver_request_latencies_summary{resource="pods",verb="WATCHLIST",quantiles in /metrics
            if metric_type.name == 'apiserver_request_latencies_summary':
                key_str = 'openshift.master.apiserver.latency.summary'
                for sample in metric_type.samples:
                    if (sample[1]['resource'] == 'pods'
                            and sample[1].has_key('quantile')
                            and 'LIST' in sample[1]['verb']):
                        curr_key_str = key_str + ".pods.quantile.%s.%s" % (sample[1]['verb'],
                                                                           sample[1]['quantile'].split('.')[1])

                        if math.isnan(sample[2]):
                            value = 0
                        else:
                            value = sample[2]

                        self.metric_sender.add_metric({curr_key_str.lower(): int(value/1000)})

            # Collect the scheduler_e2e_scheduling_latency_microseconds{quantiles in /metrics
            if metric_type.name == 'scheduler_e2e_scheduling_latency_microseconds':
                for sample in metric_type.samples:
                    if sample[1].has_key('quantile'):
                        key_str = 'openshift.master.scheduler.e2e.scheduling.latency'
                        curr_key_str = key_str + ".quantile.%s" % (sample[1]['quantile'].split('.')[1])

                        if math.isnan(sample[2]):
                            value = 0
                        else:
                            value = sample[2]

                        self.metric_sender.add_metric({curr_key_str.lower(): int(value/1000)})

        self.metric_sender.add_metric({'openshift.master.metric.ping' : 1}) # Openshift Metrics are up

    def project_count(self):
        """ check the number of projects in Openshift """

        print "\nPerforming project count check..."

        excluded_names = ['openshift', 'openshift-infra', 'default', 'ops-monitor']
        response = self.ora.get('/oapi/v1/projects')

        project_names = [project['metadata']['name'] for project in response['items']]
        valid_names = set(project_names) - set(excluded_names)

        print "Project count: %s" % len(valid_names)

        self.metric_sender.add_metric({'openshift.project.count' : len(valid_names)})

    def pod_count(self):
        """ check the number of pods in Openshift """

        print "\nPerforming pod count check..."

        response = self.ora.get('/api/v1/pods')

        # Get running pod count
        running_pod_count = 0
        for i in response['items']:
            if 'containerStatuses' in i['status']:
                if 'running' in i['status']['containerStatuses'][0]['state']:
                    running_pod_count += 1

        # Get running pod count on compute only nodes (non-infra)
        running_user_pod_count = 0
        for i in response['items']:
            if 'containerStatuses' in i['status']:
                if 'running' in i['status']['containerStatuses'][0]['state']:
                    if 'nodeSelector' in i['spec']:
                        # logging pods don't have selector on 'type'
                        if 'type' in i['spec']['nodeSelector'] \
                           and i['spec']['nodeSelector']['type'] == 'compute':
                            running_user_pod_count += 1


        print "Total pod count: %s" % len(response['items'])
        print "Running pod count: %s" % running_pod_count
        print "User Running pod count: %s" % running_user_pod_count

        self.metric_sender.add_metric({'openshift.master.pod.running.count' : running_pod_count,
                                       'openshift.master.pod.user.running.count' : running_user_pod_count,
                                       'openshift.master.pod.total.count' : len(response['items'])})

    def user_count(self):
        """ check the number of users in Openshift """

        print "\nPerforming user count check..."

        response = self.ora.get('/oapi/v1/users')

        print "Total user count: %s" % len(response['items'])
        self.metric_sender.add_metric({'openshift.master.user.count' : len(response['items'])})

    @staticmethod
    def convert_to_GiB(value):
        """ take units as 'Gi', 'Ti', etc and return as int GiB """

        if 'G' in value:
            return int(value.strip('GIgi'))
        elif 'Ti' in value:
            return 1000 * int(value.replace('Ti', ''))

    def pv_info(self):
        """ Gather info about the persistent volumes in Openshift """

        print "\nPerforming user persistent volume count...\n"

        response = self.ora.get('/api/v1/persistentvolumes')

        pv_capacity_total = 0
        pv_capacity_available = 0
        pv_types = {'Available': 0,
                    'Bound': 0,
                    'Released': 0,
                    'Failed': 0}

        # Dynamic items variables
        discovery_key_pv = 'disc.pv'
        item_prototype_macro_pv = '#OSO_PV'
        item_prototype_key_count = 'disc.pv.count'
        item_prototype_key_available = 'disc.pv.available'
        dynamic_pv_count = defaultdict(int)
        dynamic_pv_available = defaultdict(int)

        for item in response['items']:
            # gather dynamic pv counts
            dynamic_pv_count[item['spec']['capacity']['storage']] += 1

            #get count of each pv type available
            pv_types[item['status']['phase']] += 1

            #get info for the capacity and capacity available
            capacity = item['spec']['capacity']['storage']
            if item['status']['phase'] == 'Available':
                # get total available capacity
                pv_capacity_available = pv_capacity_available + self.convert_to_GiB(capacity)

                # gather dynamic pv available counts
                dynamic_pv_available[item['spec']['capacity']['storage']] += 1

            pv_capacity_total = pv_capacity_total + self.convert_to_GiB(capacity)

        print "Total Persistent Volume Total count: %s" % len(response['items'])
        print 'Total Persistent Volume Capacity: %s' % pv_capacity_total
        print 'Total Persistent Volume Available Capacity: %s' % pv_capacity_available

        self.metric_sender.add_metric(
            {'openshift.master.pv.total.count' : len(response['items']),
             'openshift.master.pv.space.total': pv_capacity_total,
             'openshift.master.pv.space.available': pv_capacity_available})

        for key, value in pv_types.iteritems():
            print "Total Persistent Volume %s count: %s" % (key, value)
            self.metric_sender.add_metric(
                {'openshift.master.pv.%s.count' %key.lower() : value})

        # Add dynamic items
        self.metric_sender.add_dynamic_metric(discovery_key_pv, item_prototype_macro_pv, dynamic_pv_count.keys())

        for size, count in dynamic_pv_count.iteritems():
            print
            print "Total Persistent Volume %s count: %s" % (size, count)
            print "Total Persistent Volume available %s count: %s" % (size, dynamic_pv_available[size])

            self.metric_sender.add_metric({"%s[%s]" %(item_prototype_key_count, size) : count,
                                           "%s[%s]" %(item_prototype_key_available, size) : dynamic_pv_available[size]})


    def nodes_not_schedulable(self):
        """check the number of nodes in the cluster that are not schedulable"""

        print "\nPerforming nodes not schedulable check..."

        response = self.ora.get('/api/v1/nodes')

        nodes_not_schedulable = []

        for n in response['items']:
            if n['metadata']['labels']['type'] == 'master':
                if self.args.verbose:
                    print "Node: %s is a master\n" % n['metadata']['name']
            else:
                if "unschedulable" in n['spec']:
                    nodes_not_schedulable.append(n['metadata']['name'])

        print "Count of nodes not schedulable: %s" % len(nodes_not_schedulable)
        print "Nodes not schedulable: %s\n" % nodes_not_schedulable

        self.metric_sender.add_metric(
            {'openshift.master.nodesnotschedulable.count' : len(nodes_not_schedulable)})


    def nodes_not_ready(self):
        """ check the number of nodes in the cluster that are not ready"""

        print "\nPerforming nodes not ready check..."

        response = self.ora.get('/api/v1/nodes')

        nodes_not_ready = []

        for n in response['items']:
            has_ready_status = False
            for cond in n['status']['conditions']:
                if self.args.verbose:
                    print "Get ready status of %s" % n['metadata']['name']
                if cond['type'] == "Ready":
                    has_ready_status = True
                    if cond['status'].lower() != "true":
                        if self.args.verbose:
                            print "Non-true ready status of %s : %s" % (n['metadata']['name'], cond['status'])
                        nodes_not_ready.append(n['metadata']['name'])
            if not has_ready_status:
                if self.args.verbose:
                    print "Did not find ready status for %s" % n['metadata']['name']
                nodes_not_ready.append(n['metadata']['name'])

        print "Count of nodes not ready: %s" % len(nodes_not_ready)

        self.metric_sender.add_metric(
            {'openshift.master.nodesnotready.count' : len(nodes_not_ready)})


    def nodes_not_labeled(self):
        """ check the nodes in the cluster that are not labeled
            Note: This check only searches for nodes with no label keys set"""

        print "\nPerforming nodes not labeled check..."

        response = self.ora.get('/api/v1/nodes')

        nodes_not_labeled = []
        nodes_labeled = []

        for n in response['items']:
            if 'labels' in n['metadata']:
                nodes_labeled.append(n['metadata']['name'])
            else:
                nodes_not_labeled.append(n['metadata']['name'])

        print "Nodes not labeled: %s\nNodes labeled: %s \n" % (nodes_not_labeled, nodes_labeled)
        self.metric_sender.add_metric(
            {'openshift.master.nodesnotlabeled.count' : len(nodes_not_labeled)})
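
convert_to_GiB above only understands 'Gi'/'G' and 'Ti' capacity suffixes; a quick illustration of what it returns for typical persistent-volume capacity strings (inputs are invented):

# Illustrative only: return values of the static helper for a few capacity strings.
OpenshiftMasterZaggClient.convert_to_GiB('10Gi')    # -> 10
OpenshiftMasterZaggClient.convert_to_GiB('500Gi')   # -> 500
OpenshiftMasterZaggClient.convert_to_GiB('2Ti')     # -> 2000 (the helper multiplies by 1000)
OpenshiftMasterZaggClient.convert_to_GiB('512Mi')   # -> None (unit not handled)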
Example 9
class CertificateReporting(object):
    ''' class with ability to parse through x509 certificates to extract
        and report to zabbix the expiration date associated with the cert '''
    def __init__(self):
        ''' constructor '''
        self.args = None
        self.current_date = datetime.datetime.today()
        self.parse_args()
        self.msend = MetricSender(debug=self.args.debug)

    def dprint(self, msg):
        ''' debug printer '''

        if self.args.debug:
            print msg

    def parse_args(self):
        ''' parse command line args '''
        argparser = argparse.ArgumentParser(description='certificate checker')
        argparser.add_argument('--debug', default=False, action='store_true')
        argparser.add_argument(
            '--cert-list',
            default="/etc/origin",
            type=str,
            help='comma-separated list of dirs/certificates')
        self.args = argparser.parse_args()

    def days_to_expiration(self, cert_file):
        ''' return days to expiration for a certificate '''

        crypto = OpenSSL.crypto

        cert = open(cert_file).read()
        certificate = crypto.load_certificate(crypto.FILETYPE_PEM, cert)
        expiration_date_asn1 = certificate.get_notAfter()
        # expiration returned in ASN.1 GENERALIZEDTIME format
        # YYYYMMDDhhmmss with a trailing 'Z'
        expiration_date = parser.parse(expiration_date_asn1).replace(
            tzinfo=None)

        delta = expiration_date - self.current_date
        return delta.days

    def process_certificates(self):
        ''' check through list of certificates/directories '''

        for cert in self.args.cert_list.split(','):
            if not os.path.exists(cert):
                self.dprint("{} does not exist. skipping.".format(cert))
                continue

            mode = os.stat(cert).st_mode
            if S_ISDIR(mode):
                self.all_certs_in_dir(cert)
            elif S_ISREG(mode):
                days = self.days_to_expiration(cert)
                self.dprint("{} in {} days".format(cert, days))
                self.add_metrics(cert, days)
            else:
                self.dprint("not a file. not a directory. skipping.")

        # now push out all queued up item(s) to metric servers
        self.msend.send_metrics()

    def add_metrics(self, certificate, days_to_expiration):
        ''' queue up item for submission to zabbix '''

        self.msend.add_dynamic_metric(CERT_DISC_KEY, CERT_DISC_MACRO,
                                      [certificate])
        zbx_key = "{}[{}]".format(CERT_DISC_KEY, certificate)
        self.msend.add_metric({zbx_key: days_to_expiration})

    def all_certs_in_dir(self, directory):
        ''' recursively go through all *.crt files in 'directory' '''

        for root, _, filenames in os.walk(directory):
            for filename in filenames:
                if filename.endswith('.crt'):
                    full_path = os.path.join(root, filename)
                    days = self.days_to_expiration(full_path)
                    self.dprint("{} in {} days".format(full_path, days))
                    self.add_metrics(full_path, days)
Example 10
class OpsMetricClient(object):
    """ class to send data via MeticSender """
    def __init__(self):
        self.metric_sender = None
        self.args = None
        self.config = None
        self.heartbeat = None

    def run(self):
        """ main function to run the script """

        self.parse_args()
        self.parse_config(self.args.config_file)
        self.config_metric_sender()

        if self.args.send_heartbeat:
            self.add_heartbeat()

        if self.args.key and self.args.value:
            self.add_metric()

        if self.args.discovery_key and self.args.macro_string and self.args.macro_names:
            self.add_dynamic_metric()

        self.metric_sender.send_metrics()

    def parse_args(self):
        """ parse the args from the cli """
        parser = argparse.ArgumentParser(description='metric sender')
        parser.add_argument('--send-heartbeat',
                            help="send heartbeat metric to zagg",
                            action="store_true")

        group = parser.add_mutually_exclusive_group()
        group.add_argument('-s',
                           '--host',
                           help='specify host name as registered in Zabbix')
        group.add_argument('--synthetic',
                           default=False,
                           action='store_true',
                           help='send as cluster-wide synthetic host')

        parser.add_argument('-v',
                            '--verbose',
                            action='store_true',
                            default=None,
                            help='Verbose?')
        parser.add_argument('--debug',
                            action='store_true',
                            default=None,
                            help='Debug?')
        parser.add_argument('-c',
                            '--config-file',
                            help='ops-metric-client config file',
                            default='/etc/openshift_tools/metric_sender.yaml')

        key_value_group = parser.add_argument_group('Sending a Key-Value Pair')
        key_value_group.add_argument('-k', '--key', help='metric key')
        key_value_group.add_argument('-o', '--value', help='metric value')
        key_value_group.add_argument(
            '-t',
            '--tags',
            help='list of space delimited key tags: units=byte ...',
            nargs='*')

        low_level_discovery_group = parser.add_argument_group(
            'Sending a Low Level Discovery Item')
        low_level_discovery_group.add_argument('--discovery-key',
                                               help='discovery key')
        low_level_discovery_group.add_argument('--macro-string',
                                               help='macro string')
        low_level_discovery_group.add_argument(
            '--macro-names', help='comma separated list of macro names')

        self.args = parser.parse_args()

    def parse_config(self, config_file):
        """ parse config file """
        self.config = yaml.load(file(config_file))

    def config_metric_sender(self):
        """ configure the metric_sender """

        if self.args.host:
            host = self.args.host
        elif self.args.synthetic:
            host = self.config['synthetic_clusterwide']['host']['name']
        else:
            host = self.config['host']['name']

        metric_verbose = self.args.verbose
        metric_debug = self.args.debug
        if isinstance(metric_verbose, str):
            metric_verbose = (metric_verbose == 'True')

        if isinstance(metric_debug, str):
            metric_debug = (metric_debug == 'True')

        self.metric_sender = MetricSender(host=host,
                                          verbose=metric_verbose,
                                          debug=metric_debug,
                                          config_file=self.args.config_file)

    def add_heartbeat(self):
        """ crate a heartbeat metric """
        if self.args.synthetic:
            heartbeat = MetricSenderHeartbeat(
                templates=self.config['synthetic_clusterwide']['heartbeat']['templates'],
                hostgroups=self.config['heartbeat']['hostgroups'])
        else:
            heartbeat = MetricSenderHeartbeat(
                templates=self.config['heartbeat']['templates'],
                hostgroups=self.config['heartbeat']['hostgroups'])
        self.metric_sender.add_heartbeat(heartbeat)

    def add_metric(self):
        """ send key/value pair """

        # Get tags from command line args
        tags = dict([i.split("=")[0], i.split("=")[1]]
                    for i in self.args.tags) if self.args.tags else {}

        self.metric_sender.add_metric({self.args.key: self.args.value},
                                      key_tags=tags)

    def add_dynamic_metric(self):
        """ send zabbix low level discovery item to zagg """

        self.metric_sender.add_dynamic_metric(self.args.discovery_key,
                                              self.args.macro_string,
                                              self.args.macro_names.split(','))
Example 11
class DockerContainerUsageCli(object):
    ''' This is the class that actually pulls everything together into a cli script.
    '''
    def __init__(self, config_file=None):
        if not config_file:
            self.config_file = '/etc/openshift_tools/container_metrics.yml'
        else:
            self.config_file = config_file

        self.config = None

        self.parse_config()

        self.cli = AutoVersionClient(base_url='unix://var/run/docker.sock',
                                     timeout=120)
        self.docker_util = DockerUtil(self.cli)
        self.metric_sender = MetricSender(verbose=True)

    def parse_config(self):
        """ parse config file """

        if not self.config:
            if not os.path.exists(self.config_file):
                raise IOError(self.config_file + " does not exist.")

            self.config = yaml.load(file(self.config_file))

    def format_ctr_name(self, ctr_name):
        ''' Takes a container name and if there's a name_format_regex specified, it applies it '''
        for item in self.config['usage_checks']:
            name_match_regex = item['name_match_regex']

            if item.has_key('name_format_regex') and re.match(
                    name_match_regex, ctr_name):
                try:
                    name_format_regex = item['name_format_regex']
                    new_name = re.sub(name_match_regex, name_format_regex,
                                      ctr_name)
                    return new_name
                except sre_constants.error as ex:
                    # Just use the full name (we don't want to die because of name formatting)
                    print "\nError: %s: [%s]. Using full name [%s].\n" % (
                        ex.message, name_format_regex, ctr_name)
                    return ctr_name

        return ctr_name

    def main(self):
        ''' The main entrypoint of the cli '''
        ctr_regexes = [
            uchk['name_match_regex'] for uchk in self.config['usage_checks']
        ]
        use_cgroups = self.config.get('use_cgroups', False)

        ctrs = self.docker_util.get_ctrs_matching_names(ctr_regexes)

        for ctr_name, ctr in ctrs.iteritems():
            (cpu_stats, mem_stats) = self.docker_util.get_ctr_stats(
                ctr, use_cgroups=use_cgroups)

            formatted_ctr_name = self.format_ctr_name(ctr_name)

            # Add the container hostnames as macros for the dynamic item.
            self.metric_sender.add_dynamic_metric(ZBX_DOCKER_DISC_KEY,
                                                  ZBX_DOCKER_DISC_MACRO,
                                                  [formatted_ctr_name])
            data = {
                '%s[%s]' % (ZBX_CTR_CPU_USED_PCT_KEY, formatted_ctr_name):
                cpu_stats.used_pct,
                '%s[%s]' % (ZBX_CTR_MEM_USED_KEY, formatted_ctr_name):
                mem_stats.used,
                '%s[%s]' % (ZBX_CTR_MEM_LIMIT_KEY, formatted_ctr_name):
                mem_stats.limit,
                '%s[%s]' % (ZBX_CTR_MEM_LIMIT_USED_PCT_KEY, formatted_ctr_name):
                mem_stats.limit_used_pct,
                '%s[%s]' % (ZBX_CTR_MEM_FAILCNT_KEY, formatted_ctr_name):
                mem_stats.failcnt,
            }

            print "%s:" % formatted_ctr_name
            for k, v in data.iteritems():
                print "  %s: %s" % (k, v)
            print

            self.metric_sender.add_metric(data)

        # Actually send the metrics
        self.metric_sender.send_metrics()
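
format_ctr_name() and main() both read the usage_checks list from /etc/openshift_tools/container_metrics.yml. A hypothetical config, shown as the dict that yaml.load() would produce (the regexes are invented to illustrate the match/format pair):

# Hypothetical container_metrics.yml contents, as loaded by parse_config (regexes invented)
config = {
    'use_cgroups': False,
    'usage_checks': [
        {
            # Containers whose names match this regex are monitored...
            'name_match_regex': r'^k8s_router.*_(router-[0-9]+-[a-z0-9]+)_.*$',
            # ...and reported under the first capture group instead of the raw docker name.
            'name_format_regex': r'\1',
        },
        {
            # name_format_regex may be omitted; the raw container name is reported as-is.
            'name_match_regex': r'^oso-rhel7-host-monitoring$',
        },
    ],
}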