def main():
    """
    Run every selected check and print the collected metrics.

    Each callable in ``args.selected`` is invoked in turn. A ``MetricData``
    result, or a non-empty list whose first element is a ``MetricData``, is
    converted with ``metric()`` and collected; any other return value is
    ignored. A check that raises is reported as a failing
    ``cephlm.probe.failure`` metric instead of aborting the run.

    At most one ``cephlm.probe.failure`` measurement is kept per run (the
    last one found); when there is none, an ``ok`` measurement of that
    metric is appended. The final list is handed to the formatter chosen by
    ``args.format``.
    """
    failure_metric = 'cephlm.probe.failure'
    args = parse_args()
    metrics = []

    for func in args.selected:
        try:
            r = func()
            if isinstance(r, list) and r and isinstance(r[0], MetricData):
                metrics.extend([result.metric() for result in r])
            elif isinstance(r, MetricData):
                metrics.append(r.metric())
        except Exception:
            # One broken check must not stop the others, so the error is
            # turned into a metric. KeyboardInterrupt/SystemExit are not
            # Exception subclasses and still propagate.
            t, v, tb = sys.exc_info()
            backtrace = ' '.join(traceback.format_exception(t, v, tb))
            failure = MetricData.single(
                failure_metric,
                Severity.fail,
                # 'check' is the failing module and 'error' its backtrace.
                '{check} failed with: {error}',
                dimensions={
                    'component': 'cephlm-probe',
                    'service': 'ceph-storage'
                },
                msgkeys={
                    'check': func.__module__,
                    'error': backtrace.replace('\n', ' ')
                })
            metrics.append(failure.metric())

    # There is no point in reporting multiple measurements of
    # cephlm.probe.failure metric in same cycle.
    failure_positions = [
        index for index, metric in enumerate(metrics)
        if metric.get('metric') == failure_metric
    ]
    if failure_positions:
        # Remove all except the last instance.
        stale = set(failure_positions[:-1])
        metrics = [
            metric for index, metric in enumerate(metrics)
            if index not in stale
        ]
    else:
        healthy = MetricData.single(
            failure_metric,
            Severity.ok,
            'ok',
            dimensions={
                'component': 'cephlm-probe',
                'service': 'ceph-storage'
            })
        metrics.append(healthy.metric())

    FORMATS[args.format](metrics, args.pretty)
def osd_stats():
    """
    Publishes the osd statistics

    ``OSD._stats()`` is called once and one ``cephlm.osd.<state>_count``
    metric is built for each of the ``up``, ``up_out``, ``down``,
    ``down_in`` and ``total`` states, using the matching ``OSD`` counter
    function.

    If the stats call raises one of the cephlm exceptions handled here,
    every metric is still published, with the value -1 and a probe-error
    message.

    :return: list with one ``MetricData.single(...)`` result per state
    """
    counters = {
        'up': OSD._up_count,
        'up_out': OSD._up_out_count,
        'down': OSD._down_count,
        'down_in': OSD._down_in_count,
        'total': OSD._total_count
    }
    INVALID_VALUE = -1

    stats = None
    probe_failed = False
    try:
        stats = OSD._stats()
    except (exc.CephLMException, exc.CephCommandException,
            exc.CephCommandTimeoutException):
        probe_failed = True

    result = []
    # .items() rather than .iteritems() so this runs on Python 2 and 3.
    for metric_state, counter in counters.items():
        name = "cephlm.osd.%s_count" % metric_state
        if probe_failed:
            value = INVALID_VALUE
            msg = "Probe error: Command 'ceph osd tree' failed"
        else:
            value, msg = counter(stats)
            # An empty description from the counter means nothing matched.
            msg = "OSD(s) %s" % msg if msg else "No OSD(s)"
            if metric_state == 'total':
                msg += " is/are in cluster"
            else:
                msg += " is/are %s" % metric_state
        result.append(MetricData.single(name, value, message=msg))
    return result
def pool_stats():
    """
    Publishes the pool statistics

    ``Pool._stats()`` is called once and one ``cephlm.pool.<metric>`` metric
    is built per entry of the metric table below. Metrics whose name
    contains ``top_three`` are computed by ``Pool._pools_by_metric``, the
    others by ``Pool._return_total_metrics``.

    If the stats call raises one of the cephlm exceptions handled here,
    every metric is still published, with the value -1 and the exception
    text as its message.

    :return: list with one ``MetricData.single(...)`` result per metric
    """
    INVALID_VALUE = -1
    # Published metric suffix -> key handed to the Pool helper functions.
    metric_dict = {
        'count': 'count',
        'total_objects': 'objects',
        'usage_bytes': 'size_bytes',
        'top_three_by_usage_bytes': 'top_pools_by_size',
        'top_three_by_objects': 'top_pools_by_objects',
    }

    probe_failed = False
    try:
        pool_dict = Pool._stats()
    except (exc.CephLMException, exc.CephCommandException,
            exc.CephCommandTimeoutException) as err:
        probe_failed = True
        # Reused as the message of every metric published below.
        msg = str(err)

    result = []
    # .items() rather than .iteritems() so this runs on Python 2 and 3.
    for metric_name, state in metric_dict.items():
        name = "cephlm.pool.%s" % metric_name
        if probe_failed:
            value = INVALID_VALUE
        elif 'top_three' in metric_name:
            msg, value = Pool._pools_by_metric(pool_dict, state)
        else:
            msg, value = Pool._return_total_metrics(pool_dict, state)
        result.append(MetricData.single(name, value, message=msg))
    return result
def pg_stats():
    """
    Publishes the placement group count

    The ``count`` entry of ``PG._stats()`` becomes the value of the
    ``cephlm.pg.count`` metric. The remaining entries are rendered as
    ``state=count`` pairs, joined with ``', '``, to form its message.

    If the stats call raises one of the cephlm exceptions handled here,
    the metric is published with the value -1 and a ``Probe error: ...``
    message carrying the exception text.

    :return: the ``MetricData.single(...)`` result for ``cephlm.pg.count``
    """
    INVALID_VALUE = -1

    try:
        stats = PG._stats()
    except (exc.CephLMException, exc.CephCommandException,
            exc.CephCommandTimeoutException) as err:
        value = INVALID_VALUE
        msg = 'Probe error: ' + str(err)
    else:
        # 'count' is the metric value; it is removed so that only the
        # per-state entries are left for the message.
        value = stats.pop('count')
        # join() needs no trailing-separator trimming, and .items() runs on
        # both Python 2 and 3 (unlike .iteritems()).
        msg = ', '.join(
            '%s=%s' % (pg_state, count)
            for pg_state, count in stats.items()
        )

    return MetricData.single("cephlm.pg.count", value, message=msg)
def capacity_stats():
    """
    Publishes the capacity statistics

    One ``cephlm.capacity.<metric>`` metric is built for each of
    ``total_bytes``, ``used_bytes``, ``available_bytes`` and
    ``perc_utilization``, with values read from ``Capacity._stats()``.

    If the stats call raises one of the cephlm exceptions handled here,
    every metric is still published, with the value -1 and the exception
    text as its message. Otherwise the message is empty.

    :return: list with one ``MetricData.single(...)`` result per metric
    """
    INVALID_VALUE = -1
    metric_names = ('total_bytes', 'used_bytes', 'available_bytes',
                    'perc_utilization')

    message = ''
    stats = {}
    failed = False
    try:
        stats = Capacity._stats()
    except (exc.CephLMException, exc.CephCommandException,
            exc.CephCommandTimeoutException) as err:
        failed = True
        message = str(err)

    published = []
    for metric_name in metric_names:
        name = "cephlm.capacity.%s" % metric_name
        if failed:
            value = INVALID_VALUE
        else:
            value = stats[metric_name]
        published.append(MetricData.single(name, value, message=message))
    return published