Example #1
0
def main():
    """
    Run every selected probe and emit the metrics they produce.

    Each callable in ``args.selected`` is invoked without arguments.  A
    returned ``MetricData`` instance, or a non-empty list whose first element
    is a ``MetricData``, is converted with ``.metric()`` and collected; any
    other return value is ignored.  A probe that raises is reported as a
    ``cephlm.probe.failure`` metric with ``Severity.fail`` instead of aborting
    the run.

    Exactly one ``cephlm.probe.failure`` measurement is emitted per run: the
    last failure when at least one probe failed, otherwise a single
    ``Severity.ok`` measurement.  The resulting list is handed to the
    formatter stored in ``FORMATS`` under ``args.format`` together with
    ``args.pretty``.
    """
    args = parse_args()
    metrics = []

    for func in args.selected:
        try:
            r = func()
            if isinstance(r, list) and r and isinstance(r[0], MetricData):
                metrics.extend([result.metric() for result in r])
            elif isinstance(r, MetricData):
                metrics.append(r.metric())
        except:  # noqa
            # Deliberately broad: one broken probe must not stop the
            # remaining probes from being run and reported.
            backtrace = ' '.join(
                traceback.format_exception(*sys.exc_info()))
            # 'check' is the probe's module and 'error' the flattened
            # traceback, so the template has to read
            # "<check> failed with: <error>" (assuming msgkeys are
            # substituted into the template by MetricData -- the original
            # had the two placeholders swapped).
            r = MetricData.single('cephlm.probe.failure',
                                  Severity.fail,
                                  '{check} failed with: {error}',
                                  dimensions={
                                      'component': 'cephlm-probe',
                                      'service': 'ceph-storage'
                                  },
                                  msgkeys={
                                      'check': func.__module__,
                                      'error': backtrace.replace('\n', ' ')
                                  })
            metrics.append(r.metric())

    # There is no point in reporting multiple measurements of
    # cephlm.probe.failure metric in same cycle.
    failure_indexes = [index for index, metric in enumerate(metrics)
                       if metric.get('metric') == 'cephlm.probe.failure']
    if failure_indexes:
        # Keep only the last failure; every other metric stays in place.
        dropped = set(failure_indexes[:-1])
        metrics = [metric for index, metric in enumerate(metrics)
                   if index not in dropped]
    else:
        # No probe failed: publish an explicit "ok" measurement.
        r = MetricData.single('cephlm.probe.failure',
                              Severity.ok,
                              'ok',
                              dimensions={
                                  'component': 'cephlm-probe',
                                  'service': 'ceph-storage'
                              })
        metrics.append(r.metric())

    FORMATS[args.format](metrics, args.pretty)
Example #2
0
    def osd_stats():
        """
        Publishes the osd statistics

        Builds one ``cephlm.osd.<state>_count`` metric for each of the states
        ``up``, ``up_out``, ``down``, ``down_in`` and ``total``.  The stats
        are fetched once through ``OSD._stats()`` and passed to the counting
        helper of every state, which returns a ``(value, detail)`` pair.

        If fetching the stats raises one of the Ceph probe exceptions, every
        metric is still published, carrying the value -1 and a fixed
        "Probe error" message.

        :return: list of the objects returned by ``MetricData.single``, one
                 per state.
        """
        invalid_value = -1
        # State name -> helper that derives (value, detail) from the stats.
        counters = {
            'up': OSD._up_count,
            'up_out': OSD._up_out_count,
            'down': OSD._down_count,
            'down_in': OSD._down_in_count,
            'total': OSD._total_count
        }

        probe_failed = False
        stats = None
        try:
            stats = OSD._stats()
        except (exc.CephLMException, exc.CephCommandException,
                exc.CephCommandTimeoutException):
            # The exception detail is not reported; a fixed message is used.
            probe_failed = True

        result = []
        # .items() rather than .iteritems() so the probe runs on both
        # Python 2 and Python 3.
        for metric_state, counter in counters.items():
            name = "cephlm.osd.%s_count" % metric_state
            if probe_failed:
                value = invalid_value
                msg = "Probe error: Command 'ceph osd tree' failed"
            else:
                value, detail = counter(stats)
                msg = "OSD(s) %s" % detail if detail else "No OSD(s)"
                if metric_state == 'total':
                    msg += " is/are in cluster"
                else:
                    msg += " is/are %s" % metric_state
            result.append(MetricData.single(name, value, message=msg))
        return result
Example #3
0
 def pool_stats():
     """
     Publishes the pool statistics

     Builds one ``cephlm.pool.<name>`` metric for every entry of the
     metric table below.  Metrics whose name contains ``top_three`` are
     computed by ``Pool._pools_by_metric``; the others by
     ``Pool._return_total_metrics``.  Both helpers receive the stats from
     ``Pool._stats()`` and return a ``(message, value)`` pair.

     If fetching the stats raises one of the Ceph probe exceptions, every
     metric is still published with the value -1 and the exception text as
     its message.

     :return: list of the objects returned by ``MetricData.single``, one
              per metric.
     """
     invalid_value = -1
     # Metric name suffix -> selector handed to the Pool helper.
     metric_dict = {
         'count': 'count',
         'total_objects': 'objects',
         'usage_bytes': 'size_bytes',
         'top_three_by_usage_bytes': 'top_pools_by_size',
         'top_three_by_objects': 'top_pools_by_objects',
     }

     probe_failed = False
     failure_msg = ''
     pool_dict = None
     try:
         pool_dict = Pool._stats()
     except (exc.CephLMException, exc.CephCommandException,
             exc.CephCommandTimeoutException) as e:
         probe_failed = True
         failure_msg = str(e)

     result = []
     # .items() rather than .iteritems() so the probe runs on both
     # Python 2 and Python 3.
     for metric_name, state in metric_dict.items():
         name = "cephlm.pool.%s" % metric_name
         if probe_failed:
             value = invalid_value
             msg = failure_msg
         elif 'top_three' in metric_name:
             msg, value = Pool._pools_by_metric(pool_dict, state)
         else:
             msg, value = Pool._return_total_metrics(pool_dict, state)
         result.append(MetricData.single(name, value, message=msg))
     return result
Example #4
0
 def pg_stats():
     """
     Publishes the ``cephlm.pg.count`` metric

     The metric value is the ``count`` entry of the stats returned by
     ``PG._stats()``; every other entry is rendered as ``key=value`` and the
     pairs are joined with ", " to form the message.

     If fetching the stats raises one of the Ceph probe exceptions, the
     metric is published with the value -1 and a message made of
     "Probe error: " followed by the exception text.

     :return: the object returned by ``MetricData.single``.
     """
     invalid_value = -1
     try:
         stats = PG._stats()
     except (exc.CephLMException, exc.CephCommandException,
             exc.CephCommandTimeoutException) as e:
         value = invalid_value
         msg = 'Probe error: ' + str(e)
     else:
         # Read 'count' instead of popping it: the dict belongs to
         # PG._stats() and must not be modified in case it is reused.
         value = stats['count']
         # .items() rather than .iteritems() so the probe runs on both
         # Python 2 and Python 3.
         msg = ', '.join('%s=%s' % (pg_state, count)
                         for pg_state, count in stats.items()
                         if pg_state != 'count')
     return MetricData.single("cephlm.pg.count", value, message=msg)
Example #5
0
 def capacity_stats():
     """
     Publishes the capacity statistics

     One ``cephlm.capacity.<name>`` metric is built for each of total_bytes,
     used_bytes, available_bytes and perc_utilization, using the matching
     entry of the stats returned by ``Capacity._stats()`` and an empty
     message.

     If fetching the stats raises one of the Ceph probe exceptions, every
     metric is still published with the value -1 and the exception text as
     its message.

     :return: list of the objects returned by ``MetricData.single``, one
              per metric.
     """
     fallback = -1
     note = ''
     readings = {}
     succeeded = True
     try:
         readings = Capacity._stats()
     except (exc.CephLMException, exc.CephCommandException,
             exc.CephCommandTimeoutException) as err:
         succeeded = False
         note = str(err)

     published = []
     for key in ('total_bytes', 'used_bytes',
                 'available_bytes', 'perc_utilization'):
         metric_name = "cephlm.capacity.%s" % key
         if succeeded:
             reading = readings[key]
         else:
             reading = fallback
         published.append(
             MetricData.single(metric_name, reading, message=note))
     return published