Exemple #1
0
def get_availability():
  """Return the cluster availability as an integer percentage (0-100).

  Returns 100 immediately when ``ceph health`` prints exactly
  ``HEALTH_OK``. Otherwise the result is the floored percentage of
  placement groups counted under the ``'active'`` state, forced to 0 when
  the monitors in quorum are not a strict majority of all monitors.
  """
  # The command is wrapped in `timeout 10` so the probe is bounded in time.
  health = commands.getoutput('timeout 10 ceph health')
  if health == 'HEALTH_OK':
    return 100

  n_pgs = cephinfo.get_n_pgs()
  if n_pgs:
    pg_states = cephinfo.get_pg_states()
    a_pgs = math.floor(100.0 * pg_states['active'] / n_pgs)
  else:
    # No placement groups reported: nothing can be active, and the
    # percentage below would otherwise divide by zero.
    a_pgs = 0

  n_mons = cephinfo.get_n_mons()
  n_quorum = cephinfo.get_n_mons_quorum()
  # Strict majority, computed with integers only. The former
  # `n_quorum >= math.ceil(n_mons / 2)` accepted exactly half of an even
  # monitor count, and with integer division also accepted 1 of 3.
  a_mon = 1 if 2 * n_quorum > n_mons else 0

  return int(a_mon * a_pgs)
Exemple #2
0
def get_availability():
    """Return the cluster availability as an integer percentage (0-100).

    Returns 100 immediately when ``ceph health`` prints exactly
    ``HEALTH_OK``. Otherwise the result is the floored percentage of
    placement groups counted under the ``'active'`` state, forced to 0 when
    the monitors in quorum are not a strict majority of all monitors.
    """
    # The command is wrapped in `timeout 10` so the probe is bounded in time.
    health = commands.getoutput('timeout 10 ceph health')
    if health == 'HEALTH_OK':
        return 100

    n_pgs = cephinfo.get_n_pgs()
    if n_pgs:
        pg_states = cephinfo.get_pg_states()
        a_pgs = math.floor(100.0 * pg_states['active'] / n_pgs)
    else:
        # No placement groups reported: nothing can be active, and the
        # percentage below would otherwise divide by zero.
        a_pgs = 0

    n_mons = cephinfo.get_n_mons()
    n_quorum = cephinfo.get_n_mons_quorum()
    # Strict majority, computed with integers only. The former
    # `n_quorum >= math.ceil(n_mons / 2)` accepted exactly half of an even
    # monitor count, and with integer division also accepted 1 of 3.
    a_mon = 1 if 2 * n_quorum > n_mons else 0

    return int(a_mon * a_pgs)
Exemple #3
0
def write_xml():
  """Print the SLS ``serviceupdate`` XML document for the Ceph cluster.

  Collects monitor, OSD, placement-group, object, latency, OpenStack and
  activity figures from ``cephinfo``, substitutes them into the XML
  template below and prints the result to standard output.
  """
  osd_states = cephinfo.get_osd_states()
  osd_stats_sum = cephinfo.get_osd_stats_sum()
  pg_stats_sum = cephinfo.get_pg_stats_sum()['stat_sum']
  latency = cephinfo.get_write_latency()
  pg_states = cephinfo.get_pg_states()
  read_latency = cephinfo.get_read_latency()
  # latency[0] is handed back to cephinfo for cleanup once both latency
  # probes have run; latency[1] carries the figures reported below.
  cephinfo.rados_cleanup(latency[0])
  activity = cephinfo.get_smooth_activity(10)
  context = {
    "timestamp": commands.getoutput('date +%Y-%m-%dT%H:%M:%S'),
    "availability": get_availability(),
    "n_mons": cephinfo.get_n_mons(),
    "n_quorum": cephinfo.get_n_mons_quorum(),
    "n_pools": cephinfo.get_n_pools(),
    "n_osds": cephinfo.get_n_osds(),
    "n_osds_up": osd_states['up'],
    "n_osds_in": osd_states['in'],
    "n_pgs": cephinfo.get_n_pgs(),
    "n_pgs_active": pg_states['active'],
    "n_pgs_scrubbing": pg_states['scrubbing'],
    "n_pgs_deep": pg_states['deep'],
    # KB counters are scaled to GB, byte counters likewise.
    "n_osd_gb_total": osd_stats_sum['kb'] / 1024 / 1024,
    "n_osd_gb_used": osd_stats_sum['kb_used'] / 1024 / 1024,
    "n_osd_gb_avail": osd_stats_sum['kb_avail'] / 1024 / 1024,
    "n_pg_gbytes": pg_stats_sum['num_bytes'] / 1024 / 1024 / 1024,
    "n_objects": pg_stats_sum['num_objects'],
    "n_object_copies": pg_stats_sum['num_object_copies'],
    "n_objects_degraded": pg_stats_sum['num_objects_degraded'],
    "n_objects_unfound": pg_stats_sum['num_objects_unfound'],
    "n_read_gb": pg_stats_sum['num_read_kb'] / 1024 / 1024,
    "n_write_gb": pg_stats_sum['num_write_kb'] / 1024 / 1024,
    # The template labels these groups "(ms)", hence the * 1000
    # (source values presumably in seconds -- TODO confirm).
    "latency_ms": latency[1][0] * 1000,
    "latency_max_ms": latency[1][1] * 1000,
    "latency_min_ms": latency[1][2] * 1000,
    "read_latency_ms": read_latency[0] * 1000,
    "read_latency_max_ms": read_latency[1] * 1000,
    "read_latency_min_ms": read_latency[2] * 1000,
    "n_openstack_volumes": cephinfo.get_n_openstack_volumes(),
    "n_openstack_images": cephinfo.get_n_openstack_images(),
    "op_per_sec": activity[0],
    "read_mb_sec": activity[1],
    "write_mb_sec": activity[2],
  }
  # The contact address used to be written as "ceph.support\@cern.ch"; Python
  # keeps an unknown escape such as "\@" verbatim, so the backslash ended up
  # in the emitted XML. It is written without the backslash now.
  template = """<?xml version="1.0" encoding="utf-8"?>

<serviceupdate xmlns="http://sls.cern.ch/SLS/XML/update">
   
        <id>Ceph</id>
        <fullname>Ceph Storage Service</fullname>
        <group>IT/DSS</group>

        <contact>ceph.support@cern.ch</contact>
        <webpage>https://twiki.cern.ch/twiki/bin/viewauth/DSSGroup/CephProject</webpage>
        <alarmpage>http://cern.ch/ceph/alarms.html</alarmpage>
 
        <timestamp>{timestamp}</timestamp>
 
        <availability>{availability}</availability>
 
        <refreshperiod>PT15M</refreshperiod>
 
        <availabilitythresholds>
                <threshold level="available">98</threshold>
                <threshold level="affected">90</threshold>
                <threshold level="degraded">80</threshold>
        </availabilitythresholds>

        <data>
                <grp name="Monitors">
		    <numericvalue name="n_mons" desc="Num Mons">{n_mons}</numericvalue>
		    <numericvalue name="n_quorum" desc="Num Mons in Quorum">{n_quorum}</numericvalue>
                </grp>
		<numericvalue name="n_pools" desc="Num Pools">{n_pools}</numericvalue>
                <grp name="OSDs">
		    <numericvalue name="n_osds" desc="Num OSDs">{n_osds}</numericvalue>
		    <numericvalue name="n_osds_up" desc="Num OSDs Up">{n_osds_up}</numericvalue>
		    <numericvalue name="n_osds_in" desc="Num OSDs In">{n_osds_in}</numericvalue>
                </grp>
                <grp name="PGs">
		    <numericvalue name="n_pgs" desc="Num PGs">{n_pgs}</numericvalue>
		    <numericvalue name="n_pgs_active" desc="Num PGs Active">{n_pgs_active}</numericvalue>
		    <numericvalue name="n_pgs_scrubbing" desc="Num PGs Scrubbing">{n_pgs_scrubbing}</numericvalue>
		    <numericvalue name="n_pgs_deep" desc="Num PGs Deep Scrubbing">{n_pgs_deep}</numericvalue>
                </grp>
                <grp name="Disk Space">
		    <numericvalue name="n_osd_gb_total" desc="OSD Gigabytes Total">{n_osd_gb_total}</numericvalue>
		    <numericvalue name="n_osd_gb_used" desc="OSD Gigabytes Used">{n_osd_gb_used}</numericvalue>
		    <numericvalue name="n_osd_gb_avail" desc="OSD Gigabytes Avail">{n_osd_gb_avail}</numericvalue>
		    <numericvalue name="n_pg_gbytes" desc="PG Gigabytes">{n_pg_gbytes}</numericvalue>
                </grp>
                <grp name="Objects">
		    <numericvalue name="n_objects" desc="Num Objects">{n_objects}</numericvalue>
		    <numericvalue name="n_object_copies" desc="Num Object Copies">{n_object_copies}</numericvalue>
		    <numericvalue name="n_objects_degraded" desc="Num Objects Degraded">{n_objects_degraded}</numericvalue>
		    <numericvalue name="n_objects_unfound" desc="Num Objects Unfound">{n_objects_unfound}</numericvalue>
                </grp>
                <grp name="Total RW">
		    <numericvalue name="n_read_gb" desc="Total Read (GB)">{n_read_gb}</numericvalue>
		    <numericvalue name="n_write_gb" desc="Total Write (GB)">{n_write_gb}</numericvalue>
                </grp>
                <grp name="4KB Write Latency (ms)">
		    <numericvalue name="latency_ms" desc="Average">{latency_ms}</numericvalue>
		    <numericvalue name="latency_max_ms" desc="Max">{latency_max_ms}</numericvalue>
		    <numericvalue name="latency_min_ms" desc="Min">{latency_min_ms}</numericvalue>
                </grp>
                <grp name="4KB Read Latency (ms)">
		    <numericvalue name="read_latency_ms" desc="Average">{read_latency_ms}</numericvalue>
		    <numericvalue name="read_latency_max_ms" desc="Max">{read_latency_max_ms}</numericvalue>
		    <numericvalue name="read_latency_min_ms" desc="Min">{read_latency_min_ms}</numericvalue>
                </grp>
                <grp name="OpenStack">
		    <numericvalue name="n_openstack_volumes" desc="Num OpenStack Volumes">{n_openstack_volumes}</numericvalue>
		    <numericvalue name="n_openstack_images" desc="Num OpenStack Images">{n_openstack_images}</numericvalue>
                </grp>
                <grp name="Throughput">
		    <numericvalue name="read_mb_sec" desc="Read MB/s">{read_mb_sec}</numericvalue>
		    <numericvalue name="write_mb_sec" desc="Write MB/s">{write_mb_sec}</numericvalue>
                </grp>
		<numericvalue name="op_per_sec" desc="Operations Per Second">{op_per_sec}</numericvalue>
	</data>

        <lemon>
                <cluster>ceph_beesly_mon</cluster>
                <cluster>ceph_beesly_osd</cluster>
        </lemon>

	<servicemanagers>
		<servicemanager email="*****@*****.**" login="******" main="true">Dan van der Ster</servicemanager>
	</servicemanagers>

</serviceupdate>"""
  # Parenthesised single-argument form prints the same text on Python 2 and 3.
  print(template.format(**context))
def write_xml(slsid='Ceph'):
    """Print the SLS XML update for the cluster and send the metrics to Graphite.

    Args:
        slsid: Service id written into the XML ``<id>`` element. Lower-cased
            and with ``_`` replaced by ``.``, it also forms the Graphite
            metric prefixes ``<id>.sls`` and ``<id>.osds``.

    The XML document is printed to standard output. The Graphite update is
    sent as text lines over a socket to ``(CARBON_SERVER, CARBON_PORT)``;
    socket errors propagate to the caller, but the socket is always closed.
    """
    # Local import: `datetime` in this module is used as the class (see the
    # strftime/now call below), and the class offers no epoch-seconds clock.
    import time

    osd_states = cephinfo.get_osd_states()
    osd_stats_sum = cephinfo.get_osd_stats_sum()
    pg_stats_sum = cephinfo.get_pg_stats_sum()['stat_sum']
    try:
        latency = cephinfo.get_write_latency()
        read_latency = cephinfo.get_read_latency()
        cephinfo.rados_cleanup(latency[0])
    except IndexError:
        # Fall back to all-zero latencies so the rest of the report can
        # still be produced.
        latency = ['', [0, 0, 0]]
        read_latency = [0, 0, 0]
    pg_states = cephinfo.get_pg_states()
    osd_df = cephinfo.osd_df_data['nodes']
    activity = cephinfo.get_smooth_activity(10)
    status, availabilityinfo = get_status(pg_stats_sum, latency[1][0] * 1000)

    graphite_root = slsid.replace('_', '.').lower()
    context = {
        "slsid": slsid,
        "timestamp": datetime.strftime(datetime.now(), '%Y-%m-%dT%H:%M:%S'),
        "status": status,
        "availabilityinfo": availabilityinfo,
        "n_mons": cephinfo.get_n_mons(),
        "n_quorum": cephinfo.get_n_mons_quorum(),
        "n_pools": cephinfo.get_n_pools(),
        "n_osds": cephinfo.get_n_osds(),
        "n_osds_up": osd_states['up'],
        "n_osds_in": osd_states['in'],
        "n_pgs": cephinfo.get_n_pgs(),
        "n_osd_gb_total": osd_stats_sum['kb'] / 1024 / 1024,
        "n_osd_gb_used": osd_stats_sum['kb_used'] / 1024 / 1024,
        "n_osd_gb_avail": osd_stats_sum['kb_avail'] / 1024 / 1024,
        "n_pg_gbytes": pg_stats_sum['num_bytes'] / 1024 / 1024 / 1024,
        "n_objects": pg_stats_sum['num_objects'],
        "n_object_copies": pg_stats_sum['num_object_copies'],
        "n_objects_degraded": pg_stats_sum['num_objects_degraded'],
        "n_objects_unfound": pg_stats_sum['num_objects_unfound'],
        "n_objects_misplaced": pg_stats_sum['num_objects_misplaced'],
        "n_read_gb": pg_stats_sum['num_read_kb'] / 1024 / 1024,
        "n_write_gb": pg_stats_sum['num_write_kb'] / 1024 / 1024,
        "latency_ms": latency[1][0] * 1000,
        "latency_max_ms": latency[1][1] * 1000,
        "latency_min_ms": latency[1][2] * 1000,
        "read_latency_ms": read_latency[0] * 1000,
        "read_latency_max_ms": read_latency[1] * 1000,
        "read_latency_min_ms": read_latency[2] * 1000,
        "n_openstack_volumes": cephinfo.get_n_openstack_volumes(),
        "n_openstack_images": cephinfo.get_n_openstack_images(),
        "op_per_sec": activity[0],
        "read_mb_sec": activity[1],
        "write_mb_sec": activity[2],
        "graphite_prefix": graphite_root + '.sls',
        "graphite_osd_prefix": graphite_root + '.osds',
        # Was `int(datetime.time())`, which raises TypeError because
        # `datetime.time` is an instance method of the datetime class.
        "graphite_timestamp": int(time.time()),
    }

    # One context entry per placement-group state: n_pgs_<state>.
    context.update(
        ('n_pgs_%s' % state, count) for state, count in pg_states.items())

    # The XML declaration has to be the very first thing in the document,
    # so the template must not begin with a newline.
    # NOTE(review): the <contact> value below looks like a redacted e-mail
    # placeholder -- confirm the intended address.
    template = """<?xml version="1.0" encoding="utf-8"?>

<serviceupdate xmlns="http://sls.cern.ch/SLS/XML/update">
        <id>{slsid}</id>

        <contact>[email protected]</contact>
        <webpage>https://twiki.cern.ch/twiki/bin/viewauth/DSSGroup/CephProject</webpage>

        <availabilitydesc>Status is available, degraded, or unavailable when the Ceph status is HEALTH_OK, HEALTH_WARN, or HEALTH_ERR, respectively.</availabilitydesc>

        <timestamp>{timestamp}</timestamp>

        <status>{status}</status>

        <availabilityinfo>{availabilityinfo}</availabilityinfo>

        <data>
            <numericvalue name="n_mons" desc="Num Mons">{n_mons}</numericvalue>
            <numericvalue name="n_quorum" desc="Num Mons in Quorum">{n_quorum}</numericvalue>
            <numericvalue name="n_pools" desc="Num Pools">{n_pools}</numericvalue>
            <numericvalue name="n_osds" desc="Num OSDs">{n_osds}</numericvalue>
            <numericvalue name="n_osds_up" desc="Num OSDs Up">{n_osds_up}</numericvalue>
            <numericvalue name="n_osds_in" desc="Num OSDs In">{n_osds_in}</numericvalue>
            <numericvalue name="n_pgs" desc="Num PGs">{n_pgs}</numericvalue>
"""

    # %-formatting inserts the state name now and leaves the {n_pgs_<state>}
    # placeholder for the .format() call further down.
    template += ''.join(
        '        <numericvalue name="n_pgs_%s" desc="Num PGs %s">{n_pgs_%s}</numericvalue>\n' % (
            state, state, state)
        for state in pg_states)

    template += """        <numericvalue name="n_osd_gb_total" desc="OSD Gigabytes Total">{n_osd_gb_total}</numericvalue>
            <numericvalue name="n_osd_gb_used" desc="OSD Gigabytes Used">{n_osd_gb_used}</numericvalue>
            <numericvalue name="n_osd_gb_avail" desc="OSD Gigabytes Avail">{n_osd_gb_avail}</numericvalue>
            <numericvalue name="n_pg_gbytes" desc="PG Gigabytes">{n_pg_gbytes}</numericvalue>
            <numericvalue name="n_objects" desc="Num Objects">{n_objects}</numericvalue>
            <numericvalue name="n_object_copies" desc="Num Object Copies">{n_object_copies}</numericvalue>
            <numericvalue name="n_objects_degraded" desc="Num Objects Degraded">{n_objects_degraded}</numericvalue>
            <numericvalue name="n_objects_unfound" desc="Num Objects Unfound">{n_objects_unfound}</numericvalue>
            <numericvalue name="n_objects_misplaced" desc="Num Objects Misplaced">{n_objects_misplaced}</numericvalue>
            <numericvalue name="n_read_gb" desc="Total Read (GB)">{n_read_gb}</numericvalue>
            <numericvalue name="n_write_gb" desc="Total Write (GB)">{n_write_gb}</numericvalue>
            <numericvalue name="latency_ms" desc="Average">{latency_ms}</numericvalue>
            <numericvalue name="latency_max_ms" desc="Max">{latency_max_ms}</numericvalue>
            <numericvalue name="latency_min_ms" desc="Min">{latency_min_ms}</numericvalue>
            <numericvalue name="read_latency_ms" desc="Average">{read_latency_ms}</numericvalue>
            <numericvalue name="read_latency_max_ms" desc="Max">{read_latency_max_ms}</numericvalue>
            <numericvalue name="read_latency_min_ms" desc="Min">{read_latency_min_ms}</numericvalue>
            <numericvalue name="n_openstack_volumes" desc="Num OpenStack Volumes">{n_openstack_volumes}</numericvalue>
            <numericvalue name="n_openstack_images" desc="Num OpenStack Images">{n_openstack_images}</numericvalue>
            <numericvalue name="read_mb_sec" desc="Read MB/s">{read_mb_sec}</numericvalue>
            <numericvalue name="write_mb_sec" desc="Write MB/s">{write_mb_sec}</numericvalue>
            <numericvalue name="op_per_sec" desc="Operations Per Second">{op_per_sec}</numericvalue>
        </data>
</serviceupdate>
"""
    print(template.format(**context))

    # generate Graphite update: one "<metric path> <value> <timestamp>" line
    # per metric
    graphite = """
{graphite_prefix}.n_mons {n_mons} {graphite_timestamp}
{graphite_prefix}.n_quorum {n_quorum} {graphite_timestamp}
{graphite_prefix}.n_pools {n_pools} {graphite_timestamp}
{graphite_prefix}.n_osds {n_osds} {graphite_timestamp}
{graphite_prefix}.n_osds_up {n_osds_up} {graphite_timestamp}
{graphite_prefix}.n_osds_in {n_osds_in} {graphite_timestamp}
{graphite_prefix}.n_pgs {n_pgs} {graphite_timestamp}
"""

    graphite += ''.join(
        "{graphite_prefix}.n_pgs_%s {n_pgs_%s} {graphite_timestamp}\n" % (
            state, state)
        for state in pg_states)

    graphite += """{graphite_prefix}.n_osd_gb_total {n_osd_gb_total} {graphite_timestamp}
{graphite_prefix}.n_osd_gb_used {n_osd_gb_used} {graphite_timestamp}
{graphite_prefix}.n_osd_gb_avail {n_osd_gb_avail} {graphite_timestamp}
{graphite_prefix}.n_pg_gbytes {n_pg_gbytes} {graphite_timestamp}
{graphite_prefix}.n_objects {n_objects} {graphite_timestamp}
{graphite_prefix}.n_object_copies {n_object_copies} {graphite_timestamp}
{graphite_prefix}.n_objects_degraded {n_objects_degraded} {graphite_timestamp}
{graphite_prefix}.n_objects_unfound {n_objects_unfound} {graphite_timestamp}
{graphite_prefix}.n_objects_misplaced {n_objects_misplaced} {graphite_timestamp}
{graphite_prefix}.n_read_gb {n_read_gb} {graphite_timestamp}
{graphite_prefix}.n_write_gb {n_write_gb} {graphite_timestamp}
{graphite_prefix}.latency_ms {latency_ms} {graphite_timestamp}
{graphite_prefix}.latency_max_ms {latency_max_ms} {graphite_timestamp}
{graphite_prefix}.latency_min_ms {latency_min_ms} {graphite_timestamp}
{graphite_prefix}.read_latency_ms {read_latency_ms} {graphite_timestamp}
{graphite_prefix}.read_latency_max_ms {read_latency_max_ms} {graphite_timestamp}
{graphite_prefix}.read_latency_min_ms {read_latency_min_ms} {graphite_timestamp}
{graphite_prefix}.n_openstack_volumes {n_openstack_volumes} {graphite_timestamp}
{graphite_prefix}.n_openstack_images {n_openstack_images} {graphite_timestamp}
{graphite_prefix}.read_mb_sec {read_mb_sec} {graphite_timestamp}
{graphite_prefix}.write_mb_sec {write_mb_sec} {graphite_timestamp}
{graphite_prefix}.op_per_sec {op_per_sec} {graphite_timestamp}
"""

    # Per-OSD metrics, emitted in this field order for every OSD entry.
    osd_fields = ('crush_weight', 'reweight', 'kb', 'kb_used', 'kb_avail',
                  'utilization', 'var')
    graphite += ''.join(
        "{graphite_osd_prefix}.%s.%s %s {graphite_timestamp}\n" % (
            osd['id'], field, osd[field])
        for osd in osd_df
        for field in osd_fields)

    update = graphite.format(**context)
    if not isinstance(update, bytes):
        # Sockets take bytes; on Python 2 a plain `str` already is bytes and
        # is sent unchanged.
        update = update.encode('utf-8')

    sock = socket.socket()
    try:
        sock.connect((CARBON_SERVER, CARBON_PORT))
        sock.sendall(update)
    finally:
        # Close the socket even when connecting or sending fails.
        sock.close()
def write_xml():
  """Print the SLS ``serviceupdate`` XML document for the Ceph cluster.

  Collects monitor, OSD, placement-group and object figures from
  ``cephinfo``, substitutes them into the XML template below and prints
  the result to standard output.
  """
  osd_states = cephinfo.get_osd_states()
  osd_stats_sum = cephinfo.get_osd_stats_sum()
  pg_stats_sum = cephinfo.get_pg_stats_sum()['stat_sum']
  context = {
    "timestamp": commands.getoutput('date +%Y-%m-%dT%H:%M:%S'),
    "availability": get_availability(),
    "n_mons": cephinfo.get_n_mons(),
    "n_quorum": cephinfo.get_n_mons_quorum(),
    "n_pools": cephinfo.get_n_pools(),
    "n_osds": cephinfo.get_n_osds(),
    "n_osds_up": osd_states['up'],
    "n_osds_in": osd_states['in'],
    "n_pgs": cephinfo.get_n_pgs(),
    "n_pgs_active": cephinfo.get_pg_states()['active'],
    # KB counters are scaled to GB, byte counters likewise.
    "n_osd_gb_total": osd_stats_sum['kb'] / 1024 / 1024,
    "n_osd_gb_used": osd_stats_sum['kb_used'] / 1024 / 1024,
    "n_osd_gb_avail": osd_stats_sum['kb_avail'] / 1024 / 1024,
    "n_pg_gbytes": pg_stats_sum['num_bytes'] / 1024 / 1024 / 1024,
    "n_objects": pg_stats_sum['num_objects'],
    "n_object_copies": pg_stats_sum['num_object_copies'],
    "n_objects_degraded": pg_stats_sum['num_objects_degraded'],
    "n_objects_unfound": pg_stats_sum['num_objects_unfound'],
    "n_read_gb": pg_stats_sum['num_read_kb'] / 1024 / 1024,
    "n_write_gb": pg_stats_sum['num_write_kb'] / 1024 / 1024,
  }
  # The contact address used to be written as "ceph.support\@cern.ch"; Python
  # keeps an unknown escape such as "\@" verbatim, so the backslash ended up
  # in the emitted XML. It is written without the backslash now.
  template = """<?xml version="1.0" encoding="utf-8"?>

<serviceupdate xmlns="http://sls.cern.ch/SLS/XML/update">
   
        <id>Ceph</id>
        <fullname>Ceph Storage Service</fullname>
        <group>IT/DSS</group>

        <contact>ceph.support@cern.ch</contact>
        <webpage>https://twiki.cern.ch/twiki/bin/viewauth/DSSGroup/CephProject</webpage>
        <alarmpage>http://cern.ch/ceph/alarms.html</alarmpage>
 
        <timestamp>{timestamp}</timestamp>
 
        <availability>{availability}</availability>
 
        <refreshperiod>PT15M</refreshperiod>
 
        <availabilitythresholds>
                <threshold level="available">100</threshold>
                <threshold level="affected">95</threshold>
                <threshold level="degraded">90</threshold>
        </availabilitythresholds>

        <data>
		<numericvalue name="n_mons" desc="Num Mons">{n_mons}</numericvalue>
		<numericvalue name="n_quorum" desc="Num Mons in Quorum">{n_quorum}</numericvalue>
		<numericvalue name="n_pools" desc="Num Pools">{n_pools}</numericvalue>
		<numericvalue name="n_osds" desc="Num OSDs">{n_osds}</numericvalue>
		<numericvalue name="n_osds_up" desc="Num OSDs Up">{n_osds_up}</numericvalue>
		<numericvalue name="n_osds_in" desc="Num OSDs In">{n_osds_in}</numericvalue>
		<numericvalue name="n_pgs" desc="Num PGs">{n_pgs}</numericvalue>
		<numericvalue name="n_pgs_active" desc="Num PGs Active">{n_pgs_active}</numericvalue>
		<numericvalue name="n_osd_gb_total" desc="OSD Gigabytes Total">{n_osd_gb_total}</numericvalue>
		<numericvalue name="n_osd_gb_used" desc="OSD Gigabytes Used">{n_osd_gb_used}</numericvalue>
		<numericvalue name="n_osd_gb_avail" desc="OSD Gigabytes Avail">{n_osd_gb_avail}</numericvalue>
		<numericvalue name="n_pg_gbytes" desc="PG Gigabytes">{n_pg_gbytes}</numericvalue>
		<numericvalue name="n_objects" desc="Num Objects">{n_objects}</numericvalue>
		<numericvalue name="n_object_copies" desc="Num Object Copies">{n_object_copies}</numericvalue>
		<numericvalue name="n_objects_degraded" desc="Num Objects Degraded">{n_objects_degraded}</numericvalue>
		<numericvalue name="n_objects_unfound" desc="Num Objects Unfound">{n_objects_unfound}</numericvalue>
		<numericvalue name="n_read_gb" desc="Total Read (GB)">{n_read_gb}</numericvalue>
		<numericvalue name="n_write_gb" desc="Total Write (GB)">{n_write_gb}</numericvalue>
	</data>

        <lemon>
                <cluster>ceph_beesly_mon</cluster>
                <cluster>ceph_beesly_osd</cluster>
        </lemon>

	<servicemanagers>
		<servicemanager email="*****@*****.**" login="******" main="false">Arne Wiebalck</servicemanager>
		<servicemanager email="*****@*****.**" login="******" main="false">Dan van der Ster</servicemanager>
	</servicemanagers>

</serviceupdate>"""
  # Parenthesised single-argument form prints the same text on Python 2 and 3.
  print(template.format(**context))
Exemple #6
0
def write_xml(slsid='Ceph'):
    """Print the SLS XML update for the cluster and send the metrics to Graphite.

    Args:
        slsid: Service id written into the XML ``<id>`` element. Lower-cased
            and with ``_`` replaced by ``.``, it also forms the Graphite
            metric prefixes ``<id>.sls`` and ``<id>.osds``.

    The XML document is printed to standard output. The Graphite update is
    sent as text lines over a socket to ``(CARBON_SERVER, CARBON_PORT)``;
    socket errors propagate to the caller, but the socket is always closed.
    """
    # Local import: `datetime` in this module is used as the class (see the
    # strftime/now call below), and the class offers no epoch-seconds clock.
    import time

    osd_states = cephinfo.get_osd_states()
    osd_stats_sum = cephinfo.get_osd_stats_sum()
    pg_stats_sum = cephinfo.get_pg_stats_sum()['stat_sum']
    try:
        latency = cephinfo.get_write_latency()
        read_latency = cephinfo.get_read_latency()
        cephinfo.rados_cleanup(latency[0])
    except IndexError:
        # Fall back to all-zero latencies so the rest of the report can
        # still be produced.
        latency = ['', [0, 0, 0]]
        read_latency = [0, 0, 0]
    pg_states = cephinfo.get_pg_states()
    osd_df = cephinfo.osd_df_data['nodes']
    activity = cephinfo.get_smooth_activity(10)
    status, availabilityinfo = get_status(pg_stats_sum, latency[1][0]*1000)

    graphite_root = slsid.replace('_', '.').lower()
    context = {
        "slsid": slsid,
        "timestamp": datetime.strftime(datetime.now(), '%Y-%m-%dT%H:%M:%S'),
        "status": status,
        "availabilityinfo": availabilityinfo,
        "n_mons": cephinfo.get_n_mons(),
        "n_quorum": cephinfo.get_n_mons_quorum(),
        "n_pools": cephinfo.get_n_pools(),
        "n_osds": cephinfo.get_n_osds(),
        "n_osds_up": osd_states['up'],
        "n_osds_in": osd_states['in'],
        "n_pgs": cephinfo.get_n_pgs(),
        "n_osd_gb_total": osd_stats_sum['kb'] / 1024 / 1024,
        "n_osd_gb_used": osd_stats_sum['kb_used'] / 1024 / 1024,
        "n_osd_gb_avail": osd_stats_sum['kb_avail'] / 1024 / 1024,
        "n_pg_gbytes": pg_stats_sum['num_bytes'] / 1024 / 1024 / 1024,
        "n_objects": pg_stats_sum['num_objects'],
        "n_object_copies": pg_stats_sum['num_object_copies'],
        "n_objects_degraded": pg_stats_sum['num_objects_degraded'],
        "n_objects_unfound": pg_stats_sum['num_objects_unfound'],
        "n_objects_misplaced": pg_stats_sum['num_objects_misplaced'],
        "n_read_gb": pg_stats_sum['num_read_kb'] / 1024 / 1024,
        "n_write_gb": pg_stats_sum['num_write_kb'] / 1024 / 1024,
        "latency_ms": latency[1][0]*1000,
        "latency_max_ms": latency[1][1]*1000,
        "latency_min_ms": latency[1][2]*1000,
        "read_latency_ms": read_latency[0]*1000,
        "read_latency_max_ms": read_latency[1]*1000,
        "read_latency_min_ms": read_latency[2]*1000,
        "n_openstack_volumes": cephinfo.get_n_openstack_volumes(),
        "n_openstack_images": cephinfo.get_n_openstack_images(),
        "op_per_sec": activity[0],
        "read_mb_sec": activity[1],
        "write_mb_sec": activity[2],
        "graphite_prefix": graphite_root + '.sls',
        "graphite_osd_prefix": graphite_root + '.osds',
        # Was `int(datetime.time())`, which raises TypeError because
        # `datetime.time` is an instance method of the datetime class.
        "graphite_timestamp": int(time.time()),
    }

    # One context entry per placement-group state: n_pgs_<state>.
    context.update(
        ('n_pgs_%s' % state, count) for state, count in pg_states.items())

    # The XML declaration has to be the very first thing in the document,
    # so the template must not begin with a newline.
    # NOTE(review): the <contact> value below looks like a redacted e-mail
    # placeholder -- confirm the intended address.
    template = """<?xml version="1.0" encoding="utf-8"?>

<serviceupdate xmlns="http://sls.cern.ch/SLS/XML/update">
        <id>{slsid}</id>

        <contact>[email protected]</contact>
        <webpage>https://twiki.cern.ch/twiki/bin/viewauth/DSSGroup/CephProject</webpage>

        <availabilitydesc>Status is available, degraded, or unavailable when the Ceph status is HEALTH_OK, HEALTH_WARN, or HEALTH_ERR, respectively.</availabilitydesc>

        <timestamp>{timestamp}</timestamp>

        <status>{status}</status>

        <availabilityinfo>{availabilityinfo}</availabilityinfo>

        <data>
            <numericvalue name="n_mons" desc="Num Mons">{n_mons}</numericvalue>
            <numericvalue name="n_quorum" desc="Num Mons in Quorum">{n_quorum}</numericvalue>
            <numericvalue name="n_pools" desc="Num Pools">{n_pools}</numericvalue>
            <numericvalue name="n_osds" desc="Num OSDs">{n_osds}</numericvalue>
            <numericvalue name="n_osds_up" desc="Num OSDs Up">{n_osds_up}</numericvalue>
            <numericvalue name="n_osds_in" desc="Num OSDs In">{n_osds_in}</numericvalue>
            <numericvalue name="n_pgs" desc="Num PGs">{n_pgs}</numericvalue>
"""

    # %-formatting inserts the state name now and leaves the {n_pgs_<state>}
    # placeholder for the .format() call further down.
    template += ''.join(
        '        <numericvalue name="n_pgs_%s" desc="Num PGs %s">{n_pgs_%s}</numericvalue>\n' % (state, state, state)
        for state in pg_states)

    template += """        <numericvalue name="n_osd_gb_total" desc="OSD Gigabytes Total">{n_osd_gb_total}</numericvalue>
            <numericvalue name="n_osd_gb_used" desc="OSD Gigabytes Used">{n_osd_gb_used}</numericvalue>
            <numericvalue name="n_osd_gb_avail" desc="OSD Gigabytes Avail">{n_osd_gb_avail}</numericvalue>
            <numericvalue name="n_pg_gbytes" desc="PG Gigabytes">{n_pg_gbytes}</numericvalue>
            <numericvalue name="n_objects" desc="Num Objects">{n_objects}</numericvalue>
            <numericvalue name="n_object_copies" desc="Num Object Copies">{n_object_copies}</numericvalue>
            <numericvalue name="n_objects_degraded" desc="Num Objects Degraded">{n_objects_degraded}</numericvalue>
            <numericvalue name="n_objects_unfound" desc="Num Objects Unfound">{n_objects_unfound}</numericvalue>
            <numericvalue name="n_objects_misplaced" desc="Num Objects Misplaced">{n_objects_misplaced}</numericvalue>
            <numericvalue name="n_read_gb" desc="Total Read (GB)">{n_read_gb}</numericvalue>
            <numericvalue name="n_write_gb" desc="Total Write (GB)">{n_write_gb}</numericvalue>
            <numericvalue name="latency_ms" desc="Average">{latency_ms}</numericvalue>
            <numericvalue name="latency_max_ms" desc="Max">{latency_max_ms}</numericvalue>
            <numericvalue name="latency_min_ms" desc="Min">{latency_min_ms}</numericvalue>
            <numericvalue name="read_latency_ms" desc="Average">{read_latency_ms}</numericvalue>
            <numericvalue name="read_latency_max_ms" desc="Max">{read_latency_max_ms}</numericvalue>
            <numericvalue name="read_latency_min_ms" desc="Min">{read_latency_min_ms}</numericvalue>
            <numericvalue name="n_openstack_volumes" desc="Num OpenStack Volumes">{n_openstack_volumes}</numericvalue>
            <numericvalue name="n_openstack_images" desc="Num OpenStack Images">{n_openstack_images}</numericvalue>
            <numericvalue name="read_mb_sec" desc="Read MB/s">{read_mb_sec}</numericvalue>
            <numericvalue name="write_mb_sec" desc="Write MB/s">{write_mb_sec}</numericvalue>
            <numericvalue name="op_per_sec" desc="Operations Per Second">{op_per_sec}</numericvalue>
        </data>
</serviceupdate>
"""
    print(template.format(**context))

    # generate Graphite update: one "<metric path> <value> <timestamp>" line
    # per metric
    graphite = """
{graphite_prefix}.n_mons {n_mons} {graphite_timestamp}
{graphite_prefix}.n_quorum {n_quorum} {graphite_timestamp}
{graphite_prefix}.n_pools {n_pools} {graphite_timestamp}
{graphite_prefix}.n_osds {n_osds} {graphite_timestamp}
{graphite_prefix}.n_osds_up {n_osds_up} {graphite_timestamp}
{graphite_prefix}.n_osds_in {n_osds_in} {graphite_timestamp}
{graphite_prefix}.n_pgs {n_pgs} {graphite_timestamp}
"""

    graphite += ''.join(
        "{graphite_prefix}.n_pgs_%s {n_pgs_%s} {graphite_timestamp}\n" % (state, state)
        for state in pg_states)

    graphite += """{graphite_prefix}.n_osd_gb_total {n_osd_gb_total} {graphite_timestamp}
{graphite_prefix}.n_osd_gb_used {n_osd_gb_used} {graphite_timestamp}
{graphite_prefix}.n_osd_gb_avail {n_osd_gb_avail} {graphite_timestamp}
{graphite_prefix}.n_pg_gbytes {n_pg_gbytes} {graphite_timestamp}
{graphite_prefix}.n_objects {n_objects} {graphite_timestamp}
{graphite_prefix}.n_object_copies {n_object_copies} {graphite_timestamp}
{graphite_prefix}.n_objects_degraded {n_objects_degraded} {graphite_timestamp}
{graphite_prefix}.n_objects_unfound {n_objects_unfound} {graphite_timestamp}
{graphite_prefix}.n_objects_misplaced {n_objects_misplaced} {graphite_timestamp}
{graphite_prefix}.n_read_gb {n_read_gb} {graphite_timestamp}
{graphite_prefix}.n_write_gb {n_write_gb} {graphite_timestamp}
{graphite_prefix}.latency_ms {latency_ms} {graphite_timestamp}
{graphite_prefix}.latency_max_ms {latency_max_ms} {graphite_timestamp}
{graphite_prefix}.latency_min_ms {latency_min_ms} {graphite_timestamp}
{graphite_prefix}.read_latency_ms {read_latency_ms} {graphite_timestamp}
{graphite_prefix}.read_latency_max_ms {read_latency_max_ms} {graphite_timestamp}
{graphite_prefix}.read_latency_min_ms {read_latency_min_ms} {graphite_timestamp}
{graphite_prefix}.n_openstack_volumes {n_openstack_volumes} {graphite_timestamp}
{graphite_prefix}.n_openstack_images {n_openstack_images} {graphite_timestamp}
{graphite_prefix}.read_mb_sec {read_mb_sec} {graphite_timestamp}
{graphite_prefix}.write_mb_sec {write_mb_sec} {graphite_timestamp}
{graphite_prefix}.op_per_sec {op_per_sec} {graphite_timestamp}
"""

    # Per-OSD metrics, emitted in this field order for every OSD entry.
    osd_fields = ('crush_weight', 'reweight', 'kb', 'kb_used', 'kb_avail',
                  'utilization', 'var')
    graphite += ''.join(
        "{graphite_osd_prefix}.%s.%s %s {graphite_timestamp}\n" % (osd['id'], field, osd[field])
        for osd in osd_df
        for field in osd_fields)

    update = graphite.format(**context)
    if not isinstance(update, bytes):
        # Sockets take bytes; on Python 2 a plain `str` already is bytes and
        # is sent unchanged.
        update = update.encode('utf-8')

    sock = socket.socket()
    try:
        sock.connect((CARBON_SERVER, CARBON_PORT))
        sock.sendall(update)
    finally:
        # Close the socket even when connecting or sending fails.
        sock.close()
Exemple #7
0
def write_xml():
    """Print an SLS service-update XML document for the Ceph service.

    Gathers monitor, OSD, PG, object, latency, OpenStack and activity
    figures from ``cephinfo``, combines them with the value returned by
    ``get_availability()``, substitutes them into the XML template below
    and writes the result to standard output.

    Takes no arguments and returns ``None``.
    """
    osd_states = cephinfo.get_osd_states()
    osd_stats_sum = cephinfo.get_osd_stats_sum()
    pg_stats_sum = cephinfo.get_pg_stats_sum()['stat_sum']
    pg_states = cephinfo.get_pg_states()

    # The write probe runs first, then the read probe, and only afterwards
    # is the cleanup called with the first element of the write result.
    # NOTE(review): presumably latency[0] identifies the objects the write
    # probe created and the read probe re-reads them -- confirm in cephinfo.
    latency = cephinfo.get_write_latency()
    read_latency = cephinfo.get_read_latency()
    cephinfo.rados_cleanup(latency[0])
    write_avg, write_max, write_min = latency[1][0], latency[1][1], latency[1][2]

    activity = cephinfo.get_smooth_activity(10)

    context = {
        "timestamp": commands.getoutput('date +%Y-%m-%dT%H:%M:%S'),
        "availability": get_availability(),
        # Monitors / pools / OSDs / PGs
        "n_mons": cephinfo.get_n_mons(),
        "n_quorum": cephinfo.get_n_mons_quorum(),
        "n_pools": cephinfo.get_n_pools(),
        "n_osds": cephinfo.get_n_osds(),
        "n_osds_up": osd_states['up'],
        "n_osds_in": osd_states['in'],
        "n_pgs": cephinfo.get_n_pgs(),
        "n_pgs_active": pg_states['active'],
        "n_pgs_scrubbing": pg_states['scrubbing'],
        "n_pgs_deep": pg_states['deep'],
        # Capacity: the 'kb*' counters are scaled down twice by 1024 and
        # 'num_bytes' three times, and reported under *_gb / gbytes names.
        "n_osd_gb_total": osd_stats_sum['kb'] / 1024 / 1024,
        "n_osd_gb_used": osd_stats_sum['kb_used'] / 1024 / 1024,
        "n_osd_gb_avail": osd_stats_sum['kb_avail'] / 1024 / 1024,
        "n_pg_gbytes": pg_stats_sum['num_bytes'] / 1024 / 1024 / 1024,
        # Object counters
        "n_objects": pg_stats_sum['num_objects'],
        "n_object_copies": pg_stats_sum['num_object_copies'],
        "n_objects_degraded": pg_stats_sum['num_objects_degraded'],
        "n_objects_unfound": pg_stats_sum['num_objects_unfound'],
        # Cumulative read/write volume
        "n_read_gb": pg_stats_sum['num_read_kb'] / 1024 / 1024,
        "n_write_gb": pg_stats_sum['num_write_kb'] / 1024 / 1024,
        # Latencies are multiplied by 1000 and reported as *_ms
        # (presumably the probes return seconds -- TODO confirm).
        "latency_ms": write_avg * 1000,
        "latency_max_ms": write_max * 1000,
        "latency_min_ms": write_min * 1000,
        "read_latency_ms": read_latency[0] * 1000,
        "read_latency_max_ms": read_latency[1] * 1000,
        "read_latency_min_ms": read_latency[2] * 1000,
        # OpenStack usage
        "n_openstack_volumes": cephinfo.get_n_openstack_volumes(),
        "n_openstack_images": cephinfo.get_n_openstack_images(),
        # Activity tuple is (ops, read MB/s, write MB/s) by index.
        "op_per_sec": activity[0],
        "read_mb_sec": activity[1],
        "write_mb_sec": activity[2],
    }

    # '@' needs no escaping in a Python string literal; a backslash placed
    # in front of it would be emitted verbatim into the <contact> element.
    template = """<?xml version="1.0" encoding="utf-8"?>

<serviceupdate xmlns="http://sls.cern.ch/SLS/XML/update">
   
        <id>Ceph</id>
        <fullname>Ceph Storage Service</fullname>
        <group>IT/DSS</group>

        <contact>ceph.support@cern.ch</contact>
        <webpage>https://twiki.cern.ch/twiki/bin/viewauth/DSSGroup/CephProject</webpage>
        <alarmpage>http://cern.ch/ceph/alarms.html</alarmpage>
 
        <timestamp>{timestamp}</timestamp>
 
        <availability>{availability}</availability>
 
        <refreshperiod>PT15M</refreshperiod>
 
        <availabilitythresholds>
                <threshold level="available">98</threshold>
                <threshold level="affected">90</threshold>
                <threshold level="degraded">80</threshold>
        </availabilitythresholds>

        <data>
                <grp name="Monitors">
		    <numericvalue name="n_mons" desc="Num Mons">{n_mons}</numericvalue>
		    <numericvalue name="n_quorum" desc="Num Mons in Quorum">{n_quorum}</numericvalue>
                </grp>
		<numericvalue name="n_pools" desc="Num Pools">{n_pools}</numericvalue>
                <grp name="OSDs">
		    <numericvalue name="n_osds" desc="Num OSDs">{n_osds}</numericvalue>
		    <numericvalue name="n_osds_up" desc="Num OSDs Up">{n_osds_up}</numericvalue>
		    <numericvalue name="n_osds_in" desc="Num OSDs In">{n_osds_in}</numericvalue>
                </grp>
                <grp name="PGs">
		    <numericvalue name="n_pgs" desc="Num PGs">{n_pgs}</numericvalue>
		    <numericvalue name="n_pgs_active" desc="Num PGs Active">{n_pgs_active}</numericvalue>
		    <numericvalue name="n_pgs_scrubbing" desc="Num PGs Scrubbing">{n_pgs_scrubbing}</numericvalue>
		    <numericvalue name="n_pgs_deep" desc="Num PGs Deep Scrubbing">{n_pgs_deep}</numericvalue>
                </grp>
                <grp name="Disk Space">
		    <numericvalue name="n_osd_gb_total" desc="OSD Gigabytes Total">{n_osd_gb_total}</numericvalue>
		    <numericvalue name="n_osd_gb_used" desc="OSD Gigabytes Used">{n_osd_gb_used}</numericvalue>
		    <numericvalue name="n_osd_gb_avail" desc="OSD Gigabytes Avail">{n_osd_gb_avail}</numericvalue>
		    <numericvalue name="n_pg_gbytes" desc="PG Gigabytes">{n_pg_gbytes}</numericvalue>
                </grp>
                <grp name="Objects">
		    <numericvalue name="n_objects" desc="Num Objects">{n_objects}</numericvalue>
		    <numericvalue name="n_object_copies" desc="Num Object Copies">{n_object_copies}</numericvalue>
		    <numericvalue name="n_objects_degraded" desc="Num Objects Degraded">{n_objects_degraded}</numericvalue>
		    <numericvalue name="n_objects_unfound" desc="Num Objects Unfound">{n_objects_unfound}</numericvalue>
                </grp>
                <grp name="Total RW">
		    <numericvalue name="n_read_gb" desc="Total Read (GB)">{n_read_gb}</numericvalue>
		    <numericvalue name="n_write_gb" desc="Total Write (GB)">{n_write_gb}</numericvalue>
                </grp>
                <grp name="4KB Write Latency (ms)">
		    <numericvalue name="latency_ms" desc="Average">{latency_ms}</numericvalue>
		    <numericvalue name="latency_max_ms" desc="Max">{latency_max_ms}</numericvalue>
		    <numericvalue name="latency_min_ms" desc="Min">{latency_min_ms}</numericvalue>
                </grp>
                <grp name="4KB Read Latency (ms)">
		    <numericvalue name="read_latency_ms" desc="Average">{read_latency_ms}</numericvalue>
		    <numericvalue name="read_latency_max_ms" desc="Max">{read_latency_max_ms}</numericvalue>
		    <numericvalue name="read_latency_min_ms" desc="Min">{read_latency_min_ms}</numericvalue>
                </grp>
                <grp name="OpenStack">
		    <numericvalue name="n_openstack_volumes" desc="Num OpenStack Volumes">{n_openstack_volumes}</numericvalue>
		    <numericvalue name="n_openstack_images" desc="Num OpenStack Images">{n_openstack_images}</numericvalue>
                </grp>
                <grp name="Throughput">
		    <numericvalue name="read_mb_sec" desc="Read MB/s">{read_mb_sec}</numericvalue>
		    <numericvalue name="write_mb_sec" desc="Write MB/s">{write_mb_sec}</numericvalue>
                </grp>
		<numericvalue name="op_per_sec" desc="Operations Per Second">{op_per_sec}</numericvalue>
	</data>

        <lemon>
                <cluster>ceph_beesly_mon</cluster>
                <cluster>ceph_beesly_osd</cluster>
        </lemon>

	<servicemanagers>
		<servicemanager email="*****@*****.**" login="******" main="true">Dan van der Ster</servicemanager>
	</servicemanagers>

</serviceupdate>"""
    # Parenthesised single-argument form prints identically under the
    # Python 2 print statement and also parses as a Python 3 call.
    print(template.format(**context))