def show_summary(report, cluster):
    """Render the top-level cluster summary blocks: status, OSD settings,
    client activity and health messages.

    report: report object collecting HTML blocks via add_block(weight, header, content).
    cluster: collected ceph cluster state (counters, settings, health_summary).
    """
    t = html2.HTMLTable(["Setting", "Value"])
    t.add_cells("Collected at", cluster.report_collected_at_local)
    t.add_cells("Collected at GMT", cluster.report_collected_at_gmt)
    t.add_cells("Status", cluster.overall_status)
    t.add_cells("PG count", cluster.num_pgs)
    t.add_cells("Pool count", len(cluster.pools))
    t.add_cells("Used", b2ssize(cluster.bytes_used, False))
    t.add_cells("Avail", b2ssize(cluster.bytes_avail, False))
    t.add_cells("Data", b2ssize(cluster.data_bytes, False))

    avail_perc = cluster.bytes_avail * 100 / cluster.bytes_total
    t.add_cells("Free %", avail_perc)

    osd_count = len(cluster.osds)
    t.add_cells("Mon count", len(cluster.mons))
    report.add_block(3, "Status:", t)

    if cluster.settings is None:
        t = H.font("No live OSD found!", color="red")
    else:
        t = html2.HTMLTable(["Setting", "Value"])
        t.add_cells("Count", osd_count)
        # guard: settings may be present while no OSD is known, which
        # previously raised ZeroDivisionError here
        t.add_cells("PG per OSD",
                    cluster.num_pgs / osd_count if osd_count else "-")
        t.add_cells("Cluster net", cluster.cluster_net)
        t.add_cells("Public net", cluster.public_net)
        t.add_cells("Near full ratio", cluster.settings.mon_osd_nearfull_ratio)
        t.add_cells("Full ratio", cluster.settings.mon_osd_full_ratio)
        t.add_cells("Backfill full ratio",
                    cluster.settings.osd_backfill_full_ratio)
        t.add_cells("Filesafe full ratio",
                    cluster.settings.osd_failsafe_full_ratio)
        t.add_cells("Journal aio", cluster.settings.journal_aio)
        t.add_cells("Journal dio", cluster.settings.journal_dio)
        t.add_cells("Filestorage sync",
                    str(cluster.settings.filestore_max_sync_interval) + 's')

    report.add_block(3, "OSD:", t)

    t = html2.HTMLTable(["Setting", "Value"])
    t.add_cells("Client IO Bps", b2ssize(cluster.write_bytes_sec, False))
    t.add_cells("Client IO IOPS", b2ssize(cluster.op_per_sec, False))
    report.add_block(2, "Activity:", t)

    if len(cluster.health_summary) != 0:
        t = html2.Doc()
        for msg in cluster.health_summary:
            # severity drives the message color
            if msg['severity'] == "HEALTH_WARN":
                color = "orange"
            elif msg['severity'] == "HEALTH_ERR":
                color = "red"
            else:
                color = "black"
            t.font(msg['summary'].capitalize(), color=color)
            t.br  # attribute access emits the line break tag
        report.add_block(2, "Status messages:", t)
def show_pools_info(report, cluster):
    """Render the per-pool statistics table (sizes, IO, PG distribution).

    report: report object collecting HTML blocks via add_block().
    cluster: collected ceph cluster state with pools / osd_pool_pg_2d maps.
    """
    table = html2.HTMLTable(
        headers=["Pool", "Id", "size", "min_size", "obj", "data", "free",
                 "read", "write", "ruleset", "PG", "PGP",
                 "PG per OSD<br>Dev %"])

    for _, pool in sorted(cluster.pools.items()):
        table.add_cell(pool.name)
        table.add_cell(str(pool.id))
        table.add_cell(str(pool.size))
        table.add_cell(str(pool.min_size))
        table.add_cell(b2ssize(int(pool.num_objects), base=1000),
                       sorttable_customkey=str(int(pool.num_objects)))
        table.add_cell(b2ssize(int(pool.size_bytes), False),
                       sorttable_customkey=str(int(pool.size_bytes)))
        table.add_cell('---')  # "free" is not collected yet
        table.add_cell(b2ssize(int(pool.read_bytes), False),
                       sorttable_customkey=str(int(pool.read_bytes)))
        table.add_cell(b2ssize(int(pool.write_bytes), False),
                       sorttable_customkey=str(int(pool.write_bytes)))
        table.add_cell(str(pool.crush_ruleset))
        table.add_cell(str(pool.pg_num))
        table.add_cell(str(pool.pg_placement_num))

        if cluster.sum_per_osd is None:
            table.add_cell('-')
        else:
            # relative std deviation (%) of this pool's PG count across OSDs
            row = [osd_pg.get(pool.name, 0)
                   for osd_pg in cluster.osd_pool_pg_2d.values()]
            # guard: an empty OSD map or a pool with zero PGs on every OSD
            # previously raised ZeroDivisionError
            if row and sum(row) != 0:
                avg = float(sum(row)) / len(row)
                dev = (sum((i - avg) ** 2.0 for i in row) / len(row)) ** 0.5
                table.add_cell(str(int(dev * 100. / avg)))
            else:
                table.add_cell('-')

        table.next_row()

    report.add_block(8, "Pool's stats:", table)
def show_hosts_info(report, cluster):
    """Render the per-host summary table: services, CPU, RAM, swap, load.

    report: report object collecting HTML blocks via add_block().
    cluster: collected ceph cluster state with hosts / osds / mons.
    """
    header_row = ["Name",
                  "Services",
                  "CPU's",
                  "RAM<br>total",
                  "RAM<br>free",
                  "Swap<br>used",
                  "Load avg<br>5m"]
    table = html2.HTMLTable(headers=header_row)
    for host in sorted(cluster.hosts.values(), key=lambda x: x.name):
        services = ["osd-{0}".format(osd.id)
                    for osd in cluster.osds
                    if osd.host == host.name]
        all_mons = [mon.name for mon in cluster.mons]
        if host.name in all_mons:
            services.append("mon(" + host.name + ")")

        table.add_cell(host.name)

        # group services three per line; ceil-divide so a trailing group of
        # one or two services is not dropped (the old floor division
        # silently truncated the remainder)
        srv_strs = []
        for idx in range((len(services) + 2) // 3):
            srv_strs.append(",".join(services[idx * 3:idx * 3 + 3]))
        table.add_cell("<br>".join(srv_strs))

        if host.hw_info is None:
            # explicit loop instead of map(): map() is lazy on Python 3
            # and would add no cells at all
            for _ in range(len(header_row) - 2):
                table.add_cell('-')
            table.next_row()
            continue

        if host.hw_info.cores == []:
            table.add_cell("Error", sorttable_customkey='0')
        else:
            table.add_cell(sum(count for _, count in host.hw_info.cores))

        table.add_cell(b2ssize(host.mem_total),
                       sorttable_customkey=str(host.mem_total))
        table.add_cell(b2ssize(host.mem_free),
                       sorttable_customkey=str(host.mem_free))
        table.add_cell(b2ssize(host.swap_total - host.swap_free),
                       sorttable_customkey=str(host.swap_total - host.swap_free))

        table.add_cell(host.load_5m)
        table.next_row()

    report.add_block(6, "Host's info:", table)
def show_hosts_info(report, cluster):
    """Render the per-host summary table: services, CPU, RAM, swap, load.

    report: report object collecting HTML blocks via add_block().
    cluster: collected ceph cluster state with hosts / osds / mons.
    """
    header_row = ["Name", "Services", "CPU's", "RAM<br>total",
                  "RAM<br>free", "Swap<br>used", "Load avg<br>5m"]
    table = html2.HTMLTable(headers=header_row)
    for host in sorted(cluster.hosts.values(), key=lambda x: x.name):
        services = ["osd-{0}".format(osd.id)
                    for osd in cluster.osds
                    if osd.host == host.name]
        all_mons = [mon.name for mon in cluster.mons]
        if host.name in all_mons:
            services.append("mon(" + host.name + ")")

        table.add_cell(host.name)

        # three services per display line; use ceiling division so the
        # trailing partial group is kept (floor division dropped it)
        srv_strs = []
        for idx in range((len(services) + 2) // 3):
            srv_strs.append(",".join(services[idx * 3: idx * 3 + 3]))
        table.add_cell("<br>".join(srv_strs))

        if host.hw_info is None:
            # plain loop instead of map(): map() is lazy on Python 3 and
            # the placeholder cells would never be added
            for _ in range(len(header_row) - 2):
                table.add_cell('-')
            table.next_row()
            continue

        if host.hw_info.cores == []:
            table.add_cell("Error", sorttable_customkey='0')
        else:
            table.add_cell(sum(count for _, count in host.hw_info.cores))

        table.add_cell(b2ssize(host.mem_total),
                       sorttable_customkey=str(host.mem_total))
        table.add_cell(b2ssize(host.mem_free),
                       sorttable_customkey=str(host.mem_free))
        table.add_cell(b2ssize(host.swap_total - host.swap_free),
                       sorttable_customkey=str(host.swap_total - host.swap_free))

        table.add_cell(host.load_5m)
        table.next_row()

    report.add_block(6, "Host's info:", table)
def show_mons_info(report, cluster):
    """Render the monitors table: name, health, role and free disk space.

    report: report object collecting HTML blocks via add_block().
    cluster: collected ceph cluster state with a list of mon objects.
    """
    # second column holds the monitor health status, not a node name -
    # header fixed to match the data
    table = html2.HTMLTable(
        headers=["Name", "Health", "Role", "Disk free<br>B (%)"])
    for mon in cluster.mons:
        if mon.health == "HEALTH_OK":
            health = html_ok("HEALTH_OK")
        else:
            health = html_fail(mon.health)

        line = [
            mon.name,
            health,
            mon.role,
            "{0} ({1})".format(b2ssize(mon.kb_avail * 1024, False),
                               mon.avail_percent)
        ]
        # list comprehension instead of map() so the row is a real list
        # on Python 3 as well
        table.add_row([str(val) for val in line])

    report.add_block(3, "Monitors info:", table)
def show_mons_info(report, cluster):
    """Render the monitors table: name, health, role and free disk space.

    report: report object collecting HTML blocks via add_block().
    cluster: collected ceph cluster state with a list of mon objects.
    """
    # the cell under the second header is the health marker, so the header
    # text is corrected from "Node" to "Health"
    table = html2.HTMLTable(headers=["Name", "Health", "Role",
                                     "Disk free<br>B (%)"])
    for mon in cluster.mons:
        if mon.health == "HEALTH_OK":
            health = html_ok("HEALTH_OK")
        else:
            health = html_fail(mon.health)

        line = [
            mon.name,
            health,
            mon.role,
            "{0} ({1})".format(b2ssize(mon.kb_avail * 1024, False),
                               mon.avail_percent)
        ]
        # explicit list so the row contents exist on Python 3 too
        table.add_row([str(val) for val in line])

    report.add_block(3, "Monitors info:", table)
def show_hosts_resource_usage(report, cluster):
    """Render per-host network adapter settings and traffic table.

    report: report object collecting HTML blocks via add_block().
    cluster: collected ceph cluster state with hosts and their adapters.
    """
    nets_info = {}

    for host in cluster.hosts.values():
        ceph_adapters = [net.name
                         for net in (host.cluster_net, host.public_net)
                         if net is not None]
        # BUGFIX: filter by adapter *name*; the old test
        # `host.cluster_net not in ceph_adapters` compared a net object
        # against a list of names and therefore never excluded anything
        nets = [net for net in host.net_adapters.values()
                if net.is_phy and net.name not in ceph_adapters]
        nets_info[host.name] = sorted(nets, key=lambda x: x.name)

    if len(nets_info) == 0:
        max_nets = 0
    else:
        max_nets = max(map(len, nets_info.values()))

    header_row = ["Hostname",
                  "Cluster net<br>dev, ip<br>settings",
                  "Cluster net<br>uptime average<br>send/recv",
                  "Cluster net<br>current<br>send/recv",
                  "Public net<br>dev, ip<br>settings",
                  "Public net<br>uptime average<br>send/recv",
                  "Public net<br>current<br>send/recv"]
    header_row += ["Net"] * max_nets
    row_len = len(header_row)
    table = html2.HTMLTable(headers=header_row)

    for host in sorted(cluster.hosts.values(), key=lambda x: x.name):
        perf_info = [host.name]
        for net in (host.cluster_net, host.public_net):
            if net is None:
                perf_info.extend(["-"] * 3)
            else:
                dev_ip = "{0}<br>{1}".format(net.name, net.ip)
                if host.hw_info is None or net.name not in host.hw_info.net_info:
                    perf_info.append(dev_ip)
                else:
                    speed, dtype, _ = host.hw_info.net_info[net.name]
                    settings = "{0}, {1}".format(speed, dtype)
                    perf_info.append("{0}<br>{1}".format(dev_ip, settings))

                # uptime-averaged traffic
                perf_info.append("{0} / {1} Bps<br>{2} / {3} Pps".format(
                    b2ssize(float(net.perf_stats.sbytes) / host.uptime, False),
                    b2ssize(float(net.perf_stats.rbytes) / host.uptime, False),
                    b2ssize(float(net.perf_stats.spackets) / host.uptime, False),
                    b2ssize(float(net.perf_stats.rpackets) / host.uptime, False)
                ))

                if net.perf_stats_curr is not None:
                    perf_info.append("{0} / {1} Bps<br>{2} / {3} Pps".format(
                        b2ssize(net.perf_stats_curr.sbytes, False),
                        b2ssize(net.perf_stats_curr.rbytes, False),
                        b2ssize(net.perf_stats_curr.spackets, False),
                        b2ssize(net.perf_stats_curr.rpackets, False),
                    ))
                else:
                    perf_info.append('-')

        for net in nets_info[host.name]:
            if net.speed is not None:
                cell_data = H.div(net.name, _class="left") + \
                            H.div(b2ssize(net.speed), _class="right")
                perf_info.append(cell_data)
            else:
                perf_info.append(net.name)

        # pad short rows so every row has row_len cells
        perf_info += ['-'] * (row_len - len(perf_info))
        table.add_row([str(val) for val in perf_info])

    report.add_block(12, "Host's resource usage:", table)
def show_host_io_load_in_color(report, cluster):
    """Render current per-host, per-device disk IO load as color-coded
    tables, one table per metric (IOPS, Bps, latency, QD, active time).

    report: report object collecting HTML blocks via add_block().
    cluster: collected ceph cluster state with OSD device statistics.
    """
    # metric -> {host -> {device -> value}}
    rbts = collections.defaultdict(lambda: {})
    wbts = collections.defaultdict(lambda: {})
    bts = collections.defaultdict(lambda: {})

    wiops = collections.defaultdict(lambda: {})
    riops = collections.defaultdict(lambda: {})
    iops = collections.defaultdict(lambda: {})

    queue_depth = collections.defaultdict(lambda: {})
    lat = collections.defaultdict(lambda: {})
    io_time = collections.defaultdict(lambda: {})
    w_io_time = collections.defaultdict(lambda: {})

    for osd in cluster.osds:
        for dev_stat in (osd.data_stor_stats, osd.j_stor_stats):
            if dev_stat is None or 'write_bytes_curr' not in dev_stat:
                continue

            dev = os.path.basename(dev_stat.root_dev)
            wbts[osd.host][dev] = dev_stat.write_bytes_curr
            rbts[osd.host][dev] = dev_stat.read_bytes_curr
            bts[osd.host][dev] = (dev_stat.write_bytes_curr +
                                  dev_stat.read_bytes_curr)

            wiops[osd.host][dev] = dev_stat.write_iops_curr
            riops[osd.host][dev] = dev_stat.read_iops_curr
            iops[osd.host][dev] = (dev_stat.write_iops_curr +
                                   dev_stat.read_iops_curr)

            queue_depth[osd.host][dev] = dev_stat.w_io_time_curr
            lat[osd.host][dev] = dev_stat.lat_curr * 1000
            io_time[osd.host][dev] = int(dev_stat.io_time_curr * 100)
            w_io_time[osd.host][dev] = dev_stat.w_io_time_curr

    if len(wbts) == 0:
        # typo fixed in the user-visible message ("awailable")
        report.add_block(1, "No current IO load available", "")
        return

    # (metric map, size base, title, minimal scale for coloring)
    loads = [
        (iops, 1000, 'IOPS', 50),
        (riops, 1000, 'Read IOPS', 30),
        (wiops, 1000, 'Write IOPS', 30),

        (bts, 1024, 'Bps', 100 * 1024 ** 2),
        (rbts, 1024, 'Read Bps', 100 * 1024 ** 2),
        (wbts, 1024, 'Write Bps', 100 * 1024 ** 2),

        (lat, None, 'Latency, ms', 20),
        (queue_depth, None, 'Average QD', 3),
        (io_time, None, 'Active time %', 100),
        # (w_io_time, None, 'W. time share', 5.0),
    ]

    report.add_block(12, "Current disk IO load", "")

    # the old enumerate() index was never used - plain iteration
    for target, base, tp, min_max_val in loads:
        if target is lat:
            max_val = 300.0  # fixed latency ceiling for color scaling
        else:
            max_val = max(map(max, [data.values()
                                    for data in target.values()]))

        max_val = max(min_max_val, max_val)
        max_len = max(map(len, target.values()))

        table = html2.HTMLTable(['host'] + ['load'] * max_len, zebra=False)
        for host_name, data in sorted(target.items()):
            table.add_cell(host_name)
            for dev, val in sorted(data.items()):
                if max_val == 0:
                    color = "#FFFFFF"
                else:
                    color = val_to_color(float(val) / max_val)

                if target is lat:
                    s_val = str(int(val))
                elif target is queue_depth:
                    s_val = "%.1f" % (val,)
                elif base is None:
                    s_val = "%.1f" % (val,)
                else:
                    s_val = b2ssize(val, False, base=base)

                cell_data = H.div(dev, _class="left") + \
                            H.div(s_val, _class="right")
                table.add_cell(cell_data,
                               bgcolor=color,
                               sorttable_customkey=str(val))

            # pad hosts that have fewer devices than the widest row
            for i in range(max_len - len(data)):
                table.add_cell("-", sorttable_customkey='0')
            table.next_row()

        ncols = max_len + 1  # header
        weight = max(2, min(12, ncols))
        report.add_block(weight, tp, table, "Disk IO - " + tp)
def show_host_network_load_in_color(report, cluster):
    """Render current per-host network load (send / receive), colored
    relative to each adapter's maximum throughput.

    report: report object collecting HTML blocks via add_block().
    cluster: collected ceph cluster state with hosts and their adapters.
    """
    net_io = collections.defaultdict(lambda: {})
    send_net_io = collections.defaultdict(lambda: {})
    recv_net_io = collections.defaultdict(lambda: {})

    for host in cluster.hosts.values():
        ceph_adapters = []
        for net in (host.cluster_net, host.public_net):
            if net is not None:
                ceph_adapters.append(net.name)
            else:
                ceph_adapters.append('-')

        # ceph nets first under fixed labels, remaining physical adapters after
        nets = [('cluster', host.cluster_net), ('public', host.public_net)]
        nets += [(net.name, net) for net in host.net_adapters.values()
                 if net.is_phy and net.name not in ceph_adapters]

        for name, net in nets:
            if net is None or net.perf_stats_curr is None:
                continue

            usage = max((net.perf_stats_curr.sbytes,
                         net.perf_stats_curr.rbytes))

            # idle non-ceph adapters are skipped entirely
            if usage > 0 or name in ('cluster', 'public'):
                net_io[host.name][name] = (usage, net.speed)
                send_net_io[host.name][name] = (net.perf_stats_curr.sbytes,
                                                net.speed)
                recv_net_io[host.name][name] = (net.perf_stats_curr.rbytes,
                                                net.speed)

    if len(net_io) == 0:
        # typo fixed in the user-visible message ("awailable")
        report.add_block(6, "No network load available", "")
        return

    std_nets = set(['public', 'cluster'])
    report.add_block(12, "Network load (to max dev throughput)", "")
    loads = [
        # (net_io, "Network load max (to max dev throughput):"),
        (send_net_io, "Send"),
        (recv_net_io, "Receive"),
    ]

    for io, name in loads:
        max_len = max(len(set(data) - std_nets) for data in io.values())

        table = html2.HTMLTable(
            ["host", "public<br>net", "cluster<br>net"] +
            ["hw adapter"] * max_len,
            zebra=False)

        for host_name, data in sorted(io.items()):
            net_names = list(std_nets) + sorted(set(data) - std_nets)
            table.add_cell(host_name)
            for net_name in net_names:
                if net_name not in data:
                    table.add_cell('-')
                    continue

                usage, speed = data[net_name]
                if speed is None:
                    color = "#FFFFFF"
                else:
                    color = val_to_color(float(usage) / speed)

                if net_name not in std_nets:
                    text = H.div(net_name, _class="left") + \
                           H.div(b2ssize(usage, False), _class="right")
                else:
                    text = b2ssize(usage, False)

                table.add_cell(text, bgcolor=color,
                               sorttable_customkey=str(usage))

            # len(data) instead of building a throwaway .items() list
            for i in range(max_len + len(std_nets) - len(data)):
                table.add_cell("-", sorttable_customkey='0')
            table.next_row()

        ncols = max_len + len(std_nets) + 1  # header
        weight = max(2, min(12, ncols))
        report.add_block(weight, name, table, "Network - " + name)
def show_osd_perf_info(report, cluster):
    """Render two per-OSD performance tables - uptime-average load and
    current load - covering the data (D) and journal (J) device of each OSD.

    report: report object collecting HTML blocks via add_block().
    cluster: collected ceph cluster state with a list of OSD objects.
    """
    # --- table 1: uptime-average load -----------------------------------
    table = html2.HTMLTable(headers=[
        "OSD", "node", "apply<br>lat, ms", "commit<br>lat, ms",
        "D dev", "D read<br>Bps", "D write<br>Bps",
        "D read<br>OPS", "D write<br>OPS", "D IO<br>time %",
        "J dev", "J read<br>Bps", "J write<br>Bps",
        "J read<br>OPS", "J write<br>OPS", "J IO<br>time %",
    ])
    for osd in cluster.osds:
        if osd.osd_perf is not None:
            apply_latency_ms = osd.osd_perf["apply_latency_ms"]
            commit_latency_ms = osd.osd_perf["commit_latency_ms"]
        else:
            apply_latency_ms = HTML_UNKNOWN
            commit_latency_ms = HTML_UNKNOWN

        # (display value, sort key) pairs; data device first, journal second
        perf_info = []
        for dev_stat in (osd.data_stor_stats, osd.j_stor_stats):
            if dev_stat is None or 'read_bytes_uptime' not in dev_stat:
                # placeholders for the six device columns
                perf_info.extend([('-', 0)] * 6)
                continue
            perf_info.extend([
                (os.path.basename(dev_stat.root_dev),
                 os.path.basename(dev_stat.root_dev)),
                (b2ssize(dev_stat.read_bytes_uptime, False),
                 dev_stat.read_bytes_uptime),
                (b2ssize(dev_stat.write_bytes_uptime, False),
                 dev_stat.write_bytes_uptime),
                (b2ssize(dev_stat.read_iops_uptime, False),
                 dev_stat.read_iops_uptime),
                (b2ssize(dev_stat.write_iops_uptime, False),
                 dev_stat.write_iops_uptime),
                (int(dev_stat.io_time_uptime * 100), dev_stat.io_time_uptime)
            ])

        table.add_cell(str(osd.id))
        table.add_cell(osd.host)
        table.add_cell(str(apply_latency_ms))
        table.add_cell(str(commit_latency_ms))
        for val, sorted_val in perf_info:
            table.add_cell(str(val), sorttable_customkey=str(sorted_val))
        table.next_row()

    report.add_block(8, "OSD's load uptime average:", table)

    # --- table 2: current load ------------------------------------------
    table = html2.HTMLTable(headers=[
        "OSD", "node",
        "D dev", "D read<br>Bps", "D write<br>Bps",
        "D read<br>OPS", "D write<br>OPS", "D lat<br>ms", "D IO<br>time %",
        "J dev", "J read<br>Bps", "J write<br>Bps",
        "J read<br>OPS", "J write<br>OPS", "J lat<br>ms", "J IO<br>time %",
    ])

    have_any_data = False
    for osd in cluster.osds:
        perf_info = []
        have_data = False
        for dev_stat in (osd.data_stor_stats, osd.j_stor_stats):
            if dev_stat is None or 'read_bytes_curr' not in dev_stat:
                # placeholders for the seven device columns
                perf_info.extend([('-', 0)] * 7)
                continue
            have_data = True
            have_any_data = True
            perf_info.extend([
                (os.path.basename(dev_stat.root_dev), None),
                (b2ssize(dev_stat.read_bytes_curr, False),
                 dev_stat.read_bytes_curr),
                (b2ssize(dev_stat.write_bytes_curr, False),
                 dev_stat.write_bytes_curr),
                (b2ssize(dev_stat.read_iops_curr, False),
                 dev_stat.read_iops_curr),
                (b2ssize(dev_stat.write_iops_curr, False),
                 dev_stat.write_iops_curr),
                (int(dev_stat.lat_curr * 1000), dev_stat.lat_curr),
                (int(dev_stat.io_time_curr * 100), dev_stat.io_time_curr)
            ])

        # emit a row only for OSDs with at least one measured device
        if have_data:
            table.add_cell(str(osd.id))
            table.add_cell(osd.host)
            for val, sorted_val in perf_info:
                table.add_cell(str(val), sorttable_customkey=str(sorted_val))
            table.next_row()

    if have_any_data:
        report.add_block(8, "OSD's current load:", table)
    else:
        report.add_block(6, "OSD's current load unawailable", "")
def show_osd_info(report, cluster):
    """Render the per-OSD table: status, weights, PG count, storage usage
    and journal placement.

    report: report object collecting HTML blocks via add_block().
    cluster: collected ceph cluster state with a list of OSD objects.
    """
    table = html2.HTMLTable(headers=[
        "OSD", "node", "status", "daemon<br>run", "weight<br>reweight",
        "PG count", "Storage<br>used", "Storage<br>free", "Storage<br>free %",
        "Journal<br>on same<br>disk", "Journal<br>on SSD", "Journal<br>on file"
    ])

    for osd in cluster.osds:
        if osd.daemon_runs is None:
            daemon_msg = HTML_UNKNOWN
        elif osd.daemon_runs:
            daemon_msg = html_ok('yes')
        else:
            daemon_msg = html_fail('no')

        if osd.data_stor_stats is not None:
            used_b = osd.data_stor_stats.get('used')
            avail_b = osd.data_stor_stats.get('avail')
            avail_perc = int((avail_b * 100.0) / (avail_b + used_b) + 0.5)

            if avail_perc < 20:
                color = "red"
            elif avail_perc < 40:
                color = "yellow"
            else:
                color = "green"
            avail_perc_str = H.font(avail_perc, color=color)

            # BUGFIX: guard missing journal stats, which used to raise
            # AttributeError on osd.j_stor_stats.root_dev
            if osd.j_stor_stats is not None:
                if osd.data_stor_stats.root_dev == osd.j_stor_stats.root_dev:
                    j_on_same_drive = html_fail("yes")
                else:
                    j_on_same_drive = html_ok("no")

                if osd.data_stor_stats.dev != osd.j_stor_stats.dev:
                    j_on_file = html_ok("no")
                else:
                    j_on_file = html_fail("yes")

                if osd.j_stor_stats.is_ssd:
                    j_on_ssd = html_ok("yes")
                else:
                    j_on_ssd = html_fail("no")
            else:
                j_on_same_drive = HTML_UNKNOWN
                j_on_file = HTML_UNKNOWN
                j_on_ssd = HTML_UNKNOWN
        else:
            used_b = HTML_UNKNOWN
            avail_b = HTML_UNKNOWN
            avail_perc_str = HTML_UNKNOWN
            j_on_same_drive = HTML_UNKNOWN
            j_on_file = HTML_UNKNOWN
            # BUGFIX: j_on_ssd was never assigned on this branch, leaking
            # the value from the previous OSD (or NameError on the first)
            j_on_ssd = HTML_UNKNOWN

        if osd.status == 'up':
            status = html_ok("up")
        else:
            status = html_fail("down")

        table.add_cell(str(osd.id))
        table.add_cell(osd.host)
        table.add_cell(status)
        table.add_cell(daemon_msg)

        str_w = "%.3f / %.3f" % (float(osd.crush_weight), float(osd.reweight))
        table.add_cell(str_w,
                       sorttable_customkey=str(float(osd.crush_weight) *
                                               float(osd.reweight)))

        if osd.pg_count is None:
            table.add_cell(HTML_UNKNOWN, sorttable_customkey=0)
        else:
            table.add_cell(str(osd.pg_count))

        if isinstance(used_b, str):
            table.add_cell(used_b, sorttable_customkey='0')
        else:
            table.add_cell(b2ssize(used_b, False), sorttable_customkey=used_b)

        if isinstance(avail_b, str):
            table.add_cell(avail_b, sorttable_customkey='0')
        else:
            table.add_cell(b2ssize(avail_b, False),
                           sorttable_customkey=avail_b)

        table.add_cell(avail_perc_str)
        table.add_cell(j_on_same_drive)
        table.add_cell(j_on_ssd)
        table.add_cell(j_on_file)
        table.next_row()

    report.add_block(8, "OSD's info:", table)
def show_osd_info(report, cluster):
    """Render the per-OSD table: status, weights, PG count, storage usage
    and journal placement.

    report: report object collecting HTML blocks via add_block().
    cluster: collected ceph cluster state with a list of OSD objects.
    """
    table = html2.HTMLTable(headers=["OSD", "node", "status",
                                     "daemon<br>run", "weight<br>reweight",
                                     "PG count", "Storage<br>used",
                                     "Storage<br>free", "Storage<br>free %",
                                     "Journal<br>on same<br>disk",
                                     "Journal<br>on SSD",
                                     "Journal<br>on file"])
    for osd in cluster.osds:
        if osd.daemon_runs is None:
            daemon_msg = HTML_UNKNOWN
        elif osd.daemon_runs:
            daemon_msg = html_ok('yes')
        else:
            daemon_msg = html_fail('no')

        if osd.data_stor_stats is not None:
            used_b = osd.data_stor_stats.get('used')
            avail_b = osd.data_stor_stats.get('avail')
            avail_perc = int((avail_b * 100.0) / (avail_b + used_b) + 0.5)

            if avail_perc < 20:
                color = "red"
            elif avail_perc < 40:
                color = "yellow"
            else:
                color = "green"
            avail_perc_str = H.font(avail_perc, color=color)

            # BUGFIX: journal stats may be absent even when data stats are
            # present; the unguarded attribute access raised AttributeError
            if osd.j_stor_stats is not None:
                if osd.data_stor_stats.root_dev == osd.j_stor_stats.root_dev:
                    j_on_same_drive = html_fail("yes")
                else:
                    j_on_same_drive = html_ok("no")

                if osd.data_stor_stats.dev != osd.j_stor_stats.dev:
                    j_on_file = html_ok("no")
                else:
                    j_on_file = html_fail("yes")

                if osd.j_stor_stats.is_ssd:
                    j_on_ssd = html_ok("yes")
                else:
                    j_on_ssd = html_fail("no")
            else:
                j_on_same_drive = HTML_UNKNOWN
                j_on_file = HTML_UNKNOWN
                j_on_ssd = HTML_UNKNOWN
        else:
            used_b = HTML_UNKNOWN
            avail_b = HTML_UNKNOWN
            avail_perc_str = HTML_UNKNOWN
            j_on_same_drive = HTML_UNKNOWN
            j_on_file = HTML_UNKNOWN
            # BUGFIX: the missing assignment here reused the previous OSD's
            # value or raised NameError on the very first iteration
            j_on_ssd = HTML_UNKNOWN

        if osd.status == 'up':
            status = html_ok("up")
        else:
            status = html_fail("down")

        table.add_cell(str(osd.id))
        table.add_cell(osd.host)
        table.add_cell(status)
        table.add_cell(daemon_msg)

        str_w = "%.3f / %.3f" % (float(osd.crush_weight), float(osd.reweight))
        table.add_cell(str_w,
                       sorttable_customkey=str(float(osd.crush_weight) *
                                               float(osd.reweight)))

        if osd.pg_count is None:
            table.add_cell(HTML_UNKNOWN, sorttable_customkey=0)
        else:
            table.add_cell(str(osd.pg_count))

        if isinstance(used_b, str):
            table.add_cell(used_b, sorttable_customkey='0')
        else:
            table.add_cell(b2ssize(used_b, False), sorttable_customkey=used_b)

        if isinstance(avail_b, str):
            table.add_cell(avail_b, sorttable_customkey='0')
        else:
            table.add_cell(b2ssize(avail_b, False),
                           sorttable_customkey=avail_b)

        table.add_cell(avail_perc_str)
        table.add_cell(j_on_same_drive)
        table.add_cell(j_on_ssd)
        table.add_cell(j_on_file)
        table.next_row()

    report.add_block(8, "OSD's info:", table)
def show_hosts_resource_usage(report, cluster):
    """Render per-host network adapter settings and traffic table.

    report: report object collecting HTML blocks via add_block().
    cluster: collected ceph cluster state with hosts and their adapters.
    """
    nets_info = {}
    for host in cluster.hosts.values():
        ceph_adapters = [net.name
                         for net in (host.cluster_net, host.public_net)
                         if net is not None]
        # BUGFIX: exclude ceph adapters by *name*; the previous condition
        # `host.cluster_net not in ceph_adapters` compared a net object
        # with names, so the filter was always true
        nets = [net for net in host.net_adapters.values()
                if net.is_phy and net.name not in ceph_adapters]
        nets_info[host.name] = sorted(nets, key=lambda x: x.name)

    if len(nets_info) == 0:
        max_nets = 0
    else:
        max_nets = max(map(len, nets_info.values()))

    header_row = ["Hostname",
                  "Cluster net<br>dev, ip<br>settings",
                  "Cluster net<br>uptime average<br>send/recv",
                  "Cluster net<br>current<br>send/recv",
                  "Public net<br>dev, ip<br>settings",
                  "Public net<br>uptime average<br>send/recv",
                  "Public net<br>current<br>send/recv"]
    header_row += ["Net"] * max_nets
    row_len = len(header_row)
    table = html2.HTMLTable(headers=header_row)

    for host in sorted(cluster.hosts.values(), key=lambda x: x.name):
        perf_info = [host.name]
        for net in (host.cluster_net, host.public_net):
            if net is None:
                perf_info.extend(["-"] * 3)
            else:
                dev_ip = "{0}<br>{1}".format(net.name, net.ip)
                if host.hw_info is None or net.name not in host.hw_info.net_info:
                    perf_info.append(dev_ip)
                else:
                    speed, dtype, _ = host.hw_info.net_info[net.name]
                    settings = "{0}, {1}".format(speed, dtype)
                    perf_info.append("{0}<br>{1}".format(dev_ip, settings))

                # traffic averaged over host uptime
                perf_info.append("{0} / {1} Bps<br>{2} / {3} Pps".format(
                    b2ssize(float(net.perf_stats.sbytes) / host.uptime, False),
                    b2ssize(float(net.perf_stats.rbytes) / host.uptime, False),
                    b2ssize(float(net.perf_stats.spackets) / host.uptime,
                            False),
                    b2ssize(float(net.perf_stats.rpackets) / host.uptime,
                            False)
                ))

                if net.perf_stats_curr is not None:
                    perf_info.append("{0} / {1} Bps<br>{2} / {3} Pps".format(
                        b2ssize(net.perf_stats_curr.sbytes, False),
                        b2ssize(net.perf_stats_curr.rbytes, False),
                        b2ssize(net.perf_stats_curr.spackets, False),
                        b2ssize(net.perf_stats_curr.rpackets, False),
                    ))
                else:
                    perf_info.append('-')

        for net in nets_info[host.name]:
            if net.speed is not None:
                cell_data = H.div(net.name, _class="left") + \
                            H.div(b2ssize(net.speed), _class="right")
                perf_info.append(cell_data)
            else:
                perf_info.append(net.name)

        # pad short rows to a uniform width
        perf_info += ['-'] * (row_len - len(perf_info))
        table.add_row([str(val) for val in perf_info])

    report.add_block(12, "Host's resource usage:", table)
def show_host_io_load_in_color(report, cluster):
    """Render current per-host, per-device disk IO load as color-coded
    tables, one table per metric (IOPS, Bps, latency, QD, active time).

    report: report object collecting HTML blocks via add_block().
    cluster: collected ceph cluster state with OSD device statistics.
    """
    # metric -> {host -> {device -> value}}
    rbts = collections.defaultdict(lambda: {})
    wbts = collections.defaultdict(lambda: {})
    bts = collections.defaultdict(lambda: {})

    wiops = collections.defaultdict(lambda: {})
    riops = collections.defaultdict(lambda: {})
    iops = collections.defaultdict(lambda: {})

    queue_depth = collections.defaultdict(lambda: {})
    lat = collections.defaultdict(lambda: {})
    io_time = collections.defaultdict(lambda: {})
    w_io_time = collections.defaultdict(lambda: {})

    for osd in cluster.osds:
        for dev_stat in (osd.data_stor_stats, osd.j_stor_stats):
            if dev_stat is None or 'write_bytes_curr' not in dev_stat:
                continue

            dev = os.path.basename(dev_stat.root_dev)
            wbts[osd.host][dev] = dev_stat.write_bytes_curr
            rbts[osd.host][dev] = dev_stat.read_bytes_curr
            bts[osd.host][dev] = (dev_stat.write_bytes_curr +
                                  dev_stat.read_bytes_curr)

            wiops[osd.host][dev] = dev_stat.write_iops_curr
            riops[osd.host][dev] = dev_stat.read_iops_curr
            iops[osd.host][dev] = (dev_stat.write_iops_curr +
                                   dev_stat.read_iops_curr)

            queue_depth[osd.host][dev] = dev_stat.w_io_time_curr
            lat[osd.host][dev] = dev_stat.lat_curr * 1000
            io_time[osd.host][dev] = int(dev_stat.io_time_curr * 100)
            w_io_time[osd.host][dev] = dev_stat.w_io_time_curr

    if len(wbts) == 0:
        # typo fixed in the user-visible message ("awailable")
        report.add_block(1, "No current IO load available", "")
        return

    # (metric map, size base, title, minimal scale for coloring)
    loads = [
        (iops, 1000, 'IOPS', 50),
        (riops, 1000, 'Read IOPS', 30),
        (wiops, 1000, 'Write IOPS', 30),

        (bts, 1024, 'Bps', 100 * 1024 ** 2),
        (rbts, 1024, 'Read Bps', 100 * 1024 ** 2),
        (wbts, 1024, 'Write Bps', 100 * 1024 ** 2),

        (lat, None, 'Latency, ms', 20),
        (queue_depth, None, 'Average QD', 3),
        (io_time, None, 'Active time %', 100),
        # (w_io_time, None, 'W. time share', 5.0),
    ]

    report.add_block(12, "Current disk IO load", "")

    # the old enumerate() index was never used - plain iteration
    for target, base, tp, min_max_val in loads:
        if target is lat:
            max_val = 300.0  # fixed latency ceiling for color scaling
        else:
            max_val = max(map(max, [data.values()
                                    for data in target.values()]))

        max_val = max(min_max_val, max_val)
        max_len = max(map(len, target.values()))

        table = html2.HTMLTable(['host'] + ['load'] * max_len, zebra=False)
        for host_name, data in sorted(target.items()):
            table.add_cell(host_name)
            for dev, val in sorted(data.items()):
                if max_val == 0:
                    color = "#FFFFFF"
                else:
                    color = val_to_color(float(val) / max_val)

                if target is lat:
                    s_val = str(int(val))
                elif target is queue_depth:
                    s_val = "%.1f" % (val,)
                elif base is None:
                    s_val = "%.1f" % (val,)
                else:
                    s_val = b2ssize(val, False, base=base)

                cell_data = H.div(dev, _class="left") + \
                            H.div(s_val, _class="right")
                table.add_cell(cell_data,
                               bgcolor=color,
                               sorttable_customkey=str(val))

            # pad hosts that expose fewer devices than the widest row
            for i in range(max_len - len(data)):
                table.add_cell("-", sorttable_customkey='0')
            table.next_row()

        ncols = max_len + 1  # header
        weight = max(2, min(12, ncols))
        report.add_block(weight, tp, table, "Disk IO - " + tp)
def show_host_network_load_in_color(report, cluster):
    """Render current per-host network load (send / receive), colored
    relative to each adapter's maximum throughput.

    report: report object collecting HTML blocks via add_block().
    cluster: collected ceph cluster state with hosts and their adapters.
    """
    net_io = collections.defaultdict(lambda: {})
    send_net_io = collections.defaultdict(lambda: {})
    recv_net_io = collections.defaultdict(lambda: {})

    for host in cluster.hosts.values():
        ceph_adapters = []
        for net in (host.cluster_net, host.public_net):
            if net is not None:
                ceph_adapters.append(net.name)
            else:
                ceph_adapters.append('-')

        # ceph nets first under fixed labels, remaining physical adapters after
        nets = [('cluster', host.cluster_net), ('public', host.public_net)]
        nets += [(net.name, net) for net in host.net_adapters.values()
                 if net.is_phy and net.name not in ceph_adapters]

        for name, net in nets:
            if net is None or net.perf_stats_curr is None:
                continue

            usage = max((net.perf_stats_curr.sbytes,
                         net.perf_stats_curr.rbytes))

            # idle non-ceph adapters are skipped entirely
            if usage > 0 or name in ('cluster', 'public'):
                net_io[host.name][name] = (usage, net.speed)
                send_net_io[host.name][name] = (net.perf_stats_curr.sbytes,
                                                net.speed)
                recv_net_io[host.name][name] = (net.perf_stats_curr.rbytes,
                                                net.speed)

    if len(net_io) == 0:
        # typo fixed in the user-visible message ("awailable")
        report.add_block(6, "No network load available", "")
        return

    std_nets = set(['public', 'cluster'])
    report.add_block(12, "Network load (to max dev throughput)", "")
    loads = [
        # (net_io, "Network load max (to max dev throughput):"),
        (send_net_io, "Send"),
        (recv_net_io, "Receive"),
    ]

    for io, name in loads:
        max_len = max(len(set(data) - std_nets) for data in io.values())

        table = html2.HTMLTable(
            ["host", "public<br>net", "cluster<br>net"] +
            ["hw adapter"] * max_len,
            zebra=False
        )

        for host_name, data in sorted(io.items()):
            net_names = list(std_nets) + sorted(set(data) - std_nets)
            table.add_cell(host_name)
            for net_name in net_names:
                if net_name not in data:
                    table.add_cell('-')
                    continue

                usage, speed = data[net_name]
                if speed is None:
                    color = "#FFFFFF"
                else:
                    color = val_to_color(float(usage) / speed)

                if net_name not in std_nets:
                    text = H.div(net_name, _class="left") + \
                           H.div(b2ssize(usage, False), _class="right")
                else:
                    text = b2ssize(usage, False)

                table.add_cell(text, bgcolor=color,
                               sorttable_customkey=str(usage))

            # len(data) instead of materializing .items() just for its length
            for i in range(max_len + len(std_nets) - len(data)):
                table.add_cell("-", sorttable_customkey='0')
            table.next_row()

        ncols = max_len + len(std_nets) + 1  # header
        weight = max(2, min(12, ncols))
        report.add_block(weight, name, table, "Network - " + name)
def show_osd_perf_info(report, cluster):
    """Render two per-OSD performance tables - uptime-average load and
    current load - covering the data (D) and journal (J) device of each OSD.

    report: report object collecting HTML blocks via add_block().
    cluster: collected ceph cluster state with a list of OSD objects.
    """
    # --- table 1: uptime-average load -----------------------------------
    table = html2.HTMLTable(headers=["OSD", "node",
                                     "apply<br>lat, ms", "commit<br>lat, ms",
                                     "D dev", "D read<br>Bps",
                                     "D write<br>Bps", "D read<br>OPS",
                                     "D write<br>OPS", "D IO<br>time %",
                                     "J dev", "J read<br>Bps",
                                     "J write<br>Bps", "J read<br>OPS",
                                     "J write<br>OPS", "J IO<br>time %",
                                     ])
    for osd in cluster.osds:
        if osd.osd_perf is not None:
            apply_latency_ms = osd.osd_perf["apply_latency_ms"]
            commit_latency_ms = osd.osd_perf["commit_latency_ms"]
        else:
            apply_latency_ms = HTML_UNKNOWN
            commit_latency_ms = HTML_UNKNOWN

        # (display value, sort key) pairs; data device first, journal second
        perf_info = []
        for dev_stat in (osd.data_stor_stats, osd.j_stor_stats):
            if dev_stat is None or 'read_bytes_uptime' not in dev_stat:
                # placeholders for the six device columns
                perf_info.extend([('-', 0)] * 6)
                continue
            perf_info.extend([
                (os.path.basename(dev_stat.root_dev),
                 os.path.basename(dev_stat.root_dev)),
                (b2ssize(dev_stat.read_bytes_uptime, False),
                 dev_stat.read_bytes_uptime),
                (b2ssize(dev_stat.write_bytes_uptime, False),
                 dev_stat.write_bytes_uptime),
                (b2ssize(dev_stat.read_iops_uptime, False),
                 dev_stat.read_iops_uptime),
                (b2ssize(dev_stat.write_iops_uptime, False),
                 dev_stat.write_iops_uptime),
                (int(dev_stat.io_time_uptime * 100), dev_stat.io_time_uptime)
            ])

        table.add_cell(str(osd.id))
        table.add_cell(osd.host)
        table.add_cell(str(apply_latency_ms))
        table.add_cell(str(commit_latency_ms))
        for val, sorted_val in perf_info:
            table.add_cell(str(val), sorttable_customkey=str(sorted_val))
        table.next_row()

    report.add_block(8, "OSD's load uptime average:", table)

    # --- table 2: current load ------------------------------------------
    table = html2.HTMLTable(headers=["OSD", "node",
                                     "D dev", "D read<br>Bps",
                                     "D write<br>Bps", "D read<br>OPS",
                                     "D write<br>OPS", "D lat<br>ms",
                                     "D IO<br>time %",
                                     "J dev", "J read<br>Bps",
                                     "J write<br>Bps", "J read<br>OPS",
                                     "J write<br>OPS", "J lat<br>ms",
                                     "J IO<br>time %",
                                     ])

    have_any_data = False
    for osd in cluster.osds:
        perf_info = []
        have_data = False
        for dev_stat in (osd.data_stor_stats, osd.j_stor_stats):
            if dev_stat is None or 'read_bytes_curr' not in dev_stat:
                # placeholders for the seven device columns
                perf_info.extend([('-', 0)] * 7)
                continue
            have_data = True
            have_any_data = True
            perf_info.extend([
                (os.path.basename(dev_stat.root_dev), None),
                (b2ssize(dev_stat.read_bytes_curr, False),
                 dev_stat.read_bytes_curr),
                (b2ssize(dev_stat.write_bytes_curr, False),
                 dev_stat.write_bytes_curr),
                (b2ssize(dev_stat.read_iops_curr, False),
                 dev_stat.read_iops_curr),
                (b2ssize(dev_stat.write_iops_curr, False),
                 dev_stat.write_iops_curr),
                (int(dev_stat.lat_curr * 1000), dev_stat.lat_curr),
                (int(dev_stat.io_time_curr * 100), dev_stat.io_time_curr)
            ])

        # emit a row only for OSDs with at least one measured device
        if have_data:
            table.add_cell(str(osd.id))
            table.add_cell(osd.host)
            for val, sorted_val in perf_info:
                table.add_cell(str(val), sorttable_customkey=str(sorted_val))
            table.next_row()

    if have_any_data:
        report.add_block(8, "OSD's current load:", table)
    else:
        report.add_block(6, "OSD's current load unawailable", "")