Example #1
0
def show_summary(report, cluster):
    t = html2.HTMLTable(["Setting", "Value"])
    t.add_cells("Collected at", cluster.report_collected_at_local)
    t.add_cells("Collected at GMT", cluster.report_collected_at_gmt)
    t.add_cells("Status", cluster.overall_status)
    t.add_cells("PG count", cluster.num_pgs)
    t.add_cells("Pool count", len(cluster.pools))
    t.add_cells("Used", b2ssize(cluster.bytes_used, False))
    t.add_cells("Avail", b2ssize(cluster.bytes_avail, False))
    t.add_cells("Data", b2ssize(cluster.data_bytes, False))

    avail_perc = cluster.bytes_avail * 100 / cluster.bytes_total
    t.add_cells("Free %", avail_perc)

    osd_count = len(cluster.osds)
    t.add_cells("Mon count", len(cluster.mons))

    report.add_block(3, "Status:", t)

    if cluster.settings is None:
        t = H.font("No live OSD found!", color="red")
    else:
        t = html2.HTMLTable(["Setting", "Value"])
        t.add_cells("Count", osd_count)
        t.add_cells("PG per OSD", cluster.num_pgs / osd_count)
        t.add_cells("Cluster net", cluster.cluster_net)
        t.add_cells("Public net", cluster.public_net)
        t.add_cells("Near full ratio", cluster.settings.mon_osd_nearfull_ratio)
        t.add_cells("Full ratio", cluster.settings.mon_osd_full_ratio)
        t.add_cells("Backfill full ratio",
                    cluster.settings.osd_backfill_full_ratio)
        t.add_cells("Filesafe full ratio",
                    cluster.settings.osd_failsafe_full_ratio)
        t.add_cells("Journal aio", cluster.settings.journal_aio)
        t.add_cells("Journal dio", cluster.settings.journal_dio)
        t.add_cells("Filestorage sync",
                    str(cluster.settings.filestore_max_sync_interval) + 's')
    report.add_block(3, "OSD:", t)

    t = html2.HTMLTable(["Setting", "Value"])
    t.add_cells("Client IO Bps", b2ssize(cluster.write_bytes_sec, False))
    t.add_cells("Client IO IOPS", b2ssize(cluster.op_per_sec, False))
    report.add_block(2, "Activity:", t)

    if len(cluster.health_summary) != 0:
        t = html2.Doc()
        for msg in cluster.health_summary:
            if msg['severity'] == "HEALTH_WARN":
                color = "orange"
            elif msg['severity'] == "HEALTH_ERR":
                color = "red"
            else:
                color = "black"

            t.font(msg['summary'].capitalize(), color=color)
            t.br

        report.add_block(2, "Status messages:", t)
def show_summary(report, cluster):
    t = html2.HTMLTable(["Setting", "Value"])
    t.add_cells("Collected at", cluster.report_collected_at_local)
    t.add_cells("Collected at GMT", cluster.report_collected_at_gmt)
    t.add_cells("Status", cluster.overall_status)
    t.add_cells("PG count", cluster.num_pgs)
    t.add_cells("Pool count", len(cluster.pools))
    t.add_cells("Used", b2ssize(cluster.bytes_used, False))
    t.add_cells("Avail", b2ssize(cluster.bytes_avail, False))
    t.add_cells("Data", b2ssize(cluster.data_bytes, False))

    avail_perc = cluster.bytes_avail * 100 / cluster.bytes_total
    t.add_cells("Free %", avail_perc)

    osd_count = len(cluster.osds)
    t.add_cells("Mon count", len(cluster.mons))

    report.add_block(3, "Status:", t)

    if cluster.settings is None:
        t = H.font("No live OSD found!", color="red")
    else:
        t = html2.HTMLTable(["Setting", "Value"])
        t.add_cells("Count", osd_count)
        t.add_cells("PG per OSD", cluster.num_pgs / osd_count)
        t.add_cells("Cluster net", cluster.cluster_net)
        t.add_cells("Public net", cluster.public_net)
        t.add_cells("Near full ratio", cluster.settings.mon_osd_nearfull_ratio)
        t.add_cells("Full ratio", cluster.settings.mon_osd_full_ratio)
        t.add_cells("Backfill full ratio", cluster.settings.osd_backfill_full_ratio)
        t.add_cells("Filesafe full ratio", cluster.settings.osd_failsafe_full_ratio)
        t.add_cells("Journal aio", cluster.settings.journal_aio)
        t.add_cells("Journal dio", cluster.settings.journal_dio)
        t.add_cells("Filestorage sync", str(cluster.settings.filestore_max_sync_interval) + 's')
    report.add_block(3, "OSD:", t)

    t = html2.HTMLTable(["Setting", "Value"])
    t.add_cells("Client IO Bps", b2ssize(cluster.write_bytes_sec, False))
    t.add_cells("Client IO IOPS", b2ssize(cluster.op_per_sec, False))
    report.add_block(2, "Activity:", t)

    if len(cluster.health_summary) != 0:
        t = html2.Doc()
        for msg in cluster.health_summary:
            if msg['severity'] == "HEALTH_WARN":
                color = "orange"
            elif msg['severity'] == "HEALTH_ERR":
                color = "red"
            else:
                color = "black"

            t.font(msg['summary'].capitalize(), color=color)
            t.br

        report.add_block(2, "Status messages:", t)
def show_pools_info(report, cluster):
    table = html2.HTMLTable(headers=["Pool",
                                     "Id",
                                     "size",
                                     "min_size",
                                     "obj",
                                     "data",
                                     "free",
                                     "read",
                                     "write",
                                     "ruleset",
                                     "PG",
                                     "PGP",
                                     "PG per OSD<br>Dev %"])

    for _, pool in sorted(cluster.pools.items()):
        table.add_cell(pool.name)
        table.add_cell(str(pool.id))
        table.add_cell(str(pool.size))
        table.add_cell(str(pool.min_size))
        table.add_cell(b2ssize(int(pool.num_objects), base=1000),
                       sorttable_customkey=str(int(pool.num_objects)))
        table.add_cell(b2ssize(int(pool.size_bytes), False),
                       sorttable_customkey=str(int(pool.size_bytes)))
        table.add_cell('---')
        table.add_cell(b2ssize(int(pool.read_bytes), False),
                       sorttable_customkey=str(int(pool.read_bytes)))
        table.add_cell(b2ssize(int(pool.write_bytes), False),
                       sorttable_customkey=str(int(pool.write_bytes)))
        table.add_cell(str(pool.crush_ruleset))
        table.add_cell(str(pool.pg_num))
        table.add_cell(str(pool.pg_placement_num))

        if cluster.sum_per_osd is None:
            table.add_cell('-')
        else:
            row = [osd_pg.get(pool.name, 0)
                   for osd_pg in cluster.osd_pool_pg_2d.values()]
            avg = float(sum(row)) / len(row)
            dev = (sum((i - avg) ** 2.0 for i in row) / len(row)) ** 0.5
            table.add_cell(str(int(dev * 100. / avg)))

        table.next_row()

    report.add_block(8, "Pool's stats:", table)
def show_pools_info(report, cluster):
    table = html2.HTMLTable(headers=["Pool",
                                     "Id",
                                     "size",
                                     "min_size",
                                     "obj",
                                     "data",
                                     "free",
                                     "read",
                                     "write",
                                     "ruleset",
                                     "PG",
                                     "PGP",
                                     "PG per OSD<br>Dev %"])

    for _, pool in sorted(cluster.pools.items()):
        table.add_cell(pool.name)
        table.add_cell(str(pool.id))
        table.add_cell(str(pool.size))
        table.add_cell(str(pool.min_size))
        table.add_cell(b2ssize(int(pool.num_objects), base=1000),
                       sorttable_customkey=str(int(pool.num_objects)))
        table.add_cell(b2ssize(int(pool.size_bytes), False),
                       sorttable_customkey=str(int(pool.size_bytes)))
        table.add_cell('---')
        table.add_cell(b2ssize(int(pool.read_bytes), False),
                       sorttable_customkey=str(int(pool.read_bytes)))
        table.add_cell(b2ssize(int(pool.write_bytes), False),
                       sorttable_customkey=str(int(pool.write_bytes)))
        table.add_cell(str(pool.crush_ruleset))
        table.add_cell(str(pool.pg_num))
        table.add_cell(str(pool.pg_placement_num))

        if cluster.sum_per_osd is None:
            table.add_cell('-')
        else:
            row = [osd_pg.get(pool.name, 0)
                   for osd_pg in cluster.osd_pool_pg_2d.values()]
            avg = float(sum(row)) / len(row)
            dev = (sum((i - avg) ** 2.0 for i in row) / len(row)) ** 0.5
            table.add_cell(str(int(dev * 100. / avg)))

        table.next_row()

    report.add_block(8, "Pool's stats:", table)
Example #5
0
def show_hosts_info(report, cluster):
    header_row = [
        "Name", "Services", "CPU's", "RAM<br>total", "RAM<br>free",
        "Swap<br>used", "Load avg<br>5m"
    ]
    table = html2.HTMLTable(headers=header_row)
    for host in sorted(cluster.hosts.values(), key=lambda x: x.name):
        services = [
            "osd-{0}".format(osd.id) for osd in cluster.osds
            if osd.host == host.name
        ]
        all_mons = [mon.name for mon in cluster.mons]

        if host.name in all_mons:
            services.append("mon(" + host.name + ")")

        table.add_cell(host.name)
        srv_strs = []
        for idx in range((len(services) + 2) // 3):  # ceil: keep the last partial group
            srv_strs.append(",".join(services[idx * 3:idx * 3 + 3]))
        table.add_cell("<br>".join(srv_strs))

        if host.hw_info is None:
            map(table.add_cell, ['-'] * (len(header_row) - 2))
            table.next_row()
            continue

        if host.hw_info.cores == []:
            table.add_cell("Error", sorttable_customkey='0')
        else:
            table.add_cell(sum(count for _, count in host.hw_info.cores))

        table.add_cell(b2ssize(host.mem_total),
                       sorttable_customkey=str(host.mem_total))
        table.add_cell(b2ssize(host.mem_free),
                       sorttable_customkey=str(host.mem_free))

        table.add_cell(b2ssize(host.swap_total - host.swap_free),
                       sorttable_customkey=str(host.swap_total -
                                               host.swap_free))
        table.add_cell(host.load_5m)
        table.next_row()

    report.add_block(6, "Host's info:", table)
def show_hosts_info(report, cluster):
    header_row = ["Name",
                  "Services",
                  "CPU's",
                  "RAM<br>total",
                  "RAM<br>free",
                  "Swap<br>used",
                  "Load avg<br>5m"]
    table = html2.HTMLTable(headers=header_row)
    for host in sorted(cluster.hosts.values(), key=lambda x: x.name):
        services = ["osd-{0}".format(osd.id) for osd in cluster.osds if osd.host == host.name]
        all_mons = [mon.name for mon in cluster.mons]

        if host.name in all_mons:
            services.append("mon(" + host.name + ")")

        table.add_cell(host.name)
        srv_strs = []
        for idx in range((len(services) + 2) // 3):  # ceil: keep the last partial group
            srv_strs.append(",".join(services[idx * 3: idx * 3 + 3]))
        table.add_cell("<br>".join(srv_strs))

        if host.hw_info is None:
            map(table.add_cell, ['-'] * (len(header_row) - 2))
            table.next_row()
            continue

        if host.hw_info.cores == []:
            table.add_cell("Error", sorttable_customkey='0')
        else:
            table.add_cell(sum(count for _, count in host.hw_info.cores))

        table.add_cell(b2ssize(host.mem_total),
                       sorttable_customkey=str(host.mem_total))
        table.add_cell(b2ssize(host.mem_free),
                       sorttable_customkey=str(host.mem_free))

        table.add_cell(b2ssize(host.swap_total - host.swap_free),
                       sorttable_customkey=str(host.swap_total - host.swap_free))
        table.add_cell(host.load_5m)
        table.next_row()

    report.add_block(6, "Host's info:", table)
Example #7
0
def show_mons_info(report, cluster):
    table = html2.HTMLTable(
        headers=["Name", "Node", "Role", "Disk free<br>B (%)"])

    for mon in cluster.mons:
        if mon.health == "HEALTH_OK":
            health = html_ok("HEALTH_OK")
        else:
            health = html_fail(mon.health)

        line = [
            mon.name, health, mon.role,
            "{0} ({1})".format(b2ssize(mon.kb_avail * 1024, False),
                               mon.avail_percent)
        ]
        table.add_row(map(str, line))

    report.add_block(3, "Monitors info:", table)
def show_mons_info(report, cluster):
    table = html2.HTMLTable(headers=["Name", "Node", "Role",
                                     "Disk free<br>B (%)"])

    for mon in cluster.mons:
        if mon.health == "HEALTH_OK":
            health = html_ok("HEALTH_OK")
        else:
            health = html_fail(mon.health)

        line = [
            mon.name,
            health,
            mon.role,
            "{0} ({1})".format(b2ssize(mon.kb_avail * 1024, False), mon.avail_percent)
        ]
        table.add_row(map(str, line))

    report.add_block(3, "Monitors info:", table)
Example #9
0
def show_hosts_resource_usage(report, cluster):
    nets_info = {}

    for host in cluster.hosts.values():
        ceph_adapters = [
            net.name for net in (host.cluster_net, host.public_net)
            if net is not None
        ]
        nets = [
            net for net in host.net_adapters.values()
            if net.is_phy and net.name not in ceph_adapters
        ]

        nets_info[host.name] = sorted(nets, key=lambda x: x.name)

    if len(nets_info) == 0:
        max_nets = 0
    else:
        max_nets = max(map(len, nets_info.values()))

    header_row = [
        "Hostname", "Cluster net<br>dev, ip<br>settings",
        "Cluster net<br>uptime average<br>send/recv",
        "Cluster net<br>current<br>send/recv",
        "Public net<br>dev, ip<br>settings",
        "Public net<br>uptime average<br>send/recv",
        "Public net<br>current<br>send/recv"
    ]

    header_row += ["Net"] * max_nets
    row_len = len(header_row)

    table = html2.HTMLTable(headers=header_row)
    for host in sorted(cluster.hosts.values(), key=lambda x: x.name):
        perf_info = [host.name]

        for net in (host.cluster_net, host.public_net):
            if net is None:
                perf_info.extend(["-"] * 3)
            else:
                dev_ip = "{0}<br>{1}".format(net.name, net.ip)

                if host.hw_info is None or net.name not in host.hw_info.net_info:
                    perf_info.append(dev_ip)
                else:
                    speed, dtype, _ = host.hw_info.net_info[net.name]
                    settings = "{0}, {1}".format(speed, dtype)
                    perf_info.append("{0}<br>{1}".format(dev_ip, settings))

                perf_info.append("{0} / {1} Bps<br>{2} / {3} Pps".format(
                    b2ssize(float(net.perf_stats.sbytes) / host.uptime, False),
                    b2ssize(float(net.perf_stats.rbytes) / host.uptime, False),
                    b2ssize(
                        float(net.perf_stats.spackets) / host.uptime, False),
                    b2ssize(
                        float(net.perf_stats.rpackets) / host.uptime, False)))

                if net.perf_stats_curr is not None:
                    perf_info.append("{0} / {1} Bps<br>{2} / {3} Pps".format(
                        b2ssize(net.perf_stats_curr.sbytes, False),
                        b2ssize(net.perf_stats_curr.rbytes, False),
                        b2ssize(net.perf_stats_curr.spackets, False),
                        b2ssize(net.perf_stats_curr.rpackets, False),
                    ))
                else:
                    perf_info.append('-')

        for net in nets_info[host.name]:
            if net.speed is not None:
                cell_data = H.div(net.name, _class="left") + \
                            H.div(b2ssize(net.speed), _class="right")
                perf_info.append(cell_data)
            else:
                perf_info.append(net.name)
        perf_info += ['-'] * (row_len - len(perf_info))

        table.add_row(map(str, perf_info))

    report.add_block(12, "Host's resource usage:", table)
Example #10
0
def show_host_io_load_in_color(report, cluster):
    rbts = collections.defaultdict(lambda: {})
    wbts = collections.defaultdict(lambda: {})
    bts = collections.defaultdict(lambda: {})

    wiops = collections.defaultdict(lambda: {})
    riops = collections.defaultdict(lambda: {})
    iops = collections.defaultdict(lambda: {})

    queue_depth = collections.defaultdict(lambda: {})
    lat = collections.defaultdict(lambda: {})
    io_time = collections.defaultdict(lambda: {})
    w_io_time = collections.defaultdict(lambda: {})

    for osd in cluster.osds:
        for dev_stat in (osd.data_stor_stats, osd.j_stor_stats):
            if dev_stat is None or 'write_bytes_curr' not in dev_stat:
                continue

            dev = os.path.basename(dev_stat.root_dev)

            wbts[osd.host][dev] = dev_stat.write_bytes_curr
            rbts[osd.host][dev] = dev_stat.read_bytes_curr
            bts[osd.host][
                dev] = dev_stat.write_bytes_curr + dev_stat.read_bytes_curr

            wiops[osd.host][dev] = dev_stat.write_iops_curr
            riops[osd.host][dev] = dev_stat.read_iops_curr
            iops[osd.host][
                dev] = dev_stat.write_iops_curr + dev_stat.read_iops_curr

            queue_depth[osd.host][dev] = dev_stat.w_io_time_curr
            lat[osd.host][dev] = dev_stat.lat_curr * 1000
            io_time[osd.host][dev] = int(dev_stat.io_time_curr * 100)
            w_io_time[osd.host][dev] = dev_stat.w_io_time_curr

    if len(wbts) == 0:
        report.add_block(1, "No current IO load awailable", "")
        return

    loads = [
        (iops, 1000, 'IOPS', 50),
        (riops, 1000, 'Read IOPS', 30),
        (wiops, 1000, 'Write IOPS', 30),
        (bts, 1024, 'Bps', 100 * 1024**2),
        (rbts, 1024, 'Read Bps', 100 * 1024**2),
        (wbts, 1024, 'Write Bps', 100 * 1024**2),
        (lat, None, 'Latency, ms', 20),
        (queue_depth, None, 'Average QD', 3),
        (io_time, None, 'Active time %', 100),
        # (w_io_time, None, 'W. time share', 5.0),
    ]

    report.add_block(12, "Current disk IO load", "")
    for pos, (target, base, tp, min_max_val) in enumerate(loads, 1):
        if target is lat:
            max_val = 300.0
        else:
            max_val = max(map(max,
                              [data.values() for data in target.values()]))
            max_val = max(min_max_val, max_val)

        max_len = max(map(len, target.values()))

        table = html2.HTMLTable(['host'] + ['load'] * max_len, zebra=False)
        for host_name, data in sorted(target.items()):
            table.add_cell(host_name)
            for dev, val in sorted(data.items()):
                if max_val == 0:
                    color = "#FFFFFF"
                else:
                    color = val_to_color(float(val) / max_val)

                if target is lat:
                    s_val = str(int(val))
                elif target is queue_depth:
                    s_val = "%.1f" % (val, )
                elif base is None:
                    s_val = "%.1f" % (val, )
                else:
                    s_val = b2ssize(val, False, base=base)

                cell_data = H.div(dev, _class="left") + H.div(s_val,
                                                              _class="right")
                table.add_cell(cell_data,
                               bgcolor=color,
                               sorttable_customkey=str(val))

            for i in range(max_len - len(data)):
                table.add_cell("-", sorttable_customkey='0')
            table.next_row()

        ncols = max_len + 1  # header
        weight = max(2, min(12, ncols))
        report.add_block(weight, tp, table, "Disk IO - " + tp)
Example #11
0
def show_host_network_load_in_color(report, cluster):
    net_io = collections.defaultdict(lambda: {})
    send_net_io = collections.defaultdict(lambda: {})
    recv_net_io = collections.defaultdict(lambda: {})

    for host in cluster.hosts.values():
        ceph_adapters = []
        for net in (host.cluster_net, host.public_net):
            if net is not None:
                ceph_adapters.append(net.name)
            else:
                ceph_adapters.append('-')

        nets = [('cluster', host.cluster_net), ('public', host.public_net)]

        nets += [(net.name, net) for net in host.net_adapters.values()
                 if net.is_phy and net.name not in ceph_adapters]

        for name, net in nets:
            if net is None or net.perf_stats_curr is None:
                continue

            usage = max(
                (net.perf_stats_curr.sbytes, net.perf_stats_curr.rbytes))

            if usage > 0 or name in ('cluster', 'public'):
                net_io[host.name][name] = (usage, net.speed)
                send_net_io[host.name][name] = (net.perf_stats_curr.sbytes,
                                                net.speed)
                recv_net_io[host.name][name] = (net.perf_stats_curr.rbytes,
                                                net.speed)

    if len(net_io) == 0:
        report.add_block(6, "No network load awailable", "")
        return

    std_nets = set(['public', 'cluster'])

    report.add_block(12, "Network load (to max dev throughput)", "")
    loads = [
        # (net_io, "Network load max (to max dev throughput):"),
        (send_net_io, "Send"),
        (recv_net_io, "Receive"),
    ]

    for io, name in loads:
        max_len = max(len(set(data) - std_nets) for data in io.values())

        table = html2.HTMLTable(["host", "public<br>net", "cluster<br>net"] +
                                ["hw adapter"] * max_len,
                                zebra=False)

        for host_name, data in sorted(io.items()):
            net_names = list(std_nets) + sorted(set(data) - std_nets)

            table.add_cell(host_name)
            for net_name in net_names:
                if net_name not in data:
                    table.add_cell('-')
                    continue

                usage, speed = data[net_name]
                if speed is None:
                    color = "#FFFFFF"
                else:
                    color = val_to_color(float(usage) / speed)

                if net_name not in std_nets:
                    text = H.div(net_name, _class="left") + \
                           H.div(b2ssize(usage, False), _class="right")
                else:
                    text = b2ssize(usage, False)

                table.add_cell(text,
                               bgcolor=color,
                               sorttable_customkey=str(usage))

            for i in range(max_len + len(std_nets) - len(data.items())):
                table.add_cell("-", sorttable_customkey='0')

            table.next_row()

        ncols = max_len + len(std_nets) + 1  # header
        weight = max(2, min(12, ncols))
        report.add_block(weight, name, table, "Network - " + name)
Example #12
0
def show_osd_perf_info(report, cluster):
    table = html2.HTMLTable(headers=[
        "OSD",
        "node",
        "apply<br>lat, ms",
        "commit<br>lat, ms",
        "D dev",
        "D read<br>Bps",
        "D write<br>Bps",
        "D read<br>OPS",
        "D write<br>OPS",
        "D IO<br>time %",
        "J dev",
        "J read<br>Bps",
        "J write<br>Bps",
        "J read<br>OPS",
        "J write<br>OPS",
        "J IO<br>time %",
    ])

    for osd in cluster.osds:
        if osd.osd_perf is not None:
            apply_latency_ms = osd.osd_perf["apply_latency_ms"]
            commit_latency_ms = osd.osd_perf["commit_latency_ms"]
        else:
            apply_latency_ms = HTML_UNKNOWN
            commit_latency_ms = HTML_UNKNOWN

        perf_info = []

        for dev_stat in (osd.data_stor_stats, osd.j_stor_stats):
            if dev_stat is None or 'read_bytes_uptime' not in dev_stat:
                perf_info.extend([('-', 0)] * 6)
                continue

            perf_info.extend([(os.path.basename(dev_stat.root_dev),
                               os.path.basename(dev_stat.root_dev)),
                              (b2ssize(dev_stat.read_bytes_uptime,
                                       False), dev_stat.read_bytes_uptime),
                              (b2ssize(dev_stat.write_bytes_uptime,
                                       False), dev_stat.write_bytes_uptime),
                              (b2ssize(dev_stat.read_iops_uptime,
                                       False), dev_stat.read_iops_uptime),
                              (b2ssize(dev_stat.write_iops_uptime,
                                       False), dev_stat.write_iops_uptime),
                              (int(dev_stat.io_time_uptime * 100),
                               dev_stat.io_time_uptime)])

        table.add_cell(str(osd.id))
        table.add_cell(osd.host)
        table.add_cell(str(apply_latency_ms))
        table.add_cell(str(commit_latency_ms))
        for val, sorted_val in perf_info:
            table.add_cell(str(val), sorttable_customkey=str(sorted_val))
        table.next_row()

    report.add_block(8, "OSD's load uptime average:", table)

    table = html2.HTMLTable(headers=[
        "OSD",
        "node",
        "D dev",
        "D read<br>Bps",
        "D write<br>Bps",
        "D read<br>OPS",
        "D write<br>OPS",
        "D lat<br>ms",
        "D IO<br>time %",
        "J dev",
        "J read<br>Bps",
        "J write<br>Bps",
        "J read<br>OPS",
        "J write<br>OPS",
        "J lat<br>ms",
        "J IO<br>time %",
    ])

    have_any_data = False
    for osd in cluster.osds:
        perf_info = []

        have_data = False
        for dev_stat in (osd.data_stor_stats, osd.j_stor_stats):
            if dev_stat is None or 'read_bytes_curr' not in dev_stat:
                perf_info.extend([('-', 0)] * 7)
                continue

            have_data = True
            have_any_data = True
            perf_info.extend([
                (os.path.basename(dev_stat.root_dev), None),
                (b2ssize(dev_stat.read_bytes_curr,
                         False), dev_stat.read_bytes_curr),
                (b2ssize(dev_stat.write_bytes_curr,
                         False), dev_stat.write_bytes_curr),
                (b2ssize(dev_stat.read_iops_curr,
                         False), dev_stat.read_iops_curr),
                (b2ssize(dev_stat.write_iops_curr,
                         False), dev_stat.write_iops_curr),
                (int(dev_stat.lat_curr * 1000), dev_stat.lat_curr),
                (int(dev_stat.io_time_curr * 100), dev_stat.io_time_curr)
            ])

        if have_data:
            table.add_cell(str(osd.id))
            table.add_cell(osd.host)
            for val, sorted_val in perf_info:
                table.add_cell(str(val), sorttable_customkey=str(sorted_val))
            table.next_row()

    if have_any_data:
        report.add_block(8, "OSD's current load:", table)
    else:
        report.add_block(6, "OSD's current load unawailable", "")
Example #13
0
def show_osd_info(report, cluster):
    table = html2.HTMLTable(headers=[
        "OSD", "node", "status", "daemon<br>run", "weight<br>reweight",
        "PG count", "Storage<br>used", "Storage<br>free", "Storage<br>free %",
        "Journal<br>on same<br>disk", "Journal<br>on SSD", "Journal<br>on file"
    ])

    for osd in cluster.osds:
        if osd.daemon_runs is None:
            daemon_msg = HTML_UNKNOWN
        elif osd.daemon_runs:
            daemon_msg = html_ok('yes')
        else:
            daemon_msg = html_fail('no')

        if osd.data_stor_stats is not None:
            used_b = osd.data_stor_stats.get('used')
            avail_b = osd.data_stor_stats.get('avail')
            avail_perc = int((avail_b * 100.0) / (avail_b + used_b) + 0.5)

            if avail_perc < 20:
                color = "red"
            elif avail_perc < 40:
                color = "yellow"
            else:
                color = "green"
            avail_perc_str = H.font(avail_perc, color=color)

            if osd.data_stor_stats.root_dev == osd.j_stor_stats.root_dev:
                j_on_same_drive = html_fail("yes")
            else:
                j_on_same_drive = html_ok("no")

            if osd.data_stor_stats.dev != osd.j_stor_stats.dev:
                j_on_file = html_ok("no")
            else:
                j_on_file = html_fail("yes")

            if osd.j_stor_stats.is_ssd:
                j_on_ssd = html_ok("yes")
            else:
                j_on_ssd = html_fail("no")
        else:
            used_b = HTML_UNKNOWN
            avail_b = HTML_UNKNOWN
            avail_perc_str = HTML_UNKNOWN
            j_on_same_drive = HTML_UNKNOWN
            j_on_file = HTML_UNKNOWN
            j_on_ssd = HTML_UNKNOWN

        if osd.status == 'up':
            status = html_ok("up")
        else:
            status = html_fail("down")

        table.add_cell(str(osd.id))
        table.add_cell(osd.host)
        table.add_cell(status)
        table.add_cell(daemon_msg)

        str_w = "%.3f / %.3f" % (float(osd.crush_weight), float(osd.reweight))

        table.add_cell(str_w,
                       sorttable_customkey=str(
                           float(osd.crush_weight) * float(osd.reweight)))

        if osd.pg_count is None:
            table.add_cell(HTML_UNKNOWN, sorttable_customkey=0)
        else:
            table.add_cell(str(osd.pg_count))

        if isinstance(used_b, str):
            table.add_cell(used_b, sorttable_customkey='0')
        else:
            table.add_cell(b2ssize(used_b, False), sorttable_customkey=used_b)

        if isinstance(avail_b, str):
            table.add_cell(avail_b, sorttable_customkey='0')
        else:
            table.add_cell(b2ssize(avail_b, False),
                           sorttable_customkey=avail_b)

        table.add_cell(avail_perc_str)
        table.add_cell(j_on_same_drive)
        table.add_cell(j_on_ssd)
        table.add_cell(j_on_file)
        table.next_row()

    report.add_block(8, "OSD's info:", table)
def show_osd_info(report, cluster):
    table = html2.HTMLTable(headers=["OSD",
                                     "node",
                                     "status",
                                     "daemon<br>run",
                                     "weight<br>reweight",
                                     "PG count",
                                     "Storage<br>used",
                                     "Storage<br>free",
                                     "Storage<br>free %",
                                     "Journal<br>on same<br>disk",
                                     "Journal<br>on SSD",
                                     "Journal<br>on file"])

    for osd in cluster.osds:
        if osd.daemon_runs is None:
            daemon_msg = HTML_UNKNOWN
        elif osd.daemon_runs:
            daemon_msg = html_ok('yes')
        else:
            daemon_msg = html_fail('no')

        if osd.data_stor_stats is not None:
            used_b = osd.data_stor_stats.get('used')
            avail_b = osd.data_stor_stats.get('avail')
            avail_perc = int((avail_b * 100.0) / (avail_b + used_b) + 0.5)

            if avail_perc < 20:
                color = "red"
            elif avail_perc < 40:
                color = "yellow"
            else:
                color = "green"
            avail_perc_str = H.font(avail_perc, color=color)

            if osd.data_stor_stats.root_dev == osd.j_stor_stats.root_dev:
                j_on_same_drive = html_fail("yes")
            else:
                j_on_same_drive = html_ok("no")

            if osd.data_stor_stats.dev != osd.j_stor_stats.dev:
                j_on_file = html_ok("no")
            else:
                j_on_file = html_fail("yes")

            if osd.j_stor_stats.is_ssd:
                j_on_ssd = html_ok("yes")
            else:
                j_on_ssd = html_fail("no")
        else:
            used_b = HTML_UNKNOWN
            avail_b = HTML_UNKNOWN
            avail_perc_str = HTML_UNKNOWN
            j_on_same_drive = HTML_UNKNOWN
            j_on_file = HTML_UNKNOWN
            j_on_ssd = HTML_UNKNOWN

        if osd.status == 'up':
            status = html_ok("up")
        else:
            status = html_fail("down")

        table.add_cell(str(osd.id))
        table.add_cell(osd.host)
        table.add_cell(status)
        table.add_cell(daemon_msg)

        str_w = "%.3f / %.3f" % (float(osd.crush_weight), float(osd.reweight))

        table.add_cell(str_w, sorttable_customkey=str(float(osd.crush_weight) * float(osd.reweight)))

        if osd.pg_count is None:
            table.add_cell(HTML_UNKNOWN, sorttable_customkey=0)
        else:
            table.add_cell(str(osd.pg_count))

        if isinstance(used_b, str):
            table.add_cell(used_b, sorttable_customkey='0')
        else:
            table.add_cell(b2ssize(used_b, False), sorttable_customkey=used_b)

        if isinstance(avail_b, str):
            table.add_cell(avail_b, sorttable_customkey='0')
        else:
            table.add_cell(b2ssize(avail_b, False), sorttable_customkey=avail_b)

        table.add_cell(avail_perc_str)
        table.add_cell(j_on_same_drive)
        table.add_cell(j_on_ssd)
        table.add_cell(j_on_file)
        table.next_row()

    report.add_block(8, "OSD's info:", table)
def show_hosts_resource_usage(report, cluster):
    nets_info = {}

    for host in cluster.hosts.values():
        ceph_adapters = [net.name
                         for net in (host.cluster_net, host.public_net)
                         if net is not None]
        nets = [net for net in host.net_adapters.values()
                if net.is_phy and net.name not in ceph_adapters]

        nets_info[host.name] = sorted(nets, key=lambda x: x.name)

    if len(nets_info) == 0:
        max_nets = 0
    else:
        max_nets = max(map(len, nets_info.values()))

    header_row = ["Hostname",
                  "Cluster net<br>dev, ip<br>settings",
                  "Cluster net<br>uptime average<br>send/recv",
                  "Cluster net<br>current<br>send/recv",
                  "Public net<br>dev, ip<br>settings",
                  "Public net<br>uptime average<br>send/recv",
                  "Public net<br>current<br>send/recv"]

    header_row += ["Net"] * max_nets
    row_len = len(header_row)

    table = html2.HTMLTable(headers=header_row)
    for host in sorted(cluster.hosts.values(), key=lambda x: x.name):
        perf_info = [host.name]

        for net in (host.cluster_net, host.public_net):
            if net is None:
                perf_info.extend(["-"] * 3)
            else:
                dev_ip = "{0}<br>{1}".format(net.name, net.ip)

                if host.hw_info is None or net.name not in host.hw_info.net_info:
                    perf_info.append(dev_ip)
                else:
                    speed, dtype, _ = host.hw_info.net_info[net.name]
                    settings = "{0}, {1}".format(speed, dtype)
                    perf_info.append("{0}<br>{1}".format(dev_ip, settings))

                perf_info.append("{0} / {1} Bps<br>{2} / {3} Pps".format(
                    b2ssize(float(net.perf_stats.sbytes) / host.uptime, False),
                    b2ssize(float(net.perf_stats.rbytes) / host.uptime, False),
                    b2ssize(float(net.perf_stats.spackets) / host.uptime, False),
                    b2ssize(float(net.perf_stats.rpackets) / host.uptime, False)
                ))

                if net.perf_stats_curr is not None:
                    perf_info.append("{0} / {1} Bps<br>{2} / {3} Pps".format(
                        b2ssize(net.perf_stats_curr.sbytes, False),
                        b2ssize(net.perf_stats_curr.rbytes, False),
                        b2ssize(net.perf_stats_curr.spackets, False),
                        b2ssize(net.perf_stats_curr.rpackets, False),
                    ))
                else:
                    perf_info.append('-')

        for net in nets_info[host.name]:
            if net.speed is not None:
                cell_data = H.div(net.name, _class="left") + \
                            H.div(b2ssize(net.speed), _class="right")
                perf_info.append(cell_data)
            else:
                perf_info.append(net.name)
        perf_info += ['-'] * (row_len - len(perf_info))

        table.add_row(map(str, perf_info))

    report.add_block(12, "Host's resource usage:", table)
def show_host_io_load_in_color(report, cluster):
    rbts = collections.defaultdict(lambda: {})
    wbts = collections.defaultdict(lambda: {})
    bts = collections.defaultdict(lambda: {})

    wiops = collections.defaultdict(lambda: {})
    riops = collections.defaultdict(lambda: {})
    iops = collections.defaultdict(lambda: {})

    queue_depth = collections.defaultdict(lambda: {})
    lat = collections.defaultdict(lambda: {})
    io_time = collections.defaultdict(lambda: {})
    w_io_time = collections.defaultdict(lambda: {})

    for osd in cluster.osds:
        for dev_stat in (osd.data_stor_stats, osd.j_stor_stats):
            if dev_stat is None or 'write_bytes_curr' not in dev_stat:
                continue

            dev = os.path.basename(dev_stat.root_dev)

            wbts[osd.host][dev] = dev_stat.write_bytes_curr
            rbts[osd.host][dev] = dev_stat.read_bytes_curr
            bts[osd.host][dev] = dev_stat.write_bytes_curr + dev_stat.read_bytes_curr

            wiops[osd.host][dev] = dev_stat.write_iops_curr
            riops[osd.host][dev] = dev_stat.read_iops_curr
            iops[osd.host][dev] = dev_stat.write_iops_curr + dev_stat.read_iops_curr

            queue_depth[osd.host][dev] = dev_stat.w_io_time_curr
            lat[osd.host][dev] = dev_stat.lat_curr * 1000
            io_time[osd.host][dev] = int(dev_stat.io_time_curr * 100)
            w_io_time[osd.host][dev] = dev_stat.w_io_time_curr

    if len(wbts) == 0:
        report.add_block(1, "No current IO load awailable", "")
        return

    loads = [
        (iops, 1000, 'IOPS', 50),
        (riops, 1000, 'Read IOPS', 30),
        (wiops, 1000, 'Write IOPS', 30),

        (bts, 1024, 'Bps', 100 * 1024 ** 2),
        (rbts, 1024, 'Read Bps', 100 * 1024 ** 2),
        (wbts, 1024, 'Write Bps', 100 * 1024 ** 2),

        (lat, None, 'Latency, ms', 20),
        (queue_depth, None, 'Average QD', 3),
        (io_time, None, 'Active time %', 100),
        # (w_io_time, None, 'W. time share', 5.0),
    ]

    report.add_block(12, "Current disk IO load", "")
    for pos, (target, base, tp, min_max_val) in enumerate(loads, 1):
        if target is lat:
            max_val = 300.0
        else:
            max_val = max(map(max, [data.values() for data in target.values()]))
            max_val = max(min_max_val, max_val)

        max_len = max(map(len, target.values()))

        table = html2.HTMLTable(['host'] + ['load'] * max_len, zebra=False)
        for host_name, data in sorted(target.items()):
            table.add_cell(host_name)
            for dev, val in sorted(data.items()):
                if max_val == 0:
                    color = "#FFFFFF"
                else:
                    color = val_to_color(float(val) / max_val)

                if target is lat:
                    s_val = str(int(val))
                elif target is queue_depth:
                    s_val = "%.1f" % (val,)
                elif base is None:
                    s_val = "%.1f" % (val,)
                else:
                    s_val = b2ssize(val, False, base=base)

                cell_data = H.div(dev, _class="left") + H.div(s_val, _class="right")
                table.add_cell(cell_data,
                               bgcolor=color,
                               sorttable_customkey=str(val))

            for i in range(max_len - len(data)):
                table.add_cell("-", sorttable_customkey='0')
            table.next_row()

        ncols = max_len + 1  # header
        weight = max(2, min(12, ncols))
        report.add_block(weight, tp, table, "Disk IO - " + tp)
def show_host_network_load_in_color(report, cluster):
    net_io = collections.defaultdict(lambda: {})
    send_net_io = collections.defaultdict(lambda: {})
    recv_net_io = collections.defaultdict(lambda: {})

    for host in cluster.hosts.values():
        ceph_adapters = []
        for net in (host.cluster_net, host.public_net):
            if net is not None:
                ceph_adapters.append(net.name)
            else:
                ceph_adapters.append('-')

        nets = [('cluster', host.cluster_net), ('public', host.public_net)]

        nets += [(net.name, net)
                 for net in host.net_adapters.values()
                 if net.is_phy and net.name not in ceph_adapters]

        for name, net in nets:
            if net is None or net.perf_stats_curr is None:
                continue

            usage = max((net.perf_stats_curr.sbytes,
                         net.perf_stats_curr.rbytes))

            if usage > 0 or name in ('cluster', 'public'):
                net_io[host.name][name] = (usage, net.speed)
                send_net_io[host.name][name] = (net.perf_stats_curr.sbytes, net.speed)
                recv_net_io[host.name][name] = (net.perf_stats_curr.rbytes, net.speed)

    if len(net_io) == 0:
        report.add_block(6, "No network load awailable", "")
        return

    std_nets = set(['public', 'cluster'])

    report.add_block(12, "Network load (to max dev throughput)", "")
    loads = [
        # (net_io, "Network load max (to max dev throughput):"),
        (send_net_io, "Send"),
        (recv_net_io, "Receive"),
    ]

    for io, name in loads:
        max_len = max(len(set(data) - std_nets) for data in io.values())

        table = html2.HTMLTable(
            ["host", "public<br>net", "cluster<br>net"] + ["hw adapter"] * max_len,
            zebra=False
        )

        for host_name, data in sorted(io.items()):
            net_names = list(std_nets) + sorted(set(data) - std_nets)

            table.add_cell(host_name)
            for net_name in net_names:
                if net_name not in data:
                    table.add_cell('-')
                    continue

                usage, speed = data[net_name]
                if speed is None:
                    color = "#FFFFFF"
                else:
                    color = val_to_color(float(usage) / speed)

                if net_name not in std_nets:
                    text = H.div(net_name, _class="left") + \
                           H.div(b2ssize(usage, False), _class="right")
                else:
                    text = b2ssize(usage, False)

                table.add_cell(text,
                               bgcolor=color,
                               sorttable_customkey=str(usage))

            for i in range(max_len + len(std_nets) - len(data.items())):
                table.add_cell("-", sorttable_customkey='0')

            table.next_row()

        ncols = max_len + len(std_nets) + 1  # header
        weight = max(2, min(12, ncols))
        report.add_block(weight, name, table, "Network - " + name)
def show_osd_perf_info(report, cluster):
    table = html2.HTMLTable(headers=["OSD",
                                     "node",
                                     "apply<br>lat, ms",
                                     "commit<br>lat, ms",
                                     "D dev",
                                     "D read<br>Bps",
                                     "D write<br>Bps",
                                     "D read<br>OPS",
                                     "D write<br>OPS",
                                     "D IO<br>time %",
                                     "J dev",
                                     "J read<br>Bps",
                                     "J write<br>Bps",
                                     "J read<br>OPS",
                                     "J write<br>OPS",
                                     "J IO<br>time %",
                                     ])

    for osd in cluster.osds:
        if osd.osd_perf is not None:
            apply_latency_ms = osd.osd_perf["apply_latency_ms"]
            commit_latency_ms = osd.osd_perf["commit_latency_ms"]
        else:
            apply_latency_ms = HTML_UNKNOWN
            commit_latency_ms = HTML_UNKNOWN

        perf_info = []

        for dev_stat in (osd.data_stor_stats, osd.j_stor_stats):
            if dev_stat is None or 'read_bytes_uptime' not in dev_stat:
                perf_info.extend([('-', 0)] * 6)
                continue

            perf_info.extend([
                (os.path.basename(dev_stat.root_dev), os.path.basename(dev_stat.root_dev)),
                (b2ssize(dev_stat.read_bytes_uptime, False), dev_stat.read_bytes_uptime),
                (b2ssize(dev_stat.write_bytes_uptime, False), dev_stat.write_bytes_uptime),
                (b2ssize(dev_stat.read_iops_uptime, False), dev_stat.read_iops_uptime),
                (b2ssize(dev_stat.write_iops_uptime, False), dev_stat.write_iops_uptime),
                (int(dev_stat.io_time_uptime * 100), dev_stat.io_time_uptime)
            ])

        table.add_cell(str(osd.id))
        table.add_cell(osd.host)
        table.add_cell(str(apply_latency_ms))
        table.add_cell(str(commit_latency_ms))
        for val, sorted_val in perf_info:
            table.add_cell(str(val), sorttable_customkey=str(sorted_val))
        table.next_row()

    report.add_block(8, "OSD's load uptime average:", table)

    table = html2.HTMLTable(headers=["OSD",
                                     "node",
                                     "D dev",
                                     "D read<br>Bps",
                                     "D write<br>Bps",
                                     "D read<br>OPS",
                                     "D write<br>OPS",
                                     "D lat<br>ms",
                                     "D IO<br>time %",
                                     "J dev",
                                     "J read<br>Bps",
                                     "J write<br>Bps",
                                     "J read<br>OPS",
                                     "J write<br>OPS",
                                     "J lat<br>ms",
                                     "J IO<br>time %",
                                     ])

    have_any_data = False
    for osd in cluster.osds:
        perf_info = []

        have_data = False
        for dev_stat in (osd.data_stor_stats, osd.j_stor_stats):
            if dev_stat is None or 'read_bytes_curr' not in dev_stat:
                perf_info.extend([('-', 0)] * 7)
                continue

            have_data = True
            have_any_data = True
            perf_info.extend([
                (os.path.basename(dev_stat.root_dev), None),
                (b2ssize(dev_stat.read_bytes_curr, False), dev_stat.read_bytes_curr),
                (b2ssize(dev_stat.write_bytes_curr, False), dev_stat.write_bytes_curr),
                (b2ssize(dev_stat.read_iops_curr, False), dev_stat.read_iops_curr),
                (b2ssize(dev_stat.write_iops_curr, False), dev_stat.write_iops_curr),
                (int(dev_stat.lat_curr * 1000), dev_stat.lat_curr),
                (int(dev_stat.io_time_curr * 100), dev_stat.io_time_curr)
            ])

        if have_data:
            table.add_cell(str(osd.id))
            table.add_cell(osd.host)
            for val, sorted_val in perf_info:
                table.add_cell(str(val), sorttable_customkey=str(sorted_val))
            table.next_row()

    if have_any_data:
        report.add_block(8, "OSD's current load:", table)
    else:
        report.add_block(6, "OSD's current load unawailable", "")