/
ptable.py
143 lines (125 loc) · 5.71 KB
/
ptable.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# TODO:
# - handle tab and other non-monospace characters better
# - recognize numeric columns and right justify them
from collections import defaultdict
from itertools import zip_longest
from operator import itemgetter
from statistics import median_low
def _squeeze(lines, width):
"""
Squeeze the contents of a cell into a fixed width column, breaking lines on spaces where possible.
:param lines: list of string lines in the cell
:param width: fixed width of the column
:return: list of lines squeezed to fit
"""
if all(len(line) <= width for line in lines):
return lines
out = []
token_lines = [line.split(" ") for line in lines]
for token_line in token_lines:
line = []
for token in token_line:
if sum(len(token) for token in line) + len(token) + len(line) > width:
if line:
out.append(" ".join(line))
while len(token) > width:
out.append(token[:width])
token = token[width:]
line = [token]
else:
line.append(token)
out.append(" ".join(line))
return out
def fmt(fmt_str):
    """
    Turn a format string into a callable that formats its argument with it.

    :param fmt_str: string appropriate for using with string.format
    :return: the bound ``format`` method of fmt_str, i.e. a function which applies its argument to fmt_str
    """
    formatter = fmt_str.format
    return formatter
def ljust(s, width):
    """
    Left justify s in a field of the given width.

    :param s: string to justify
    :param width: minimum width of the result
    :return: s padded on the right with spaces up to width (s unchanged if already that wide)
    """
    padded = s.ljust(width)
    return padded
def rjust(s, width):
    """
    Right justify s in a field of the given width.

    :param s: string to justify
    :param width: minimum width of the result
    :return: s padded on the left with spaces up to width (s unchanged if already that wide)
    """
    padded = s.rjust(width)
    return padded
def cjust(s, width):
    """
    Centre s in a field of the given width.

    When the padding cannot be split evenly the extra space goes on the right.

    :param s: string to justify
    :param width: minimum width of the result
    :return: s surrounded by spaces up to width (s unchanged if already that wide)
    """
    missing = width - len(s)
    if missing <= 0:
        return s
    left_pad = missing // 2
    # the right side takes whatever is left, so it receives the odd space
    right_pad = missing - left_pad
    return " " * left_pad + s + " " * right_pad
def _cell_widths(headers, rows):
    """Return, for each column, the width of the widest line of every cell (body cells first, header last)."""
    per_column = []
    # zip(*rows) transposes to columns
    # zip_longest in case there are no rows
    for header, col in zip_longest(headers, zip(*rows), fillvalue=()):
        widths = [max(len(line) for line in cell) for cell in col]
        widths.append(max(len(line) for line in header))
        per_column.append(widths)
    return per_column


def _shrink_widths(col_width_maxes, col_width_medians, available_width):
    """Narrow columns one character at a time until they fit available_width, never going below width 1."""
    widths = list(col_width_maxes)
    diffs = [mx - md for mx, md in zip(col_width_maxes, col_width_medians)]
    for _ in range(sum(widths) - available_width):
        # A column must keep at least one character: a width of zero or less cannot hold any text and
        # makes wrapping impossible. Columns that are already that narrow are skipped.
        shrinkable = [i for i, width in enumerate(widths) if width > 1]
        if not shrinkable:
            break
        # take from the column with the greatest difference between max and median width (first one on ties)
        i = max(shrinkable, key=diffs.__getitem__)
        diffs[i] -= 1
        widths[i] -= 1
    return widths


def _render_lines(cells, col_widths, justs):
    """Render one table row (a list of multi-line cells) as a list of "| a | b |\\n" text lines."""
    height = max(len(cell) for cell in cells)
    # pad shorter cells with blank lines so that every cell has the same number of lines
    padded = [cell + [""] * (height - len(cell)) for cell in cells]
    return [
        "| {} |\n".format(" | ".join(just(text, width) for just, text, width in zip(justs, line, col_widths)))
        for line in zip(*padded)
    ]


def ptable(headers, *rows, max_width=200, str=str, str_by_type={}, justification=()):
    """
    Make an easily readable table.

    The result is a markdown style table: the header line(s), a separator line carrying the justification
    markers, then the rows. Cells may contain newlines. When the table would be wider than max_width the
    columns whose widest cell exceeds their median cell width the most are narrowed first and their contents
    are wrapped; no column is ever narrowed below one character.

    :param headers: iterable of header values
    :param rows: iterables of column contents (must all be the same length as headers)
    :param max_width: maximum number of columns for the table (including margins and separators)
    :param str: function to convert items to strings
    :param str_by_type: dictionary mapping types to custom str functions to be used for those types
        used in preference over default str except for headers which always use default str
        (only read, never modified)
    :param justification: iterable of justification functions taking (text, width), e.g. ljust, rjust or cjust,
        one per column from left to right; columns without an entry are left justified and surplus entries
        are ignored
    :return: a string containing the table
    """
    headers = [str(h).split("\n") for h in headers]
    rows = [[(str_by_type.get(type(c)) or str)(c).split("\n") for c in row] for row in rows]
    assert len(set(len(row) for row in rows) | {len(headers)}) == 1, "headers and rows must have same number of columns"
    n_cols = len(headers)

    # 2 chars padding on the left, 3 between each column, 2 on the right
    available_width = max_width - 2 - (n_cols - 1) * 3 - 2
    assert available_width >= n_cols, "must provide enough width for at least one character per column"

    # Exactly one justification function per column. Materialising the iterable also means a one-shot
    # iterator is honoured for every rendered line rather than only the first one.
    justs = list(justification)[:n_cols]
    justs += [ljust] * (n_cols - len(justs))

    # use the max and median widths of each column to see if any need to be squeezed to fit the overall max_width
    cell_widths = _cell_widths(headers, rows)
    col_widths = [max(widths) for widths in cell_widths]
    if sum(col_widths) > available_width:
        col_width_medians = [median_low(widths) for widths in cell_widths]
        col_widths = _shrink_widths(col_widths, col_width_medians, available_width)
        headers = [_squeeze(h, width) for h, width in zip(headers, col_widths)]
        rows = [[_squeeze(c, width) for c, width in zip(row, col_widths)] for row in rows]
        # recalculate the max width after the squeeze and use the lesser of that and the current width to avoid
        # whitespace at the end of wrapped lines
        col_widths = [min(width, max(actual)) for width, actual in zip(col_widths, _cell_widths(headers, rows))]

    out = _render_lines(headers, col_widths, justs)

    separator = ["|"]
    for just, width in zip(justs, col_widths):
        # markdown justification markers
        left = ":" if just in (ljust, cjust) else " "
        right = ":" if just in (cjust, rjust) else " "
        separator.append("{}{}{}|".format(left, "-" * width, right))
    separator.append("\n")
    out.extend(separator)

    for row in rows:
        out.extend(_render_lines(row, col_widths, justs))
    return "".join(out)