def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds): """ internal. pprinter for iterables. you should probably use pprint_thing() rather then calling this directly. """ fmt = "{{{things}}}" pairs = [] pfmt = "{key}: {val}" if max_seq_items is False: nitems = len(seq) else: nitems = max_seq_items or get_option("max_seq_items") or len(seq) for k, v in list(seq.items())[:nitems]: pairs.append( pfmt.format( key=pprint_thing(k, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds), val=pprint_thing(v, _nest_lvl + 1, max_seq_items=max_seq_items, **kwds))) if nitems < len(seq): return fmt.format(things=", ".join(pairs) + ", ...") else: return fmt.format(things=", ".join(pairs))
def _write_table(self, indent=0): _classes = ['dataframe'] # Default class. use_mathjax = get_option("display.html.use_mathjax") if not use_mathjax: _classes.append('tex2jax_ignore') if self.classes is not None: if isinstance(self.classes, str): self.classes = self.classes.split() if not isinstance(self.classes, (list, tuple)): raise TypeError('classes must be a string, list, or tuple, ' 'not {typ}'.format(typ=type(self.classes))) _classes.extend(self.classes) if self.table_id is None: id_section = "" else: id_section = ' id="{table_id}"'.format(table_id=self.table_id) self.write('<table border="{border}" class="{cls}"{id_section}>' .format(border=self.border, cls=' '.join(_classes), id_section=id_section), indent) if self.fmt.header or self.show_row_idx_names: self._write_header(indent + self.indent_delta) self._write_body(indent + self.indent_delta) self.write('</table>', indent)
def _pprint_seq(seq, _nest_lvl=0, max_seq_items=None, **kwds): """ internal. pprinter for iterables. you should probably use pprint_thing() rather then calling this directly. bounds length of printed sequence, depending on options """ if isinstance(seq, set): fmt = "{{{body}}}" else: fmt = "[{body}]" if hasattr(seq, '__setitem__') else "({body})" if max_seq_items is False: nitems = len(seq) else: nitems = max_seq_items or get_option("max_seq_items") or len(seq) s = iter(seq) # handle sets, no slicing r = [pprint_thing(next(s), _nest_lvl + 1, max_seq_items=max_seq_items, **kwds) for i in range(min(nitems, len(seq)))] body = ", ".join(r) if nitems < len(seq): body += ", ..." elif isinstance(seq, tuple) and len(seq) == 1: body += ',' return fmt.format(body=body)
def _use_inf_as_na(key): """Option change callback for na/inf behaviour Choose which replacement for numpy.isnan / -numpy.isfinite is used. Parameters ---------- flag: bool True means treat None, NaN, INF, -INF as null (old way), False means None and NaN are null, but INF, -INF are not null (new way). Notes ----- This approach to setting global module values is discussed and approved here: * http://stackoverflow.com/questions/4859217/ programmatically-creating-variables-in-python/4859312#4859312 """ from pandas._config import get_option flag = get_option(key) if flag: globals()['_isna'] = _isna_old else: globals()['_isna'] = _isna_new
def _format_data(self): # TODO: integrate with categorical and make generic # name argument is unused here; just for compat with base / categorical n = len(self) max_seq_items = min((get_option( 'display.max_seq_items') or n) // 10, 10) formatter = str if n == 0: summary = '[]' elif n == 1: first = formatter(self[0]) summary = '[{first}]'.format(first=first) elif n == 2: first = formatter(self[0]) last = formatter(self[-1]) summary = '[{first}, {last}]'.format(first=first, last=last) else: if n > max_seq_items: n = min(max_seq_items // 2, 10) head = [formatter(x) for x in self[:n]] tail = [formatter(x) for x in self[-n:]] summary = '[{head} ... {tail}]'.format( head=', '.join(head), tail=', '.join(tail)) else: tail = [formatter(x) for x in self] summary = '[{tail}]'.format(tail=', '.join(tail)) return summary
def __bytes__(self): """ Return a string representation for a particular object. """ from pandas._config import get_option encoding = get_option("display.encoding") return self.__unicode__().encode(encoding, 'replace')
def _format_attrs(self): """ Return a list of tuples of the (attr,formatted_value) """ max_categories = (10 if get_option("display.max_categories") == 0 else get_option("display.max_categories")) attrs = [ ('categories', ibase.default_pprint(self.categories, max_seq_items=max_categories)), ('ordered', self.ordered)] if self.name is not None: attrs.append(('name', ibase.default_pprint(self.name))) attrs.append(('dtype', "'%s'" % self.dtype.name)) max_seq_items = get_option('display.max_seq_items') or len(self) if len(self) > max_seq_items: attrs.append(('length', len(self))) return attrs
def __bytes__(self): """ Return a string representation for a particular object. Invoked by bytes(obj) in py3 only. Yields a bytestring in both py2/py3. """ from pandas._config import get_option encoding = get_option("display.encoding") return self.__unicode__().encode(encoding, 'replace')
def __init__(self, formatter, classes=None, border=None): self.fmt = formatter self.classes = classes self.frame = self.fmt.frame self.columns = self.fmt.tr_frame.columns self.elements = [] self.bold_rows = self.fmt.kwds.get('bold_rows', False) self.escape = self.fmt.kwds.get('escape', True) self.show_dimensions = self.fmt.show_dimensions if border is None: border = get_option('display.html.border') self.border = border self.table_id = self.fmt.table_id self.render_links = self.fmt.render_links
def _get_level_lengths(index, hidden_elements=None): """ Given an index, find the level length for each element. Optional argument is a list of index positions which should not be visible. Result is a dictionary of (level, inital_position): span """ sentinel = object() levels = index.format(sparsify=sentinel, adjoin=False, names=False) if hidden_elements is None: hidden_elements = [] lengths = {} if index.nlevels == 1: for i, value in enumerate(levels): if(i not in hidden_elements): lengths[(0, i)] = 1 return lengths for i, lvl in enumerate(levels): for j, row in enumerate(lvl): if not get_option('display.multi_sparse'): lengths[(i, j)] = 1 elif (row != sentinel) and (j not in hidden_elements): last_label = j lengths[(i, last_label)] = 1 elif (row != sentinel): # even if its hidden, keep track of it in case # length >1 and later elements are visible last_label = j lengths[(i, last_label)] = 0 elif(j not in hidden_elements): lengths[(i, last_label)] += 1 non_zero_lengths = { element: length for element, length in lengths.items() if length >= 1} return non_zero_lengths
def __init__(self, data, precision=None, table_styles=None, uuid=None, caption=None, table_attributes=None, cell_ids=True): self.ctx = defaultdict(list) self._todo = [] if not isinstance(data, (pd.Series, pd.DataFrame)): raise TypeError("``data`` must be a Series or DataFrame") if data.ndim == 1: data = data.to_frame() if not data.index.is_unique or not data.columns.is_unique: raise ValueError("style is not supported for non-unique indices.") self.data = data self.index = data.index self.columns = data.columns self.uuid = uuid self.table_styles = table_styles self.caption = caption if precision is None: precision = get_option('display.precision') self.precision = precision self.table_attributes = table_attributes self.hidden_index = False self.hidden_columns = [] self.cell_ids = cell_ids # display_funcs maps (row, col) -> formatting function def default_display_func(x): if is_float(x): return '{:>.{precision}g}'.format(x, precision=self.precision) else: return x self._display_funcs = defaultdict(lambda: default_display_func)
def format_object_attrs(obj): """ Return a list of tuples of the (attr, formatted_value) for common attrs, including dtype, name, length Parameters ---------- obj : object must be iterable Returns ------- list """ attrs = [] if hasattr(obj, 'dtype'): attrs.append(('dtype', "'{}'".format(obj.dtype))) if getattr(obj, 'name', None) is not None: attrs.append(('name', default_pprint(obj.name))) max_seq_items = get_option('display.max_seq_items') or len(obj) if len(obj) > max_seq_items: attrs.append(('length', len(obj))) return attrs
ABCMultiIndex, ABCPeriodIndex, ABCSeries) from pandas.core.dtypes.missing import isna, notna import pandas.core.common as com from pandas.io.formats.printing import pprint_thing from pandas.plotting._matplotlib import converter from pandas.plotting._matplotlib.compat import _mpl_ge_3_0_0 from pandas.plotting._matplotlib.style import _get_standard_colors from pandas.plotting._matplotlib.tools import (_flatten, _get_all_lines, _get_xlim, _handle_shared_axes, _subplots, format_date_labels, table) if get_option('plotting.matplotlib.register_converters'): converter.register(explicit=False) class MPLPlot: """ Base class for assembling a pandas plot using matplotlib Parameters ---------- data : """ @property def _kind(self): """Specify kind str. Must be overridden in child class"""
def max_rows(self) -> int: """Maximum info rows to be displayed.""" return get_option("display.max_info_rows", len(self.data) + 1)
def pprint_thing(thing, _nest_lvl=0, escape_chars=None, default_escapes=False, quote_strings=False, max_seq_items=None): """ This function is the sanctioned way of converting objects to a unicode representation. properly handles nested sequences containing unicode strings (unicode(object) does not) Parameters ---------- thing : anything to be formatted _nest_lvl : internal use only. pprint_thing() is mutually-recursive with pprint_sequence, this argument is used to keep track of the current nesting level, and limit it. escape_chars : list or dict, optional Characters to escape. If a dict is passed the values are the replacements default_escapes : bool, default False Whether the input escape characters replaces or adds to the defaults max_seq_items : False, int, default None Pass thru to other pretty printers to limit sequence printing Returns ------- result - unicode str """ def as_escaped_unicode(thing, escape_chars=escape_chars): # Unicode is fine, else we try to decode using utf-8 and 'replace' # if that's not it either, we have no way of knowing and the user # should deal with it himself. try: result = str(thing) # we should try this first except UnicodeDecodeError: # either utf-8 or we replace errors result = str(thing).decode('utf-8', "replace") translate = { '\t': r'\t', '\n': r'\n', '\r': r'\r', } if isinstance(escape_chars, dict): if default_escapes: translate.update(escape_chars) else: translate = escape_chars escape_chars = list(escape_chars.keys()) else: escape_chars = escape_chars or tuple() for c in escape_chars: result = result.replace(c, translate[c]) return str(result) if hasattr(thing, '__next__'): return str(thing) elif (isinstance(thing, dict) and _nest_lvl < get_option("display.pprint_nest_depth")): result = _pprint_dict(thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items) elif (is_sequence(thing) and _nest_lvl < get_option("display.pprint_nest_depth")): result = _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars, quote_strings=quote_strings, max_seq_items=max_seq_items) elif isinstance(thing, str) and quote_strings: result = "'{thing}'".format(thing=as_escaped_unicode(thing)) else: result = as_escaped_unicode(thing) return str(result) # always unicode
def format_object_summary(obj, formatter, is_justify=True, name=None, indent_for_name=True): """ Return the formatted obj as a unicode string Parameters ---------- obj : object must be iterable and support __getitem__ formatter : callable string formatter for an element is_justify : boolean should justify the display name : name, optional defaults to the class name of the obj indent_for_name : bool, default True Whether subsequent lines should be be indented to align with the name. Returns ------- summary string """ from pandas.io.formats.console import get_console_size from pandas.io.formats.format import _get_adjustment display_width, _ = get_console_size() if display_width is None: display_width = get_option('display.width') or 80 if name is None: name = obj.__class__.__name__ if indent_for_name: name_len = len(name) space1 = "\n%s" % (' ' * (name_len + 1)) space2 = "\n%s" % (' ' * (name_len + 2)) else: space1 = "\n" space2 = "\n " # space for the opening '[' n = len(obj) sep = ',' max_seq_items = get_option('display.max_seq_items') or n # are we a truncated display is_truncated = n > max_seq_items # adj can optionally handle unicode eastern asian width adj = _get_adjustment() def _extend_line(s, line, value, display_width, next_line_prefix): if (adj.len(line.rstrip()) + adj.len(value.rstrip()) >= display_width): s += line.rstrip() line = next_line_prefix line += value return s, line def best_len(values): if values: return max(adj.len(x) for x in values) else: return 0 close = ', ' if n == 0: summary = '[]{}'.format(close) elif n == 1: first = formatter(obj[0]) summary = '[{}]{}'.format(first, close) elif n == 2: first = formatter(obj[0]) last = formatter(obj[-1]) summary = '[{}, {}]{}'.format(first, last, close) else: if n > max_seq_items: n = min(max_seq_items // 2, 10) head = [formatter(x) for x in obj[:n]] tail = [formatter(x) for x in obj[-n:]] else: head = [] tail = [formatter(x) for x in obj] # adjust all values to max length if needed if is_justify: # however, if we are not truncated and we are only a single # line, then don't justify if (is_truncated or not (len(', '.join(head)) < display_width and len(', '.join(tail)) < display_width)): max_len = max(best_len(head), best_len(tail)) head = [x.rjust(max_len) for x in head] tail = [x.rjust(max_len) for x in tail] summary = "" line = space2 for i in range(len(head)): word = head[i] + sep + ' ' summary, line = _extend_line(summary, line, word, display_width, space2) if is_truncated: # remove trailing space of last line summary += line.rstrip() + space2 + '...' line = space2 for i in range(len(tail) - 1): word = tail[i] + sep + ' ' summary, line = _extend_line(summary, line, word, display_width, space2) # last value: no sep added + 1 space of width used for trailing ',' summary, line = _extend_line(summary, line, tail[-1], display_width - 2, space2) summary += line # right now close is either '' or ', ' # Now we want to include the ']', but not the maybe space. close = ']' + close.rstrip(' ') summary += close if len(summary) > (display_width): summary += space1 else: # one row summary += ' ' # remove initial space summary = '[' + summary[len(space2):] return summary
UserWarning) except ImportError: # pragma: no cover pass _USE_BOTTLENECK = False def set_use_bottleneck(v=True): # set/unset to use bottleneck global _USE_BOTTLENECK if _BOTTLENECK_INSTALLED: _USE_BOTTLENECK = v set_use_bottleneck(get_option('compute.use_bottleneck')) class disallow(object): def __init__(self, *dtypes): super(disallow, self).__init__() self.dtypes = tuple(pandas_dtype(dtype).type for dtype in dtypes) def check(self, obj): return hasattr(obj, 'dtype') and issubclass(obj.dtype.type, self.dtypes) def __call__(self, f): @functools.wraps(f) def _f(*args, **kwargs): obj_iter = itertools.chain(args, compat.itervalues(kwargs))
def _translate(self, sparsify_index: bool | None = None, sparsify_cols: bool | None = None): """ Process Styler data and settings into a dict for template rendering. Convert data and settings from ``Styler`` attributes such as ``self.data``, ``self.tooltips`` including applying any methods in ``self._todo``. Parameters ---------- sparsify_index : bool, optional Whether to sparsify the index or print all hierarchical index elements sparsify_cols : bool, optional Whether to sparsify the columns or print all hierarchical column elements Returns ------- d : dict The following structure: {uuid, table_styles, caption, head, body, cellstyle, table_attributes} """ if sparsify_index is None: sparsify_index = get_option("display.multi_sparse") if sparsify_cols is None: sparsify_cols = get_option("display.multi_sparse") ROW_HEADING_CLASS = "row_heading" COL_HEADING_CLASS = "col_heading" INDEX_NAME_CLASS = "index_name" DATA_CLASS = "data" BLANK_CLASS = "blank" BLANK_VALUE = " " # construct render dict d = { "uuid": self.uuid, "table_styles": _format_table_styles(self.table_styles or []), "caption": self.caption, } head = self._translate_header(BLANK_CLASS, BLANK_VALUE, INDEX_NAME_CLASS, COL_HEADING_CLASS, sparsify_cols) d.update({"head": head}) self.cellstyle_map: DefaultDict[tuple[CSSPair, ...], list[str]] = defaultdict(list) body = self._translate_body(DATA_CLASS, ROW_HEADING_CLASS, sparsify_index) d.update({"body": body}) cellstyle: list[dict[str, CSSList | list[str]]] = [{ "props": list(props), "selectors": selectors } for props, selectors in self.cellstyle_map.items()] d.update({"cellstyle": cellstyle}) table_attr = self.table_attributes use_mathjax = get_option("display.html.use_mathjax") if not use_mathjax: table_attr = table_attr or "" if 'class="' in table_attr: table_attr = table_attr.replace('class="', 'class="tex2jax_ignore ') else: table_attr += ' class="tex2jax_ignore"' d.update({"table_attributes": table_attr}) if self.tooltips: d = self.tooltips._translate(self.data, self.uuid, d) return d
except ImportError: _HAVE_FASTPARQUET = False pytestmark = pytest.mark.filterwarnings( "ignore:RangeIndex.* is deprecated:DeprecationWarning") # TODO(ArrayManager) fastparquet relies on BlockManager internals # setup engines & skips @pytest.fixture(params=[ pytest.param( "fastparquet", marks=pytest.mark.skipif( not _HAVE_FASTPARQUET or get_option("mode.data_manager") == "array", reason="fastparquet is not installed or ArrayManager is used", ), ), pytest.param( "pyarrow", marks=pytest.mark.skipif(not _HAVE_PYARROW, reason="pyarrow is not installed"), ), ]) def engine(request): return request.param @pytest.fixture def pa():
def format_object_summary( obj, formatter: Callable, is_justify: bool = True, name: Optional[str] = None, indent_for_name: bool = True, line_break_each_value: bool = False, ) -> str: """ Return the formatted obj as a unicode string Parameters ---------- obj : object must be iterable and support __getitem__ formatter : callable string formatter for an element is_justify : boolean should justify the display name : name, optional defaults to the class name of the obj indent_for_name : bool, default True Whether subsequent lines should be be indented to align with the name. line_break_each_value : bool, default False If True, inserts a line break for each value of ``obj``. If False, only break lines when the a line of values gets wider than the display width. .. versionadded:: 0.25.0 Returns ------- summary string """ from pandas.io.formats.console import get_console_size from pandas.io.formats.format import _get_adjustment display_width, _ = get_console_size() if display_width is None: display_width = get_option("display.width") or 80 if name is None: name = type(obj).__name__ if indent_for_name: name_len = len(name) space1 = f'\n{(" " * (name_len + 1))}' space2 = f'\n{(" " * (name_len + 2))}' else: space1 = "\n" space2 = "\n " # space for the opening '[' n = len(obj) if line_break_each_value: # If we want to vertically align on each value of obj, we need to # separate values by a line break and indent the values sep = ",\n " + " " * len(name) else: sep = "," max_seq_items = get_option("display.max_seq_items") or n # are we a truncated display is_truncated = n > max_seq_items # adj can optionally handle unicode eastern asian width adj = _get_adjustment() def _extend_line( s: str, line: str, value: str, display_width: int, next_line_prefix: str ) -> Tuple[str, str]: if adj.len(line.rstrip()) + adj.len(value.rstrip()) >= display_width: s += line.rstrip() line = next_line_prefix line += value return s, line def best_len(values: List[str]) -> int: if values: return max(adj.len(x) for x in values) else: return 0 close = ", " if n == 0: summary = f"[]{close}" elif n == 1 and not line_break_each_value: first = formatter(obj[0]) summary = f"[{first}]{close}" elif n == 2 and not line_break_each_value: first = formatter(obj[0]) last = formatter(obj[-1]) summary = f"[{first}, {last}]{close}" else: if n > max_seq_items: n = min(max_seq_items // 2, 10) head = [formatter(x) for x in obj[:n]] tail = [formatter(x) for x in obj[-n:]] else: head = [] tail = [formatter(x) for x in obj] # adjust all values to max length if needed if is_justify: if line_break_each_value: # Justify each string in the values of head and tail, so the # strings will right align when head and tail are stacked # vertically. head, tail = _justify(head, tail) elif is_truncated or not ( len(", ".join(head)) < display_width and len(", ".join(tail)) < display_width ): # Each string in head and tail should align with each other max_length = max(best_len(head), best_len(tail)) head = [x.rjust(max_length) for x in head] tail = [x.rjust(max_length) for x in tail] # If we are not truncated and we are only a single # line, then don't justify if line_break_each_value: # Now head and tail are of type List[Tuple[str]]. Below we # convert them into List[str], so there will be one string per # value. Also truncate items horizontally if wider than # max_space max_space = display_width - len(space2) value = tail[0] for max_items in reversed(range(1, len(value) + 1)): pprinted_seq = _pprint_seq(value, max_seq_items=max_items) if len(pprinted_seq) < max_space: break head = [_pprint_seq(x, max_seq_items=max_items) for x in head] tail = [_pprint_seq(x, max_seq_items=max_items) for x in tail] summary = "" line = space2 for max_items in range(len(head)): word = head[max_items] + sep + " " summary, line = _extend_line(summary, line, word, display_width, space2) if is_truncated: # remove trailing space of last line summary += line.rstrip() + space2 + "..." line = space2 for max_items in range(len(tail) - 1): word = tail[max_items] + sep + " " summary, line = _extend_line(summary, line, word, display_width, space2) # last value: no sep added + 1 space of width used for trailing ',' summary, line = _extend_line(summary, line, tail[-1], display_width - 2, space2) summary += line # right now close is either '' or ', ' # Now we want to include the ']', but not the maybe space. close = "]" + close.rstrip(" ") summary += close if len(summary) > (display_width) or line_break_each_value: summary += space1 else: # one row summary += " " # remove initial space summary = "[" + summary[len(space2) :] return summary
from pandas.io.formats.printing import pprint_thing from pandas.plotting._matplotlib import converter from pandas.plotting._matplotlib.compat import _mpl_ge_3_0_0 from pandas.plotting._matplotlib.style import _get_standard_colors from pandas.plotting._matplotlib.tools import ( _flatten, _get_all_lines, _get_xlim, _handle_shared_axes, _subplots, format_date_labels, table, ) if get_option("plotting.matplotlib.register_converters"): converter.register(explicit=False) class MPLPlot: """ Base class for assembling a pandas plot using matplotlib Parameters ---------- data : """ @property def _kind(self): """Specify kind str. Must be overridden in child class"""
def pprint_thing( thing: Any, _nest_lvl: int = 0, escape_chars: Optional[EscapeChars] = None, default_escapes: bool = False, quote_strings: bool = False, max_seq_items: Optional[int] = None, ) -> str: """ This function is the sanctioned way of converting objects to a string representation and properly handles nested sequences. Parameters ---------- thing : anything to be formatted _nest_lvl : internal use only. pprint_thing() is mutually-recursive with pprint_sequence, this argument is used to keep track of the current nesting level, and limit it. escape_chars : list or dict, optional Characters to escape. If a dict is passed the values are the replacements default_escapes : bool, default False Whether the input escape characters replaces or adds to the defaults max_seq_items : int or None, default None Pass through to other pretty printers to limit sequence printing Returns ------- str """ def as_escaped_string( thing: Any, escape_chars: Optional[EscapeChars] = escape_chars ) -> str: translate = {"\t": r"\t", "\n": r"\n", "\r": r"\r"} if isinstance(escape_chars, dict): if default_escapes: translate.update(escape_chars) else: translate = escape_chars escape_chars = list(escape_chars.keys()) else: escape_chars = escape_chars or tuple() result = str(thing) for c in escape_chars: result = result.replace(c, translate[c]) return result if hasattr(thing, "__next__"): return str(thing) elif isinstance(thing, dict) and _nest_lvl < get_option( "display.pprint_nest_depth" ): result = _pprint_dict( thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items ) elif is_sequence(thing) and _nest_lvl < get_option("display.pprint_nest_depth"): result = _pprint_seq( thing, _nest_lvl, escape_chars=escape_chars, quote_strings=quote_strings, max_seq_items=max_seq_items, ) elif isinstance(thing, str) and quote_strings: result = "'{thing}'".format(thing=as_escaped_string(thing)) else: result = as_escaped_string(thing) return result
def _initialize_memory_usage( memory_usage: bool | str | None = None, ) -> bool | str: """Get memory usage based on inputs and display options.""" if memory_usage is None: memory_usage = get_option("display.memory_usage") return memory_usage
def _initialize_max_cols(self, max_cols: int | None) -> int: if max_cols is None: return get_option("display.max_info_columns", self.col_count + 1) return max_cols
def pprint_thing(thing, _nest_lvl=0, escape_chars=None, default_escapes=False, quote_strings=False, max_seq_items=None): """ This function is the sanctioned way of converting objects to a unicode representation. properly handles nested sequences containing unicode strings (unicode(object) does not) Parameters ---------- thing : anything to be formatted _nest_lvl : internal use only. pprint_thing() is mutually-recursive with pprint_sequence, this argument is used to keep track of the current nesting level, and limit it. escape_chars : list or dict, optional Characters to escape. If a dict is passed the values are the replacements default_escapes : bool, default False Whether the input escape characters replaces or adds to the defaults max_seq_items : False, int, default None Pass thru to other pretty printers to limit sequence printing Returns ------- result - unicode object on py2, str on py3. Always Unicode. """ def as_escaped_unicode(thing, escape_chars=escape_chars): # Unicode is fine, else we try to decode using utf-8 and 'replace' # if that's not it either, we have no way of knowing and the user # should deal with it himself. try: result = str(thing) # we should try this first except UnicodeDecodeError: # either utf-8 or we replace errors result = str(thing).decode('utf-8', "replace") translate = {'\t': r'\t', '\n': r'\n', '\r': r'\r', } if isinstance(escape_chars, dict): if default_escapes: translate.update(escape_chars) else: translate = escape_chars escape_chars = list(escape_chars.keys()) else: escape_chars = escape_chars or tuple() for c in escape_chars: result = result.replace(c, translate[c]) return str(result) if hasattr(thing, '__next__'): return str(thing) elif (isinstance(thing, dict) and _nest_lvl < get_option("display.pprint_nest_depth")): result = _pprint_dict(thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items) elif (is_sequence(thing) and _nest_lvl < get_option("display.pprint_nest_depth")): result = _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars, quote_strings=quote_strings, max_seq_items=max_seq_items) elif isinstance(thing, str) and quote_strings: result = "'{thing}'".format(thing=as_escaped_unicode(thing)) else: result = as_escaped_unicode(thing) return str(result) # always unicode
def _translate(self): """ Convert the DataFrame in `self.data` and the attrs from `_build_styles` into a dictionary of {head, body, uuid, cellstyle}. """ table_styles = self.table_styles or [] caption = self.caption ctx = self.ctx precision = self.precision hidden_index = self.hidden_index hidden_columns = self.hidden_columns uuid = self.uuid or str(uuid1()).replace("-", "_") ROW_HEADING_CLASS = "row_heading" COL_HEADING_CLASS = "col_heading" INDEX_NAME_CLASS = "index_name" DATA_CLASS = "data" BLANK_CLASS = "blank" BLANK_VALUE = "" def format_attr(pair): return "{key}={value}".format(**pair) # for sparsifying a MultiIndex idx_lengths = _get_level_lengths(self.index) col_lengths = _get_level_lengths(self.columns, hidden_columns) cell_context = dict() n_rlvls = self.data.index.nlevels n_clvls = self.data.columns.nlevels rlabels = self.data.index.tolist() clabels = self.data.columns.tolist() if n_rlvls == 1: rlabels = [[x] for x in rlabels] if n_clvls == 1: clabels = [[x] for x in clabels] clabels = list(zip(*clabels)) cellstyle = [] head = [] for r in range(n_clvls): # Blank for Index columns... row_es = [{"type": "th", "value": BLANK_VALUE, "display_value": BLANK_VALUE, "is_visible": not hidden_index, "class": " ".join([BLANK_CLASS])}] * (n_rlvls - 1) # ... except maybe the last for columns.names name = self.data.columns.names[r] cs = [BLANK_CLASS if name is None else INDEX_NAME_CLASS, "level{lvl}".format(lvl=r)] name = BLANK_VALUE if name is None else name row_es.append({"type": "th", "value": name, "display_value": name, "class": " ".join(cs), "is_visible": not hidden_index}) if clabels: for c, value in enumerate(clabels[r]): cs = [COL_HEADING_CLASS, "level{lvl}".format(lvl=r), "col{col}".format(col=c)] cs.extend(cell_context.get( "col_headings", {}).get(r, {}).get(c, [])) es = { "type": "th", "value": value, "display_value": value, "class": " ".join(cs), "is_visible": _is_visible(c, r, col_lengths), } colspan = col_lengths.get((r, c), 0) if colspan > 1: es["attributes"] = [ format_attr({"key": "colspan", "value": colspan}) ] row_es.append(es) head.append(row_es) if (self.data.index.names and com._any_not_none(*self.data.index.names) and not hidden_index): index_header_row = [] for c, name in enumerate(self.data.index.names): cs = [INDEX_NAME_CLASS, "level{lvl}".format(lvl=c)] name = '' if name is None else name index_header_row.append({"type": "th", "value": name, "class": " ".join(cs)}) index_header_row.extend( [{"type": "th", "value": BLANK_VALUE, "class": " ".join([BLANK_CLASS]) }] * (len(clabels[0]) - len(hidden_columns))) head.append(index_header_row) body = [] for r, idx in enumerate(self.data.index): row_es = [] for c, value in enumerate(rlabels[r]): rid = [ROW_HEADING_CLASS, "level{lvl}".format(lvl=c), "row{row}".format(row=r)] es = { "type": "th", "is_visible": (_is_visible(r, c, idx_lengths) and not hidden_index), "value": value, "display_value": value, "id": "_".join(rid[1:]), "class": " ".join(rid) } rowspan = idx_lengths.get((c, r), 0) if rowspan > 1: es["attributes"] = [ format_attr({"key": "rowspan", "value": rowspan}) ] row_es.append(es) for c, col in enumerate(self.data.columns): cs = [DATA_CLASS, "row{row}".format(row=r), "col{col}".format(col=c)] cs.extend(cell_context.get("data", {}).get(r, {}).get(c, [])) formatter = self._display_funcs[(r, c)] value = self.data.iloc[r, c] row_dict = {"type": "td", "value": value, "class": " ".join(cs), "display_value": formatter(value), "is_visible": (c not in hidden_columns)} # only add an id if the cell has a style if (self.cell_ids or not(len(ctx[r, c]) == 1 and ctx[r, c][0] == '')): row_dict["id"] = "_".join(cs[1:]) row_es.append(row_dict) props = [] for x in ctx[r, c]: # have to handle empty styles like [''] if x.count(":"): props.append(x.split(":")) else: props.append(['', '']) cellstyle.append({'props': props, 'selector': "row{row}_col{col}" .format(row=r, col=c)}) body.append(row_es) table_attr = self.table_attributes use_mathjax = get_option("display.html.use_mathjax") if not use_mathjax: table_attr = table_attr or '' if 'class="' in table_attr: table_attr = table_attr.replace('class="', 'class="tex2jax_ignore ') else: table_attr += ' class="tex2jax_ignore"' return dict(head=head, cellstyle=cellstyle, body=body, uuid=uuid, precision=precision, table_styles=table_styles, caption=caption, table_attributes=table_attr)
def _ensure_decoded(s): """ if we have bytes, decode them to unicode """ if isinstance(s, (np.bytes_, bytes)): s = s.decode(get_option("display.encoding")) return s
def infor(file, verbose=None, buf=None, max_cols=None, null_counts=None): lines = [] lines.append(str(type(file))) lines.append(file.index._summary()) if len(file.columns) == 0: lines.append("Empty {name}".format(name=type(file).__name__)) fmt.buffer_put_lines(buf, lines) return cols = file.columns # hack if max_cols is None: max_cols = get_option("display.max_info_columns", len(file.columns) + 1) max_rows = get_option("display.max_info_rows", len(file) + 1) if null_counts is None: show_counts = (len(file.columns) <= max_cols) and (len(file) < max_rows) else: show_counts = null_counts exceeds_info_cols = len(file.columns) > max_cols def _verbose_repr(): lines.append("Data columns (total %d columns):" % len(file.columns)) space = max(len(pprint_thing(k)) for k in file.columns) + 4 counts = None tmpl = "{count}{dtype}" if show_counts: counts = file.count() if len(cols) != len(counts): # pragma: no cover raise AssertionError("Columns must equal counts " "({cols:d} != {counts:d})".format( cols=len(cols), counts=len(counts))) tmpl = "{count} non-null {dtype}" dtypes = file.dtypes for i, col in enumerate(file.columns): dtype = dtypes.iloc[i] col = pprint_thing(col) count = "" if show_counts: count = counts.iloc[i] lines.append( _put_str(col, space) + tmpl.format(count=count, dtype=dtype)) def _non_verbose_repr(): lines.append(file.columns._summary(name="Columns")) if verbose: _verbose_repr() elif verbose is False: # specifically set to False, not nesc None _non_verbose_repr() else: if exceeds_info_cols: _non_verbose_repr() else: _verbose_repr() counts = file._data.get_dtype_counts() dtypes = [ "{k}({kk:d})".format(k=k[0], kk=k[1]) for k in sorted(counts.items()) ] lines.append("dtypes: {types}".format(types=", ".join(dtypes))) # fmt.buffer_put_lines(buf, lines) if any(isinstance(x, str) for x in lines): lines = [str(x) for x in lines] return "\n".join(lines)
bn = import_optional_dependency("bottleneck", raise_on_missing=False, on_version="warn") _BOTTLENECK_INSTALLED = bn is not None _USE_BOTTLENECK = False def set_use_bottleneck(v: bool = True) -> None: # set/unset to use bottleneck global _USE_BOTTLENECK if _BOTTLENECK_INSTALLED: _USE_BOTTLENECK = v set_use_bottleneck(get_option("compute.use_bottleneck")) class disallow: def __init__(self, *dtypes): super().__init__() self.dtypes = tuple(pandas_dtype(dtype).type for dtype in dtypes) def check(self, obj) -> bool: return hasattr(obj, "dtype") and issubclass(obj.dtype.type, self.dtypes) def __call__(self, f: F) -> F: @functools.wraps(f) def _f(*args, **kwargs): obj_iter = itertools.chain(args, kwargs.values())
except ImportError: # pragma: no cover pass _USE_BOTTLENECK = False def set_use_bottleneck(v=True): # set/unset to use bottleneck global _USE_BOTTLENECK if _BOTTLENECK_INSTALLED: _USE_BOTTLENECK = v set_use_bottleneck(get_option('compute.use_bottleneck')) class disallow(object): def __init__(self, *dtypes): super(disallow, self).__init__() self.dtypes = tuple(pandas_dtype(dtype).type for dtype in dtypes) def check(self, obj): return hasattr(obj, 'dtype') and issubclass(obj.dtype.type, self.dtypes) def __call__(self, f): @functools.wraps(f) def _f(*args, **kwargs):
pytestmark = pytest.mark.filterwarnings( "ignore:RangeIndex.* is deprecated:DeprecationWarning" ) # TODO(ArrayManager) fastparquet relies on BlockManager internals # setup engines & skips @pytest.fixture( params=[ pytest.param( "fastparquet", marks=pytest.mark.skipif( not _HAVE_FASTPARQUET or get_option("mode.data_manager") == "array", reason="fastparquet is not installed or ArrayManager is used", ), ), pytest.param( "pyarrow", marks=pytest.mark.skipif( not _HAVE_PYARROW, reason="pyarrow is not installed" ), ), ] ) def engine(request): return request.param
'b_value': b_value}, casting='safe') except ValueError as detail: if 'unknown type object' in str(detail): pass except Exception as detail: raise TypeError(str(detail)) if result is None: result = _where_standard(cond, a, b) return result # turn myself on set_use_numexpr(get_option('compute.use_numexpr')) def _has_bool_dtype(x): try: if isinstance(x, ABCDataFrame): return 'bool' in x.dtypes else: return x.dtype == bool except AttributeError: return isinstance(x, (bool, np.bool_)) def _bool_arith_check(op_str, a, b, not_allowed=frozenset(('/', '//', '**')), unsupported=None): if unsupported is None:
def fp(): if not _HAVE_FASTPARQUET: pytest.skip("fastparquet is not installed") elif get_option("mode.data_manager") == "array": pytest.skip("ArrayManager is not supported with fastparquet") return "fastparquet"
def _translate(self, sparse_index: bool, sparse_cols: bool, blank: str = " "): """ Process Styler data and settings into a dict for template rendering. Convert data and settings from ``Styler`` attributes such as ``self.data``, ``self.tooltips`` including applying any methods in ``self._todo``. Parameters ---------- sparse_index : bool Whether to sparsify the index or print all hierarchical index elements. Upstream defaults are typically to `pandas.options.styler.sparse.index`. sparse_cols : bool Whether to sparsify the columns or print all hierarchical column elements. Upstream defaults are typically to `pandas.options.styler.sparse.columns`. Returns ------- d : dict The following structure: {uuid, table_styles, caption, head, body, cellstyle, table_attributes} """ ROW_HEADING_CLASS = "row_heading" COL_HEADING_CLASS = "col_heading" INDEX_NAME_CLASS = "index_name" TRIMMED_COL_CLASS = "col_trim" TRIMMED_ROW_CLASS = "row_trim" DATA_CLASS = "data" BLANK_CLASS = "blank" BLANK_VALUE = blank # construct render dict d = { "uuid": self.uuid, "table_styles": _format_table_styles(self.table_styles or []), "caption": self.caption, } max_elements = get_option("styler.render.max_elements") max_rows, max_cols = _get_trimming_maximums(len(self.data.index), len(self.data.columns), max_elements) head = self._translate_header( BLANK_CLASS, BLANK_VALUE, INDEX_NAME_CLASS, COL_HEADING_CLASS, sparse_cols, max_cols, TRIMMED_COL_CLASS, ) d.update({"head": head}) self.cellstyle_map: DefaultDict[tuple[CSSPair, ...], list[str]] = defaultdict(list) body = self._translate_body( DATA_CLASS, ROW_HEADING_CLASS, sparse_index, max_rows, max_cols, TRIMMED_ROW_CLASS, TRIMMED_COL_CLASS, ) d.update({"body": body}) cellstyle: list[dict[str, CSSList | list[str]]] = [{ "props": list(props), "selectors": selectors } for props, selectors in self.cellstyle_map.items()] d.update({"cellstyle": cellstyle}) table_attr = self.table_attributes use_mathjax = get_option("display.html.use_mathjax") if not use_mathjax: table_attr = table_attr or "" if 'class="' in table_attr: table_attr = table_attr.replace('class="', 'class="tex2jax_ignore ') else: table_attr += ' class="tex2jax_ignore"' d.update({"table_attributes": table_attr}) if self.tooltips: d = self.tooltips._translate(self.data, self.uuid, d) return d
def info( data, verbose=None, buf=None, max_cols=None, memory_usage=None, null_counts=None ) -> None: """ Print a concise summary of a DataFrame. This method prints information about a DataFrame including the index dtype and column dtypes, non-null values and memory usage. Parameters ---------- data : DataFrame DataFrame to print information about. verbose : bool, optional Whether to print the full summary. By default, the setting in ``pandas.options.display.max_info_columns`` is followed. buf : writable buffer, defaults to sys.stdout Where to send the output. By default, the output is printed to sys.stdout. Pass a writable buffer if you need to further process the output. max_cols : int, optional When to switch from the verbose to the truncated output. If the DataFrame has more than `max_cols` columns, the truncated output is used. By default, the setting in ``pandas.options.display.max_info_columns`` is used. memory_usage : bool, str, optional Specifies whether total memory usage of the DataFrame elements (including the index) should be displayed. By default, this follows the ``pandas.options.display.memory_usage`` setting. True always show memory usage. False never shows memory usage. A value of 'deep' is equivalent to "True with deep introspection". Memory usage is shown in human-readable units (base-2 representation). Without deep introspection a memory estimation is made based in column dtype and number of rows assuming values consume the same memory amount for corresponding dtypes. With deep memory introspection, a real memory usage calculation is performed at the cost of computational resources. null_counts : bool, optional Whether to show the non-null counts. By default, this is shown only if the frame is smaller than ``pandas.options.display.max_info_rows`` and ``pandas.options.display.max_info_columns``. A value of True always shows the counts, and False never shows the counts. Returns ------- None This method prints a summary of a DataFrame and returns None. See Also -------- DataFrame.describe: Generate descriptive statistics of DataFrame columns. DataFrame.memory_usage: Memory usage of DataFrame columns. Examples -------- >>> int_values = [1, 2, 3, 4, 5] >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon'] >>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0] >>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values, ... "float_col": float_values}) >>> df int_col text_col float_col 0 1 alpha 0.00 1 2 beta 0.25 2 3 gamma 0.50 3 4 delta 0.75 4 5 epsilon 1.00 Prints information of all columns: >>> df.info(verbose=True) <class 'pandas.core.frame.DataFrame'> RangeIndex: 5 entries, 0 to 4 Data columns (total 3 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 int_col 5 non-null int64 1 text_col 5 non-null object 2 float_col 5 non-null float64 dtypes: float64(1), int64(1), object(1) memory usage: 248.0+ bytes Prints a summary of columns count and its dtypes but not per column information: >>> df.info(verbose=False) <class 'pandas.core.frame.DataFrame'> RangeIndex: 5 entries, 0 to 4 Columns: 3 entries, int_col to float_col dtypes: float64(1), int64(1), object(1) memory usage: 248.0+ bytes Pipe output of DataFrame.info to buffer instead of sys.stdout, get buffer content and writes to a text file: >>> import io >>> buffer = io.StringIO() >>> df.info(buf=buffer) >>> s = buffer.getvalue() >>> with open("df_info.txt", "w", ... encoding="utf-8") as f: # doctest: +SKIP ... f.write(s) 260 The `memory_usage` parameter allows deep introspection mode, specially useful for big DataFrames and fine-tune memory optimization: >>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6) >>> df = pd.DataFrame({ ... 'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6), ... 'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6), ... 'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6) ... }) >>> df.info() <class 'pandas.core.frame.DataFrame'> RangeIndex: 1000000 entries, 0 to 999999 Data columns (total 3 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 column_1 1000000 non-null object 1 column_2 1000000 non-null object 2 column_3 1000000 non-null object dtypes: object(3) memory usage: 22.9+ MB >>> df.info(memory_usage='deep') <class 'pandas.core.frame.DataFrame'> RangeIndex: 1000000 entries, 0 to 999999 Data columns (total 3 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 column_1 1000000 non-null object 1 column_2 1000000 non-null object 2 column_3 1000000 non-null object dtypes: object(3) memory usage: 188.8 MB """ if buf is None: # pragma: no cover buf = sys.stdout lines = [] lines.append(str(type(data))) lines.append(data.index._summary()) if len(data.columns) == 0: lines.append(f"Empty {type(data).__name__}") fmt.buffer_put_lines(buf, lines) return cols = data.columns col_count = len(data.columns) # hack if max_cols is None: max_cols = get_option("display.max_info_columns", len(data.columns) + 1) max_rows = get_option("display.max_info_rows", len(data) + 1) if null_counts is None: show_counts = (col_count <= max_cols) and (len(data) < max_rows) else: show_counts = null_counts exceeds_info_cols = col_count > max_cols def _verbose_repr(): lines.append(f"Data columns (total {len(data.columns)} columns):") id_head = " # " column_head = "Column" col_space = 2 max_col = max(len(pprint_thing(k)) for k in cols) len_column = len(pprint_thing(column_head)) space = max(max_col, len_column) + col_space max_id = len(pprint_thing(col_count)) len_id = len(pprint_thing(id_head)) space_num = max(max_id, len_id) + col_space header = _put_str(id_head, space_num) + _put_str(column_head, space) if show_counts: counts = data.count() if len(cols) != len(counts): # pragma: no cover raise AssertionError( f"Columns must equal counts ({len(cols)} != {len(counts)})" ) count_header = "Non-Null Count" len_count = len(count_header) non_null = " non-null" max_count = max(len(pprint_thing(k)) for k in counts) + len(non_null) space_count = max(len_count, max_count) + col_space count_temp = "{count}" + non_null else: count_header = "" space_count = len(count_header) len_count = space_count count_temp = "{count}" dtype_header = "Dtype" len_dtype = len(dtype_header) max_dtypes = max(len(pprint_thing(k)) for k in data.dtypes) space_dtype = max(len_dtype, max_dtypes) header += _put_str(count_header, space_count) + _put_str( dtype_header, space_dtype ) lines.append(header) lines.append( _put_str("-" * len_id, space_num) + _put_str("-" * len_column, space) + _put_str("-" * len_count, space_count) + _put_str("-" * len_dtype, space_dtype) ) for i, col in enumerate(data.columns): dtype = data.dtypes.iloc[i] col = pprint_thing(col) line_no = _put_str(f" {i}", space_num) count = "" if show_counts: count = counts.iloc[i] lines.append( line_no + _put_str(col, space) + _put_str(count_temp.format(count=count), space_count) + _put_str(dtype, space_dtype) ) def _non_verbose_repr(): lines.append(data.columns._summary(name="Columns")) def _sizeof_fmt(num, size_qualifier): # returns size in human readable format for x in ["bytes", "KB", "MB", "GB", "TB"]: if num < 1024.0: return f"{num:3.1f}{size_qualifier} {x}" num /= 1024.0 return f"{num:3.1f}{size_qualifier} PB" if verbose: _verbose_repr() elif verbose is False: # specifically set to False, not nesc None _non_verbose_repr() else: if exceeds_info_cols: _non_verbose_repr() else: _verbose_repr() # groupby dtype.name to collect e.g. Categorical columns counts = data.dtypes.value_counts().groupby(lambda x: x.name).sum() dtypes = [f"{k[0]}({k[1]:d})" for k in sorted(counts.items())] lines.append(f"dtypes: {', '.join(dtypes)}") if memory_usage is None: memory_usage = get_option("display.memory_usage") if memory_usage: # append memory usage of df to display size_qualifier = "" if memory_usage == "deep": deep = True else: # size_qualifier is just a best effort; not guaranteed to catch # all cases (e.g., it misses categorical data even with object # categories) deep = False if "object" in counts or data.index._is_memory_usage_qualified(): size_qualifier = "+" mem_usage = data.memory_usage(index=True, deep=deep).sum() lines.append(f"memory usage: {_sizeof_fmt(mem_usage, size_qualifier)}\n") fmt.buffer_put_lines(buf, lines)
if _can_use_numexpr(None, "where", a, b, "where"): result = ne.evaluate( "where(cond_value, a_value, b_value)", local_dict={"cond_value": cond, "a_value": a, "b_value": b}, casting="safe", ) if result is None: result = _where_standard(cond, a, b) return result # turn myself on set_use_numexpr(get_option("compute.use_numexpr")) def _has_bool_dtype(x): try: return x.dtype == bool except AttributeError: return isinstance(x, (bool, np.bool_)) def _bool_arith_check( op_str, a, b, not_allowed=frozenset(("/", "//", "**")), unsupported=None ): if unsupported is None: unsupported = {"+": "|", "*": "&", "-": "^"}
def info(self) -> None: """ Print a concise summary of a %(klass)s. This method prints information about a %(klass)s including the index dtype%(type_sub)s, non-null values and memory usage. Parameters ---------- data : %(klass)s %(klass)s to print information about. verbose : bool, optional Whether to print the full summary. By default, the setting in ``pandas.options.display.max_info_columns`` is followed. buf : writable buffer, defaults to sys.stdout Where to send the output. By default, the output is printed to sys.stdout. Pass a writable buffer if you need to further process the output. %(max_cols_sub)s memory_usage : bool, str, optional Specifies whether total memory usage of the %(klass)s elements (including the index) should be displayed. By default, this follows the ``pandas.options.display.memory_usage`` setting. True always show memory usage. False never shows memory usage. A value of 'deep' is equivalent to "True with deep introspection". Memory usage is shown in human-readable units (base-2 representation). Without deep introspection a memory estimation is made based in column dtype and number of rows assuming values consume the same memory amount for corresponding dtypes. With deep memory introspection, a real memory usage calculation is performed at the cost of computational resources. null_counts : bool, optional Whether to show the non-null counts. By default, this is shown only if the %(klass)s is smaller than ``pandas.options.display.max_info_rows`` and ``pandas.options.display.max_info_columns``. A value of True always shows the counts, and False never shows the counts. Returns ------- None This method prints a summary of a %(klass)s and returns None. See Also -------- %(see_also_sub)s Examples -------- %(examples_sub)s """ lines = [] lines.append(str(type(self.data))) lines.append(self.data.index._summary()) ids, dtypes = self._get_ids_and_dtypes() col_count = len(ids) if col_count == 0: lines.append(f"Empty {type(self.data).__name__}") fmt.buffer_put_lines(self.buf, lines) return # hack max_cols = self.max_cols if max_cols is None: max_cols = get_option("display.max_info_columns", col_count + 1) max_rows = get_option("display.max_info_rows", len(self.data) + 1) if self.null_counts is None: show_counts = (col_count <= max_cols) and (len(self.data) < max_rows) else: show_counts = self.null_counts exceeds_info_cols = col_count > max_cols if self.verbose: self._verbose_repr(lines, ids, dtypes, show_counts) elif self.verbose is False: # specifically set to False, not necessarily None self._non_verbose_repr(lines, ids) else: if exceeds_info_cols: self._non_verbose_repr(lines, ids) else: self._verbose_repr(lines, ids, dtypes, show_counts) # groupby dtype.name to collect e.g. Categorical columns counts = dtypes.value_counts().groupby(lambda x: x.name).sum() collected_dtypes = [ f"{k[0]}({k[1]:d})" for k in sorted(counts.items()) ] lines.append(f"dtypes: {', '.join(collected_dtypes)}") if self.memory_usage: # append memory usage of df to display size_qualifier = "" if self.memory_usage == "deep": deep = True else: # size_qualifier is just a best effort; not guaranteed to catch # all cases (e.g., it misses categorical data even with object # categories) deep = False if "object" in counts or self.data.index._is_memory_usage_qualified( ): size_qualifier = "+" mem_usage = self._get_mem_usage(deep=deep) lines.append( f"memory usage: {_sizeof_fmt(mem_usage, size_qualifier)}\n") fmt.buffer_put_lines(self.buf, lines)
def _translate(self): """ Convert the DataFrame in `self.data` and the attrs from `_build_styles` into a dictionary of {head, body, uuid, cellstyle}. """ table_styles = self.table_styles or [] caption = self.caption ctx = self.ctx precision = self.precision hidden_index = self.hidden_index hidden_columns = self.hidden_columns uuid = self.uuid or str(uuid1()).replace("-", "_") ROW_HEADING_CLASS = "row_heading" COL_HEADING_CLASS = "col_heading" INDEX_NAME_CLASS = "index_name" DATA_CLASS = "data" BLANK_CLASS = "blank" BLANK_VALUE = "" def format_attr(pair): return "{key}={value}".format(**pair) # for sparsifying a MultiIndex idx_lengths = _get_level_lengths(self.index) col_lengths = _get_level_lengths(self.columns, hidden_columns) cell_context = dict() n_rlvls = self.data.index.nlevels n_clvls = self.data.columns.nlevels rlabels = self.data.index.tolist() clabels = self.data.columns.tolist() if n_rlvls == 1: rlabels = [[x] for x in rlabels] if n_clvls == 1: clabels = [[x] for x in clabels] clabels = list(zip(*clabels)) cellstyle = [] head = [] for r in range(n_clvls): # Blank for Index columns... row_es = [ { "type": "th", "value": BLANK_VALUE, "display_value": BLANK_VALUE, "is_visible": not hidden_index, "class": " ".join([BLANK_CLASS]), } ] * (n_rlvls - 1) # ... except maybe the last for columns.names name = self.data.columns.names[r] cs = [ BLANK_CLASS if name is None else INDEX_NAME_CLASS, "level{lvl}".format(lvl=r), ] name = BLANK_VALUE if name is None else name row_es.append( { "type": "th", "value": name, "display_value": name, "class": " ".join(cs), "is_visible": not hidden_index, } ) if clabels: for c, value in enumerate(clabels[r]): cs = [ COL_HEADING_CLASS, "level{lvl}".format(lvl=r), "col{col}".format(col=c), ] cs.extend( cell_context.get("col_headings", {}).get(r, {}).get(c, []) ) es = { "type": "th", "value": value, "display_value": value, "class": " ".join(cs), "is_visible": _is_visible(c, r, col_lengths), } colspan = col_lengths.get((r, c), 0) if colspan > 1: es["attributes"] = [ format_attr({"key": "colspan", "value": colspan}) ] row_es.append(es) head.append(row_es) if ( self.data.index.names and com.any_not_none(*self.data.index.names) and not hidden_index ): index_header_row = [] for c, name in enumerate(self.data.index.names): cs = [INDEX_NAME_CLASS, "level{lvl}".format(lvl=c)] name = "" if name is None else name index_header_row.append( {"type": "th", "value": name, "class": " ".join(cs)} ) index_header_row.extend( [{"type": "th", "value": BLANK_VALUE, "class": " ".join([BLANK_CLASS])}] * (len(clabels[0]) - len(hidden_columns)) ) head.append(index_header_row) body = [] for r, idx in enumerate(self.data.index): row_es = [] for c, value in enumerate(rlabels[r]): rid = [ ROW_HEADING_CLASS, "level{lvl}".format(lvl=c), "row{row}".format(row=r), ] es = { "type": "th", "is_visible": (_is_visible(r, c, idx_lengths) and not hidden_index), "value": value, "display_value": value, "id": "_".join(rid[1:]), "class": " ".join(rid), } rowspan = idx_lengths.get((c, r), 0) if rowspan > 1: es["attributes"] = [ format_attr({"key": "rowspan", "value": rowspan}) ] row_es.append(es) for c, col in enumerate(self.data.columns): cs = [DATA_CLASS, "row{row}".format(row=r), "col{col}".format(col=c)] cs.extend(cell_context.get("data", {}).get(r, {}).get(c, [])) formatter = self._display_funcs[(r, c)] value = self.data.iloc[r, c] row_dict = { "type": "td", "value": value, "class": " ".join(cs), "display_value": formatter(value), "is_visible": (c not in hidden_columns), } # only add an id if the cell has a style if self.cell_ids or not (len(ctx[r, c]) == 1 and ctx[r, c][0] == ""): row_dict["id"] = "_".join(cs[1:]) row_es.append(row_dict) props = [] for x in ctx[r, c]: # have to handle empty styles like [''] if x.count(":"): props.append(x.split(":")) else: props.append(["", ""]) cellstyle.append( { "props": props, "selector": "row{row}_col{col}".format(row=r, col=c), } ) body.append(row_es) table_attr = self.table_attributes use_mathjax = get_option("display.html.use_mathjax") if not use_mathjax: table_attr = table_attr or "" if 'class="' in table_attr: table_attr = table_attr.replace('class="', 'class="tex2jax_ignore ') else: table_attr += ' class="tex2jax_ignore"' return dict( head=head, cellstyle=cellstyle, body=body, uuid=uuid, precision=precision, table_styles=table_styles, caption=caption, table_attributes=table_attr, )
def _initialize_memory_usage( memory_usage: Optional[Union[bool, str]] = None, ) -> Union[bool, str]: """Get memory usage based on inputs and display options.""" if memory_usage is None: memory_usage = get_option("display.memory_usage") return memory_usage