Exemple #1
0
def _pprint_dict(seq, _nest_lvl=0, max_seq_items=None, **kwds):
    """
    internal. pprinter for iterables. you should probably use pprint_thing()
    rather then calling this directly.
    """
    fmt = "{{{things}}}"
    pairs = []

    pfmt = "{key}: {val}"

    if max_seq_items is False:
        nitems = len(seq)
    else:
        nitems = max_seq_items or get_option("max_seq_items") or len(seq)

    for k, v in list(seq.items())[:nitems]:
        pairs.append(
            pfmt.format(
                key=pprint_thing(k, _nest_lvl + 1,
                                 max_seq_items=max_seq_items, **kwds),
                val=pprint_thing(v, _nest_lvl + 1,
                                 max_seq_items=max_seq_items, **kwds)))

    if nitems < len(seq):
        return fmt.format(things=", ".join(pairs) + ", ...")
    else:
        return fmt.format(things=", ".join(pairs))
Exemple #2
0
    def _write_table(self, indent=0):
        _classes = ['dataframe']  # Default class.
        use_mathjax = get_option("display.html.use_mathjax")
        if not use_mathjax:
            _classes.append('tex2jax_ignore')
        if self.classes is not None:
            if isinstance(self.classes, str):
                self.classes = self.classes.split()
            if not isinstance(self.classes, (list, tuple)):
                raise TypeError('classes must be a string, list, or tuple, '
                                'not {typ}'.format(typ=type(self.classes)))
            _classes.extend(self.classes)

        if self.table_id is None:
            id_section = ""
        else:
            id_section = ' id="{table_id}"'.format(table_id=self.table_id)

        self.write('<table border="{border}" class="{cls}"{id_section}>'
                   .format(border=self.border, cls=' '.join(_classes),
                           id_section=id_section), indent)

        if self.fmt.header or self.show_row_idx_names:
            self._write_header(indent + self.indent_delta)

        self._write_body(indent + self.indent_delta)

        self.write('</table>', indent)
Exemple #3
0
def _pprint_seq(seq, _nest_lvl=0, max_seq_items=None, **kwds):
    """
    internal. pprinter for iterables. you should probably use pprint_thing()
    rather then calling this directly.

    bounds length of printed sequence, depending on options
    """
    if isinstance(seq, set):
        fmt = "{{{body}}}"
    else:
        fmt = "[{body}]" if hasattr(seq, '__setitem__') else "({body})"

    if max_seq_items is False:
        nitems = len(seq)
    else:
        nitems = max_seq_items or get_option("max_seq_items") or len(seq)

    s = iter(seq)
    # handle sets, no slicing
    r = [pprint_thing(next(s),
                      _nest_lvl + 1, max_seq_items=max_seq_items, **kwds)
         for i in range(min(nitems, len(seq)))]
    body = ", ".join(r)

    if nitems < len(seq):
        body += ", ..."
    elif isinstance(seq, tuple) and len(seq) == 1:
        body += ','

    return fmt.format(body=body)
Exemple #4
0
def _use_inf_as_na(key):
    """Option change callback for na/inf behaviour
    Choose which replacement for numpy.isnan / -numpy.isfinite is used.

    Parameters
    ----------
    flag: bool
        True means treat None, NaN, INF, -INF as null (old way),
        False means None and NaN are null, but INF, -INF are not null
        (new way).

    Notes
    -----
    This approach to setting global module values is discussed and
    approved here:

    * http://stackoverflow.com/questions/4859217/
      programmatically-creating-variables-in-python/4859312#4859312
    """
    from pandas._config import get_option
    flag = get_option(key)
    if flag:
        globals()['_isna'] = _isna_old
    else:
        globals()['_isna'] = _isna_new
Exemple #5
0
    def _format_data(self):

        # TODO: integrate with categorical and make generic
        # name argument is unused here; just for compat with base / categorical
        n = len(self)
        max_seq_items = min((get_option(
            'display.max_seq_items') or n) // 10, 10)

        formatter = str

        if n == 0:
            summary = '[]'
        elif n == 1:
            first = formatter(self[0])
            summary = '[{first}]'.format(first=first)
        elif n == 2:
            first = formatter(self[0])
            last = formatter(self[-1])
            summary = '[{first}, {last}]'.format(first=first, last=last)
        else:

            if n > max_seq_items:
                n = min(max_seq_items // 2, 10)
                head = [formatter(x) for x in self[:n]]
                tail = [formatter(x) for x in self[-n:]]
                summary = '[{head} ... {tail}]'.format(
                    head=', '.join(head), tail=', '.join(tail))
            else:
                tail = [formatter(x) for x in self]
                summary = '[{tail}]'.format(tail=', '.join(tail))

        return summary
Exemple #6
0
    def __bytes__(self):
        """
        Return a string representation for a particular object.
        """
        from pandas._config import get_option

        encoding = get_option("display.encoding")
        return self.__unicode__().encode(encoding, 'replace')
Exemple #7
0
 def _format_attrs(self):
     """
     Return a list of tuples of the (attr,formatted_value)
     """
     max_categories = (10 if get_option("display.max_categories") == 0 else
                       get_option("display.max_categories"))
     attrs = [
         ('categories',
          ibase.default_pprint(self.categories,
                               max_seq_items=max_categories)),
         ('ordered', self.ordered)]
     if self.name is not None:
         attrs.append(('name', ibase.default_pprint(self.name)))
     attrs.append(('dtype', "'%s'" % self.dtype.name))
     max_seq_items = get_option('display.max_seq_items') or len(self)
     if len(self) > max_seq_items:
         attrs.append(('length', len(self)))
     return attrs
Exemple #8
0
    def __bytes__(self):
        """
        Return a string representation for a particular object.

        Invoked by bytes(obj) in py3 only.
        Yields a bytestring in both py2/py3.
        """
        from pandas._config import get_option

        encoding = get_option("display.encoding")
        return self.__unicode__().encode(encoding, 'replace')
Exemple #9
0
    def __init__(self, formatter, classes=None, border=None):
        self.fmt = formatter
        self.classes = classes

        self.frame = self.fmt.frame
        self.columns = self.fmt.tr_frame.columns
        self.elements = []
        self.bold_rows = self.fmt.kwds.get('bold_rows', False)
        self.escape = self.fmt.kwds.get('escape', True)
        self.show_dimensions = self.fmt.show_dimensions
        if border is None:
            border = get_option('display.html.border')
        self.border = border
        self.table_id = self.fmt.table_id
        self.render_links = self.fmt.render_links
Exemple #10
0
def _get_level_lengths(index, hidden_elements=None):
    """
    Given an index, find the level length for each element.

    Optional argument is a list of index positions which
    should not be visible.

    Result is a dictionary of (level, inital_position): span
    """
    sentinel = object()
    levels = index.format(sparsify=sentinel, adjoin=False, names=False)

    if hidden_elements is None:
        hidden_elements = []

    lengths = {}
    if index.nlevels == 1:
        for i, value in enumerate(levels):
            if(i not in hidden_elements):
                lengths[(0, i)] = 1
        return lengths

    for i, lvl in enumerate(levels):
        for j, row in enumerate(lvl):
            if not get_option('display.multi_sparse'):
                lengths[(i, j)] = 1
            elif (row != sentinel) and (j not in hidden_elements):
                last_label = j
                lengths[(i, last_label)] = 1
            elif (row != sentinel):
                # even if its hidden, keep track of it in case
                # length >1 and later elements are visible
                last_label = j
                lengths[(i, last_label)] = 0
            elif(j not in hidden_elements):
                lengths[(i, last_label)] += 1

    non_zero_lengths = {
        element: length for element, length in lengths.items() if length >= 1}

    return non_zero_lengths
Exemple #11
0
    def __init__(self, data, precision=None, table_styles=None, uuid=None,
                 caption=None, table_attributes=None, cell_ids=True):
        self.ctx = defaultdict(list)
        self._todo = []

        if not isinstance(data, (pd.Series, pd.DataFrame)):
            raise TypeError("``data`` must be a Series or DataFrame")
        if data.ndim == 1:
            data = data.to_frame()
        if not data.index.is_unique or not data.columns.is_unique:
            raise ValueError("style is not supported for non-unique indices.")

        self.data = data
        self.index = data.index
        self.columns = data.columns

        self.uuid = uuid
        self.table_styles = table_styles
        self.caption = caption
        if precision is None:
            precision = get_option('display.precision')
        self.precision = precision
        self.table_attributes = table_attributes
        self.hidden_index = False
        self.hidden_columns = []
        self.cell_ids = cell_ids

        # display_funcs maps (row, col) -> formatting function

        def default_display_func(x):
            if is_float(x):
                return '{:>.{precision}g}'.format(x, precision=self.precision)
            else:
                return x

        self._display_funcs = defaultdict(lambda: default_display_func)
Exemple #12
0
def format_object_attrs(obj):
    """
    Return a list of tuples of the (attr, formatted_value)
    for common attrs, including dtype, name, length

    Parameters
    ----------
    obj : object
        must be iterable

    Returns
    -------
    list

    """
    attrs = []
    if hasattr(obj, 'dtype'):
        attrs.append(('dtype', "'{}'".format(obj.dtype)))
    if getattr(obj, 'name', None) is not None:
        attrs.append(('name', default_pprint(obj.name)))
    max_seq_items = get_option('display.max_seq_items') or len(obj)
    if len(obj) > max_seq_items:
        attrs.append(('length', len(obj)))
    return attrs
Exemple #13
0
                                        ABCMultiIndex, ABCPeriodIndex,
                                        ABCSeries)
from pandas.core.dtypes.missing import isna, notna

import pandas.core.common as com

from pandas.io.formats.printing import pprint_thing
from pandas.plotting._matplotlib import converter
from pandas.plotting._matplotlib.compat import _mpl_ge_3_0_0
from pandas.plotting._matplotlib.style import _get_standard_colors
from pandas.plotting._matplotlib.tools import (_flatten, _get_all_lines,
                                               _get_xlim, _handle_shared_axes,
                                               _subplots, format_date_labels,
                                               table)

if get_option('plotting.matplotlib.register_converters'):
    converter.register(explicit=False)


class MPLPlot:
    """
    Base class for assembling a pandas plot using matplotlib

    Parameters
    ----------
    data :

    """
    @property
    def _kind(self):
        """Specify kind str. Must be overridden in child class"""
Exemple #14
0
 def max_rows(self) -> int:
     """Maximum info rows to be displayed."""
     return get_option("display.max_info_rows", len(self.data) + 1)
Exemple #15
0
def pprint_thing(thing,
                 _nest_lvl=0,
                 escape_chars=None,
                 default_escapes=False,
                 quote_strings=False,
                 max_seq_items=None):
    """
    This function is the sanctioned way of converting objects
    to a unicode representation.

    properly handles nested sequences containing unicode strings
    (unicode(object) does not)

    Parameters
    ----------
    thing : anything to be formatted
    _nest_lvl : internal use only. pprint_thing() is mutually-recursive
        with pprint_sequence, this argument is used to keep track of the
        current nesting level, and limit it.
    escape_chars : list or dict, optional
        Characters to escape. If a dict is passed the values are the
        replacements
    default_escapes : bool, default False
        Whether the input escape characters replaces or adds to the defaults
    max_seq_items : False, int, default None
        Pass thru to other pretty printers to limit sequence printing

    Returns
    -------
    result - unicode str

    """
    def as_escaped_unicode(thing, escape_chars=escape_chars):
        # Unicode is fine, else we try to decode using utf-8 and 'replace'
        # if that's not it either, we have no way of knowing and the user
        # should deal with it himself.

        try:
            result = str(thing)  # we should try this first
        except UnicodeDecodeError:
            # either utf-8 or we replace errors
            result = str(thing).decode('utf-8', "replace")

        translate = {
            '\t': r'\t',
            '\n': r'\n',
            '\r': r'\r',
        }
        if isinstance(escape_chars, dict):
            if default_escapes:
                translate.update(escape_chars)
            else:
                translate = escape_chars
            escape_chars = list(escape_chars.keys())
        else:
            escape_chars = escape_chars or tuple()
        for c in escape_chars:
            result = result.replace(c, translate[c])

        return str(result)

    if hasattr(thing, '__next__'):
        return str(thing)
    elif (isinstance(thing, dict)
          and _nest_lvl < get_option("display.pprint_nest_depth")):
        result = _pprint_dict(thing,
                              _nest_lvl,
                              quote_strings=True,
                              max_seq_items=max_seq_items)
    elif (is_sequence(thing)
          and _nest_lvl < get_option("display.pprint_nest_depth")):
        result = _pprint_seq(thing,
                             _nest_lvl,
                             escape_chars=escape_chars,
                             quote_strings=quote_strings,
                             max_seq_items=max_seq_items)
    elif isinstance(thing, str) and quote_strings:
        result = "'{thing}'".format(thing=as_escaped_unicode(thing))
    else:
        result = as_escaped_unicode(thing)

    return str(result)  # always unicode
Exemple #16
0
def format_object_summary(obj,
                          formatter,
                          is_justify=True,
                          name=None,
                          indent_for_name=True):
    """
    Return the formatted obj as a unicode string

    Parameters
    ----------
    obj : object
        must be iterable and support __getitem__
    formatter : callable
        string formatter for an element
    is_justify : boolean
        should justify the display
    name : name, optional
        defaults to the class name of the obj
    indent_for_name : bool, default True
        Whether subsequent lines should be be indented to
        align with the name.

    Returns
    -------
    summary string

    """
    from pandas.io.formats.console import get_console_size
    from pandas.io.formats.format import _get_adjustment

    display_width, _ = get_console_size()
    if display_width is None:
        display_width = get_option('display.width') or 80
    if name is None:
        name = obj.__class__.__name__

    if indent_for_name:
        name_len = len(name)
        space1 = "\n%s" % (' ' * (name_len + 1))
        space2 = "\n%s" % (' ' * (name_len + 2))
    else:
        space1 = "\n"
        space2 = "\n "  # space for the opening '['

    n = len(obj)
    sep = ','
    max_seq_items = get_option('display.max_seq_items') or n

    # are we a truncated display
    is_truncated = n > max_seq_items

    # adj can optionally handle unicode eastern asian width
    adj = _get_adjustment()

    def _extend_line(s, line, value, display_width, next_line_prefix):

        if (adj.len(line.rstrip()) + adj.len(value.rstrip()) >= display_width):
            s += line.rstrip()
            line = next_line_prefix
        line += value
        return s, line

    def best_len(values):
        if values:
            return max(adj.len(x) for x in values)
        else:
            return 0

    close = ', '

    if n == 0:
        summary = '[]{}'.format(close)
    elif n == 1:
        first = formatter(obj[0])
        summary = '[{}]{}'.format(first, close)
    elif n == 2:
        first = formatter(obj[0])
        last = formatter(obj[-1])
        summary = '[{}, {}]{}'.format(first, last, close)
    else:

        if n > max_seq_items:
            n = min(max_seq_items // 2, 10)
            head = [formatter(x) for x in obj[:n]]
            tail = [formatter(x) for x in obj[-n:]]
        else:
            head = []
            tail = [formatter(x) for x in obj]

        # adjust all values to max length if needed
        if is_justify:

            # however, if we are not truncated and we are only a single
            # line, then don't justify
            if (is_truncated
                    or not (len(', '.join(head)) < display_width
                            and len(', '.join(tail)) < display_width)):
                max_len = max(best_len(head), best_len(tail))
                head = [x.rjust(max_len) for x in head]
                tail = [x.rjust(max_len) for x in tail]

        summary = ""
        line = space2

        for i in range(len(head)):
            word = head[i] + sep + ' '
            summary, line = _extend_line(summary, line, word, display_width,
                                         space2)

        if is_truncated:
            # remove trailing space of last line
            summary += line.rstrip() + space2 + '...'
            line = space2

        for i in range(len(tail) - 1):
            word = tail[i] + sep + ' '
            summary, line = _extend_line(summary, line, word, display_width,
                                         space2)

        # last value: no sep added + 1 space of width used for trailing ','
        summary, line = _extend_line(summary, line, tail[-1],
                                     display_width - 2, space2)
        summary += line

        # right now close is either '' or ', '
        # Now we want to include the ']', but not the maybe space.
        close = ']' + close.rstrip(' ')
        summary += close

        if len(summary) > (display_width):
            summary += space1
        else:  # one row
            summary += ' '

        # remove initial space
        summary = '[' + summary[len(space2):]

    return summary
Exemple #17
0
            UserWarning)

except ImportError:  # pragma: no cover
    pass

_USE_BOTTLENECK = False


def set_use_bottleneck(v=True):
    # set/unset to use bottleneck
    global _USE_BOTTLENECK
    if _BOTTLENECK_INSTALLED:
        _USE_BOTTLENECK = v


set_use_bottleneck(get_option('compute.use_bottleneck'))


class disallow(object):
    def __init__(self, *dtypes):
        super(disallow, self).__init__()
        self.dtypes = tuple(pandas_dtype(dtype).type for dtype in dtypes)

    def check(self, obj):
        return hasattr(obj, 'dtype') and issubclass(obj.dtype.type,
                                                    self.dtypes)

    def __call__(self, f):
        @functools.wraps(f)
        def _f(*args, **kwargs):
            obj_iter = itertools.chain(args, compat.itervalues(kwargs))
Exemple #18
0
    def _translate(self,
                   sparsify_index: bool | None = None,
                   sparsify_cols: bool | None = None):
        """
        Process Styler data and settings into a dict for template rendering.

        Convert data and settings from ``Styler`` attributes such as ``self.data``,
        ``self.tooltips`` including applying any methods in ``self._todo``.

        Parameters
        ----------
        sparsify_index : bool, optional
            Whether to sparsify the index or print all hierarchical index elements
        sparsify_cols : bool, optional
            Whether to sparsify the columns or print all hierarchical column elements

        Returns
        -------
        d : dict
            The following structure: {uuid, table_styles, caption, head, body,
            cellstyle, table_attributes}
        """
        if sparsify_index is None:
            sparsify_index = get_option("display.multi_sparse")
        if sparsify_cols is None:
            sparsify_cols = get_option("display.multi_sparse")

        ROW_HEADING_CLASS = "row_heading"
        COL_HEADING_CLASS = "col_heading"
        INDEX_NAME_CLASS = "index_name"

        DATA_CLASS = "data"
        BLANK_CLASS = "blank"
        BLANK_VALUE = "&nbsp;"

        # construct render dict
        d = {
            "uuid": self.uuid,
            "table_styles": _format_table_styles(self.table_styles or []),
            "caption": self.caption,
        }

        head = self._translate_header(BLANK_CLASS, BLANK_VALUE,
                                      INDEX_NAME_CLASS, COL_HEADING_CLASS,
                                      sparsify_cols)
        d.update({"head": head})

        self.cellstyle_map: DefaultDict[tuple[CSSPair, ...],
                                        list[str]] = defaultdict(list)
        body = self._translate_body(DATA_CLASS, ROW_HEADING_CLASS,
                                    sparsify_index)
        d.update({"body": body})

        cellstyle: list[dict[str, CSSList | list[str]]] = [{
            "props":
            list(props),
            "selectors":
            selectors
        } for props, selectors in self.cellstyle_map.items()]
        d.update({"cellstyle": cellstyle})

        table_attr = self.table_attributes
        use_mathjax = get_option("display.html.use_mathjax")
        if not use_mathjax:
            table_attr = table_attr or ""
            if 'class="' in table_attr:
                table_attr = table_attr.replace('class="',
                                                'class="tex2jax_ignore ')
            else:
                table_attr += ' class="tex2jax_ignore"'
        d.update({"table_attributes": table_attr})

        if self.tooltips:
            d = self.tooltips._translate(self.data, self.uuid, d)

        return d
Exemple #19
0
except ImportError:
    _HAVE_FASTPARQUET = False

pytestmark = pytest.mark.filterwarnings(
    "ignore:RangeIndex.* is deprecated:DeprecationWarning")

# TODO(ArrayManager) fastparquet relies on BlockManager internals


# setup engines & skips
@pytest.fixture(params=[
    pytest.param(
        "fastparquet",
        marks=pytest.mark.skipif(
            not _HAVE_FASTPARQUET
            or get_option("mode.data_manager") == "array",
            reason="fastparquet is not installed or ArrayManager is used",
        ),
    ),
    pytest.param(
        "pyarrow",
        marks=pytest.mark.skipif(not _HAVE_PYARROW,
                                 reason="pyarrow is not installed"),
    ),
])
def engine(request):
    return request.param


@pytest.fixture
def pa():
Exemple #20
0
def format_object_summary(
    obj,
    formatter: Callable,
    is_justify: bool = True,
    name: Optional[str] = None,
    indent_for_name: bool = True,
    line_break_each_value: bool = False,
) -> str:
    """
    Return the formatted obj as a unicode string

    Parameters
    ----------
    obj : object
        must be iterable and support __getitem__
    formatter : callable
        string formatter for an element
    is_justify : boolean
        should justify the display
    name : name, optional
        defaults to the class name of the obj
    indent_for_name : bool, default True
        Whether subsequent lines should be be indented to
        align with the name.
    line_break_each_value : bool, default False
        If True, inserts a line break for each value of ``obj``.
        If False, only break lines when the a line of values gets wider
        than the display width.

        .. versionadded:: 0.25.0

    Returns
    -------
    summary string
    """
    from pandas.io.formats.console import get_console_size
    from pandas.io.formats.format import _get_adjustment

    display_width, _ = get_console_size()
    if display_width is None:
        display_width = get_option("display.width") or 80
    if name is None:
        name = type(obj).__name__

    if indent_for_name:
        name_len = len(name)
        space1 = f'\n{(" " * (name_len + 1))}'
        space2 = f'\n{(" " * (name_len + 2))}'
    else:
        space1 = "\n"
        space2 = "\n "  # space for the opening '['

    n = len(obj)
    if line_break_each_value:
        # If we want to vertically align on each value of obj, we need to
        # separate values by a line break and indent the values
        sep = ",\n " + " " * len(name)
    else:
        sep = ","
    max_seq_items = get_option("display.max_seq_items") or n

    # are we a truncated display
    is_truncated = n > max_seq_items

    # adj can optionally handle unicode eastern asian width
    adj = _get_adjustment()

    def _extend_line(
        s: str, line: str, value: str, display_width: int, next_line_prefix: str
    ) -> Tuple[str, str]:

        if adj.len(line.rstrip()) + adj.len(value.rstrip()) >= display_width:
            s += line.rstrip()
            line = next_line_prefix
        line += value
        return s, line

    def best_len(values: List[str]) -> int:
        if values:
            return max(adj.len(x) for x in values)
        else:
            return 0

    close = ", "

    if n == 0:
        summary = f"[]{close}"
    elif n == 1 and not line_break_each_value:
        first = formatter(obj[0])
        summary = f"[{first}]{close}"
    elif n == 2 and not line_break_each_value:
        first = formatter(obj[0])
        last = formatter(obj[-1])
        summary = f"[{first}, {last}]{close}"
    else:

        if n > max_seq_items:
            n = min(max_seq_items // 2, 10)
            head = [formatter(x) for x in obj[:n]]
            tail = [formatter(x) for x in obj[-n:]]
        else:
            head = []
            tail = [formatter(x) for x in obj]

        # adjust all values to max length if needed
        if is_justify:
            if line_break_each_value:
                # Justify each string in the values of head and tail, so the
                # strings will right align when head and tail are stacked
                # vertically.
                head, tail = _justify(head, tail)
            elif is_truncated or not (
                len(", ".join(head)) < display_width
                and len(", ".join(tail)) < display_width
            ):
                # Each string in head and tail should align with each other
                max_length = max(best_len(head), best_len(tail))
                head = [x.rjust(max_length) for x in head]
                tail = [x.rjust(max_length) for x in tail]
            # If we are not truncated and we are only a single
            # line, then don't justify

        if line_break_each_value:
            # Now head and tail are of type List[Tuple[str]]. Below we
            # convert them into List[str], so there will be one string per
            # value. Also truncate items horizontally if wider than
            # max_space
            max_space = display_width - len(space2)
            value = tail[0]
            for max_items in reversed(range(1, len(value) + 1)):
                pprinted_seq = _pprint_seq(value, max_seq_items=max_items)
                if len(pprinted_seq) < max_space:
                    break
            head = [_pprint_seq(x, max_seq_items=max_items) for x in head]
            tail = [_pprint_seq(x, max_seq_items=max_items) for x in tail]

        summary = ""
        line = space2

        for max_items in range(len(head)):
            word = head[max_items] + sep + " "
            summary, line = _extend_line(summary, line, word, display_width, space2)

        if is_truncated:
            # remove trailing space of last line
            summary += line.rstrip() + space2 + "..."
            line = space2

        for max_items in range(len(tail) - 1):
            word = tail[max_items] + sep + " "
            summary, line = _extend_line(summary, line, word, display_width, space2)

        # last value: no sep added + 1 space of width used for trailing ','
        summary, line = _extend_line(summary, line, tail[-1], display_width - 2, space2)
        summary += line

        # right now close is either '' or ', '
        # Now we want to include the ']', but not the maybe space.
        close = "]" + close.rstrip(" ")
        summary += close

        if len(summary) > (display_width) or line_break_each_value:
            summary += space1
        else:  # one row
            summary += " "

        # remove initial space
        summary = "[" + summary[len(space2) :]

    return summary
Exemple #21
0
from pandas.io.formats.printing import pprint_thing
from pandas.plotting._matplotlib import converter
from pandas.plotting._matplotlib.compat import _mpl_ge_3_0_0
from pandas.plotting._matplotlib.style import _get_standard_colors
from pandas.plotting._matplotlib.tools import (
    _flatten,
    _get_all_lines,
    _get_xlim,
    _handle_shared_axes,
    _subplots,
    format_date_labels,
    table,
)

if get_option("plotting.matplotlib.register_converters"):
    converter.register(explicit=False)


class MPLPlot:
    """
    Base class for assembling a pandas plot using matplotlib

    Parameters
    ----------
    data :

    """
    @property
    def _kind(self):
        """Specify kind str. Must be overridden in child class"""
Exemple #22
0
def pprint_thing(
    thing: Any,
    _nest_lvl: int = 0,
    escape_chars: Optional[EscapeChars] = None,
    default_escapes: bool = False,
    quote_strings: bool = False,
    max_seq_items: Optional[int] = None,
) -> str:
    """
    This function is the sanctioned way of converting objects
    to a string representation and properly handles nested sequences.

    Parameters
    ----------
    thing : anything to be formatted
    _nest_lvl : internal use only. pprint_thing() is mutually-recursive
        with pprint_sequence, this argument is used to keep track of the
        current nesting level, and limit it.
    escape_chars : list or dict, optional
        Characters to escape. If a dict is passed the values are the
        replacements
    default_escapes : bool, default False
        Whether the input escape characters replaces or adds to the defaults
    max_seq_items : int or None, default None
        Pass through to other pretty printers to limit sequence printing

    Returns
    -------
    str
    """

    def as_escaped_string(
        thing: Any, escape_chars: Optional[EscapeChars] = escape_chars
    ) -> str:
        translate = {"\t": r"\t", "\n": r"\n", "\r": r"\r"}
        if isinstance(escape_chars, dict):
            if default_escapes:
                translate.update(escape_chars)
            else:
                translate = escape_chars
            escape_chars = list(escape_chars.keys())
        else:
            escape_chars = escape_chars or tuple()

        result = str(thing)
        for c in escape_chars:
            result = result.replace(c, translate[c])
        return result

    if hasattr(thing, "__next__"):
        return str(thing)
    elif isinstance(thing, dict) and _nest_lvl < get_option(
        "display.pprint_nest_depth"
    ):
        result = _pprint_dict(
            thing, _nest_lvl, quote_strings=True, max_seq_items=max_seq_items
        )
    elif is_sequence(thing) and _nest_lvl < get_option("display.pprint_nest_depth"):
        result = _pprint_seq(
            thing,
            _nest_lvl,
            escape_chars=escape_chars,
            quote_strings=quote_strings,
            max_seq_items=max_seq_items,
        )
    elif isinstance(thing, str) and quote_strings:
        result = "'{thing}'".format(thing=as_escaped_string(thing))
    else:
        result = as_escaped_string(thing)

    return result
Exemple #23
0
def _initialize_memory_usage(
    memory_usage: bool | str | None = None, ) -> bool | str:
    """Get memory usage based on inputs and display options."""
    if memory_usage is None:
        memory_usage = get_option("display.memory_usage")
    return memory_usage
Exemple #24
0
 def _initialize_max_cols(self, max_cols: int | None) -> int:
     if max_cols is None:
         return get_option("display.max_info_columns", self.col_count + 1)
     return max_cols
Exemple #25
0
def pprint_thing(thing, _nest_lvl=0, escape_chars=None, default_escapes=False,
                 quote_strings=False, max_seq_items=None):
    """
    This function is the sanctioned way of converting objects
    to a unicode representation.

    properly handles nested sequences containing unicode strings
    (unicode(object) does not)

    Parameters
    ----------
    thing : anything to be formatted
    _nest_lvl : internal use only. pprint_thing() is mutually-recursive
        with pprint_sequence, this argument is used to keep track of the
        current nesting level, and limit it.
    escape_chars : list or dict, optional
        Characters to escape. If a dict is passed the values are the
        replacements
    default_escapes : bool, default False
        Whether the input escape characters replaces or adds to the defaults
    max_seq_items : False, int, default None
        Pass thru to other pretty printers to limit sequence printing

    Returns
    -------
    result - unicode object on py2, str on py3. Always Unicode.

    """

    def as_escaped_unicode(thing, escape_chars=escape_chars):
        # Unicode is fine, else we try to decode using utf-8 and 'replace'
        # if that's not it either, we have no way of knowing and the user
        # should deal with it himself.

        try:
            result = str(thing)  # we should try this first
        except UnicodeDecodeError:
            # either utf-8 or we replace errors
            result = str(thing).decode('utf-8', "replace")

        translate = {'\t': r'\t', '\n': r'\n', '\r': r'\r', }
        if isinstance(escape_chars, dict):
            if default_escapes:
                translate.update(escape_chars)
            else:
                translate = escape_chars
            escape_chars = list(escape_chars.keys())
        else:
            escape_chars = escape_chars or tuple()
        for c in escape_chars:
            result = result.replace(c, translate[c])

        return str(result)

    if hasattr(thing, '__next__'):
        return str(thing)
    elif (isinstance(thing, dict) and
          _nest_lvl < get_option("display.pprint_nest_depth")):
        result = _pprint_dict(thing, _nest_lvl, quote_strings=True,
                              max_seq_items=max_seq_items)
    elif (is_sequence(thing) and
          _nest_lvl < get_option("display.pprint_nest_depth")):
        result = _pprint_seq(thing, _nest_lvl, escape_chars=escape_chars,
                             quote_strings=quote_strings,
                             max_seq_items=max_seq_items)
    elif isinstance(thing, str) and quote_strings:
        result = "'{thing}'".format(thing=as_escaped_unicode(thing))
    else:
        result = as_escaped_unicode(thing)

    return str(result)  # always unicode
Exemple #26
0
    def _translate(self):
        """
        Convert the DataFrame in `self.data` and the attrs from `_build_styles`
        into a dictionary of {head, body, uuid, cellstyle}.
        """
        table_styles = self.table_styles or []
        caption = self.caption
        ctx = self.ctx
        precision = self.precision
        hidden_index = self.hidden_index
        hidden_columns = self.hidden_columns
        uuid = self.uuid or str(uuid1()).replace("-", "_")
        ROW_HEADING_CLASS = "row_heading"
        COL_HEADING_CLASS = "col_heading"
        INDEX_NAME_CLASS = "index_name"

        DATA_CLASS = "data"
        BLANK_CLASS = "blank"
        BLANK_VALUE = ""

        def format_attr(pair):
            return "{key}={value}".format(**pair)

        # for sparsifying a MultiIndex
        idx_lengths = _get_level_lengths(self.index)
        col_lengths = _get_level_lengths(self.columns, hidden_columns)

        cell_context = dict()

        n_rlvls = self.data.index.nlevels
        n_clvls = self.data.columns.nlevels
        rlabels = self.data.index.tolist()
        clabels = self.data.columns.tolist()

        if n_rlvls == 1:
            rlabels = [[x] for x in rlabels]
        if n_clvls == 1:
            clabels = [[x] for x in clabels]
        clabels = list(zip(*clabels))

        cellstyle = []
        head = []

        for r in range(n_clvls):
            # Blank for Index columns...
            row_es = [{"type": "th",
                       "value": BLANK_VALUE,
                       "display_value": BLANK_VALUE,
                       "is_visible": not hidden_index,
                       "class": " ".join([BLANK_CLASS])}] * (n_rlvls - 1)

            # ... except maybe the last for columns.names
            name = self.data.columns.names[r]
            cs = [BLANK_CLASS if name is None else INDEX_NAME_CLASS,
                  "level{lvl}".format(lvl=r)]
            name = BLANK_VALUE if name is None else name
            row_es.append({"type": "th",
                           "value": name,
                           "display_value": name,
                           "class": " ".join(cs),
                           "is_visible": not hidden_index})

            if clabels:
                for c, value in enumerate(clabels[r]):
                    cs = [COL_HEADING_CLASS, "level{lvl}".format(lvl=r),
                          "col{col}".format(col=c)]
                    cs.extend(cell_context.get(
                        "col_headings", {}).get(r, {}).get(c, []))
                    es = {
                        "type": "th",
                        "value": value,
                        "display_value": value,
                        "class": " ".join(cs),
                        "is_visible": _is_visible(c, r, col_lengths),
                    }
                    colspan = col_lengths.get((r, c), 0)
                    if colspan > 1:
                        es["attributes"] = [
                            format_attr({"key": "colspan", "value": colspan})
                        ]
                    row_es.append(es)
                head.append(row_es)

        if (self.data.index.names and
                com._any_not_none(*self.data.index.names) and
                not hidden_index):
            index_header_row = []

            for c, name in enumerate(self.data.index.names):
                cs = [INDEX_NAME_CLASS,
                      "level{lvl}".format(lvl=c)]
                name = '' if name is None else name
                index_header_row.append({"type": "th", "value": name,
                                         "class": " ".join(cs)})

            index_header_row.extend(
                [{"type": "th",
                  "value": BLANK_VALUE,
                  "class": " ".join([BLANK_CLASS])
                  }] * (len(clabels[0]) - len(hidden_columns)))

            head.append(index_header_row)

        body = []
        for r, idx in enumerate(self.data.index):
            row_es = []
            for c, value in enumerate(rlabels[r]):
                rid = [ROW_HEADING_CLASS, "level{lvl}".format(lvl=c),
                       "row{row}".format(row=r)]
                es = {
                    "type": "th",
                    "is_visible": (_is_visible(r, c, idx_lengths) and
                                   not hidden_index),
                    "value": value,
                    "display_value": value,
                    "id": "_".join(rid[1:]),
                    "class": " ".join(rid)
                }
                rowspan = idx_lengths.get((c, r), 0)
                if rowspan > 1:
                    es["attributes"] = [
                        format_attr({"key": "rowspan", "value": rowspan})
                    ]
                row_es.append(es)

            for c, col in enumerate(self.data.columns):
                cs = [DATA_CLASS, "row{row}".format(row=r),
                      "col{col}".format(col=c)]
                cs.extend(cell_context.get("data", {}).get(r, {}).get(c, []))
                formatter = self._display_funcs[(r, c)]
                value = self.data.iloc[r, c]
                row_dict = {"type": "td",
                            "value": value,
                            "class": " ".join(cs),
                            "display_value": formatter(value),
                            "is_visible": (c not in hidden_columns)}
                # only add an id if the cell has a style
                if (self.cell_ids or
                        not(len(ctx[r, c]) == 1 and ctx[r, c][0] == '')):
                    row_dict["id"] = "_".join(cs[1:])
                row_es.append(row_dict)
                props = []
                for x in ctx[r, c]:
                    # have to handle empty styles like ['']
                    if x.count(":"):
                        props.append(x.split(":"))
                    else:
                        props.append(['', ''])
                cellstyle.append({'props': props,
                                  'selector': "row{row}_col{col}"
                                  .format(row=r, col=c)})
            body.append(row_es)

        table_attr = self.table_attributes
        use_mathjax = get_option("display.html.use_mathjax")
        if not use_mathjax:
            table_attr = table_attr or ''
            if 'class="' in table_attr:
                table_attr = table_attr.replace('class="',
                                                'class="tex2jax_ignore ')
            else:
                table_attr += ' class="tex2jax_ignore"'

        return dict(head=head, cellstyle=cellstyle, body=body, uuid=uuid,
                    precision=precision, table_styles=table_styles,
                    caption=caption, table_attributes=table_attr)
Exemple #27
0
def format_object_summary(obj, formatter, is_justify=True, name=None,
                          indent_for_name=True):
    """
    Return the formatted obj as a unicode string

    Parameters
    ----------
    obj : object
        must be iterable and support __getitem__
    formatter : callable
        string formatter for an element
    is_justify : boolean
        should justify the display
    name : name, optional
        defaults to the class name of the obj
    indent_for_name : bool, default True
        Whether subsequent lines should be be indented to
        align with the name.

    Returns
    -------
    summary string

    """
    from pandas.io.formats.console import get_console_size
    from pandas.io.formats.format import _get_adjustment

    display_width, _ = get_console_size()
    if display_width is None:
        display_width = get_option('display.width') or 80
    if name is None:
        name = obj.__class__.__name__

    if indent_for_name:
        name_len = len(name)
        space1 = "\n%s" % (' ' * (name_len + 1))
        space2 = "\n%s" % (' ' * (name_len + 2))
    else:
        space1 = "\n"
        space2 = "\n "  # space for the opening '['

    n = len(obj)
    sep = ','
    max_seq_items = get_option('display.max_seq_items') or n

    # are we a truncated display
    is_truncated = n > max_seq_items

    # adj can optionally handle unicode eastern asian width
    adj = _get_adjustment()

    def _extend_line(s, line, value, display_width, next_line_prefix):

        if (adj.len(line.rstrip()) + adj.len(value.rstrip()) >=
                display_width):
            s += line.rstrip()
            line = next_line_prefix
        line += value
        return s, line

    def best_len(values):
        if values:
            return max(adj.len(x) for x in values)
        else:
            return 0

    close = ', '

    if n == 0:
        summary = '[]{}'.format(close)
    elif n == 1:
        first = formatter(obj[0])
        summary = '[{}]{}'.format(first, close)
    elif n == 2:
        first = formatter(obj[0])
        last = formatter(obj[-1])
        summary = '[{}, {}]{}'.format(first, last, close)
    else:

        if n > max_seq_items:
            n = min(max_seq_items // 2, 10)
            head = [formatter(x) for x in obj[:n]]
            tail = [formatter(x) for x in obj[-n:]]
        else:
            head = []
            tail = [formatter(x) for x in obj]

        # adjust all values to max length if needed
        if is_justify:

            # however, if we are not truncated and we are only a single
            # line, then don't justify
            if (is_truncated or
                    not (len(', '.join(head)) < display_width and
                         len(', '.join(tail)) < display_width)):
                max_len = max(best_len(head), best_len(tail))
                head = [x.rjust(max_len) for x in head]
                tail = [x.rjust(max_len) for x in tail]

        summary = ""
        line = space2

        for i in range(len(head)):
            word = head[i] + sep + ' '
            summary, line = _extend_line(summary, line, word,
                                         display_width, space2)

        if is_truncated:
            # remove trailing space of last line
            summary += line.rstrip() + space2 + '...'
            line = space2

        for i in range(len(tail) - 1):
            word = tail[i] + sep + ' '
            summary, line = _extend_line(summary, line, word,
                                         display_width, space2)

        # last value: no sep added + 1 space of width used for trailing ','
        summary, line = _extend_line(summary, line, tail[-1],
                                     display_width - 2, space2)
        summary += line

        # right now close is either '' or ', '
        # Now we want to include the ']', but not the maybe space.
        close = ']' + close.rstrip(' ')
        summary += close

        if len(summary) > (display_width):
            summary += space1
        else:  # one row
            summary += ' '

        # remove initial space
        summary = '[' + summary[len(space2):]

    return summary
Exemple #28
0
def _ensure_decoded(s):
    """ if we have bytes, decode them to unicode """
    if isinstance(s, (np.bytes_, bytes)):
        s = s.decode(get_option("display.encoding"))
    return s
Exemple #29
0
def infor(file, verbose=None, buf=None, max_cols=None, null_counts=None):

    lines = []

    lines.append(str(type(file)))
    lines.append(file.index._summary())

    if len(file.columns) == 0:
        lines.append("Empty {name}".format(name=type(file).__name__))
        fmt.buffer_put_lines(buf, lines)
        return

    cols = file.columns

    # hack
    if max_cols is None:
        max_cols = get_option("display.max_info_columns",
                              len(file.columns) + 1)

    max_rows = get_option("display.max_info_rows", len(file) + 1)

    if null_counts is None:
        show_counts = (len(file.columns) <= max_cols) and (len(file) <
                                                           max_rows)
    else:
        show_counts = null_counts
    exceeds_info_cols = len(file.columns) > max_cols

    def _verbose_repr():
        lines.append("Data columns (total %d columns):" % len(file.columns))
        space = max(len(pprint_thing(k)) for k in file.columns) + 4
        counts = None

        tmpl = "{count}{dtype}"
        if show_counts:
            counts = file.count()
            if len(cols) != len(counts):  # pragma: no cover
                raise AssertionError("Columns must equal counts "
                                     "({cols:d} != {counts:d})".format(
                                         cols=len(cols), counts=len(counts)))
            tmpl = "{count} non-null {dtype}"

        dtypes = file.dtypes
        for i, col in enumerate(file.columns):
            dtype = dtypes.iloc[i]
            col = pprint_thing(col)

            count = ""
            if show_counts:
                count = counts.iloc[i]

            lines.append(
                _put_str(col, space) + tmpl.format(count=count, dtype=dtype))

    def _non_verbose_repr():
        lines.append(file.columns._summary(name="Columns"))

    if verbose:
        _verbose_repr()
    elif verbose is False:  # specifically set to False, not nesc None
        _non_verbose_repr()
    else:
        if exceeds_info_cols:
            _non_verbose_repr()
        else:
            _verbose_repr()

    counts = file._data.get_dtype_counts()
    dtypes = [
        "{k}({kk:d})".format(k=k[0], kk=k[1]) for k in sorted(counts.items())
    ]
    lines.append("dtypes: {types}".format(types=", ".join(dtypes)))

    # fmt.buffer_put_lines(buf, lines)
    if any(isinstance(x, str) for x in lines):
        lines = [str(x) for x in lines]
    return "\n".join(lines)
Exemple #30
0
bn = import_optional_dependency("bottleneck",
                                raise_on_missing=False,
                                on_version="warn")
_BOTTLENECK_INSTALLED = bn is not None
_USE_BOTTLENECK = False


def set_use_bottleneck(v: bool = True) -> None:
    # set/unset to use bottleneck
    global _USE_BOTTLENECK
    if _BOTTLENECK_INSTALLED:
        _USE_BOTTLENECK = v


set_use_bottleneck(get_option("compute.use_bottleneck"))


class disallow:
    def __init__(self, *dtypes):
        super().__init__()
        self.dtypes = tuple(pandas_dtype(dtype).type for dtype in dtypes)

    def check(self, obj) -> bool:
        return hasattr(obj, "dtype") and issubclass(obj.dtype.type,
                                                    self.dtypes)

    def __call__(self, f: F) -> F:
        @functools.wraps(f)
        def _f(*args, **kwargs):
            obj_iter = itertools.chain(args, kwargs.values())
Exemple #31
0
except ImportError:  # pragma: no cover
    pass


_USE_BOTTLENECK = False


def set_use_bottleneck(v=True):
    # set/unset to use bottleneck
    global _USE_BOTTLENECK
    if _BOTTLENECK_INSTALLED:
        _USE_BOTTLENECK = v


set_use_bottleneck(get_option('compute.use_bottleneck'))


class disallow(object):

    def __init__(self, *dtypes):
        super(disallow, self).__init__()
        self.dtypes = tuple(pandas_dtype(dtype).type for dtype in dtypes)

    def check(self, obj):
        return hasattr(obj, 'dtype') and issubclass(obj.dtype.type,
                                                    self.dtypes)

    def __call__(self, f):
        @functools.wraps(f)
        def _f(*args, **kwargs):

pytestmark = pytest.mark.filterwarnings(
    "ignore:RangeIndex.* is deprecated:DeprecationWarning"
)


# TODO(ArrayManager) fastparquet relies on BlockManager internals

# setup engines & skips
@pytest.fixture(
    params=[
        pytest.param(
            "fastparquet",
            marks=pytest.mark.skipif(
                not _HAVE_FASTPARQUET or get_option("mode.data_manager") == "array",
                reason="fastparquet is not installed or ArrayManager is used",
            ),
        ),
        pytest.param(
            "pyarrow",
            marks=pytest.mark.skipif(
                not _HAVE_PYARROW, reason="pyarrow is not installed"
            ),
        ),
    ]
)
def engine(request):
    return request.param

Exemple #33
0
                                             'b_value': b_value},
                                 casting='safe')
        except ValueError as detail:
            if 'unknown type object' in str(detail):
                pass
        except Exception as detail:
            raise TypeError(str(detail))

    if result is None:
        result = _where_standard(cond, a, b)

    return result


# turn myself on
set_use_numexpr(get_option('compute.use_numexpr'))


def _has_bool_dtype(x):
    try:
        if isinstance(x, ABCDataFrame):
            return 'bool' in x.dtypes
        else:
            return x.dtype == bool
    except AttributeError:
        return isinstance(x, (bool, np.bool_))


def _bool_arith_check(op_str, a, b, not_allowed=frozenset(('/', '//', '**')),
                      unsupported=None):
    if unsupported is None:
def fp():
    if not _HAVE_FASTPARQUET:
        pytest.skip("fastparquet is not installed")
    elif get_option("mode.data_manager") == "array":
        pytest.skip("ArrayManager is not supported with fastparquet")
    return "fastparquet"
Exemple #35
0
    def _translate(self,
                   sparse_index: bool,
                   sparse_cols: bool,
                   blank: str = "&nbsp;"):
        """
        Process Styler data and settings into a dict for template rendering.

        Convert data and settings from ``Styler`` attributes such as ``self.data``,
        ``self.tooltips`` including applying any methods in ``self._todo``.

        Parameters
        ----------
        sparse_index : bool
            Whether to sparsify the index or print all hierarchical index elements.
            Upstream defaults are typically to `pandas.options.styler.sparse.index`.
        sparse_cols : bool
            Whether to sparsify the columns or print all hierarchical column elements.
            Upstream defaults are typically to `pandas.options.styler.sparse.columns`.

        Returns
        -------
        d : dict
            The following structure: {uuid, table_styles, caption, head, body,
            cellstyle, table_attributes}
        """
        ROW_HEADING_CLASS = "row_heading"
        COL_HEADING_CLASS = "col_heading"
        INDEX_NAME_CLASS = "index_name"
        TRIMMED_COL_CLASS = "col_trim"
        TRIMMED_ROW_CLASS = "row_trim"

        DATA_CLASS = "data"
        BLANK_CLASS = "blank"
        BLANK_VALUE = blank

        # construct render dict
        d = {
            "uuid": self.uuid,
            "table_styles": _format_table_styles(self.table_styles or []),
            "caption": self.caption,
        }

        max_elements = get_option("styler.render.max_elements")
        max_rows, max_cols = _get_trimming_maximums(len(self.data.index),
                                                    len(self.data.columns),
                                                    max_elements)

        head = self._translate_header(
            BLANK_CLASS,
            BLANK_VALUE,
            INDEX_NAME_CLASS,
            COL_HEADING_CLASS,
            sparse_cols,
            max_cols,
            TRIMMED_COL_CLASS,
        )
        d.update({"head": head})

        self.cellstyle_map: DefaultDict[tuple[CSSPair, ...],
                                        list[str]] = defaultdict(list)
        body = self._translate_body(
            DATA_CLASS,
            ROW_HEADING_CLASS,
            sparse_index,
            max_rows,
            max_cols,
            TRIMMED_ROW_CLASS,
            TRIMMED_COL_CLASS,
        )
        d.update({"body": body})

        cellstyle: list[dict[str, CSSList | list[str]]] = [{
            "props":
            list(props),
            "selectors":
            selectors
        } for props, selectors in self.cellstyle_map.items()]
        d.update({"cellstyle": cellstyle})

        table_attr = self.table_attributes
        use_mathjax = get_option("display.html.use_mathjax")
        if not use_mathjax:
            table_attr = table_attr or ""
            if 'class="' in table_attr:
                table_attr = table_attr.replace('class="',
                                                'class="tex2jax_ignore ')
            else:
                table_attr += ' class="tex2jax_ignore"'
        d.update({"table_attributes": table_attr})

        if self.tooltips:
            d = self.tooltips._translate(self.data, self.uuid, d)

        return d
Exemple #36
0
def info(
    data, verbose=None, buf=None, max_cols=None, memory_usage=None, null_counts=None
) -> None:
    """
    Print a concise summary of a DataFrame.

    This method prints information about a DataFrame including
    the index dtype and column dtypes, non-null values and memory usage.

    Parameters
    ----------
    data : DataFrame
        DataFrame to print information about.
    verbose : bool, optional
        Whether to print the full summary. By default, the setting in
        ``pandas.options.display.max_info_columns`` is followed.
    buf : writable buffer, defaults to sys.stdout
        Where to send the output. By default, the output is printed to
        sys.stdout. Pass a writable buffer if you need to further process
        the output.
    max_cols : int, optional
        When to switch from the verbose to the truncated output. If the
        DataFrame has more than `max_cols` columns, the truncated output
        is used. By default, the setting in
        ``pandas.options.display.max_info_columns`` is used.
    memory_usage : bool, str, optional
        Specifies whether total memory usage of the DataFrame
        elements (including the index) should be displayed. By default,
        this follows the ``pandas.options.display.memory_usage`` setting.

        True always show memory usage. False never shows memory usage.
        A value of 'deep' is equivalent to "True with deep introspection".
        Memory usage is shown in human-readable units (base-2
        representation). Without deep introspection a memory estimation is
        made based in column dtype and number of rows assuming values
        consume the same memory amount for corresponding dtypes. With deep
        memory introspection, a real memory usage calculation is performed
        at the cost of computational resources.
    null_counts : bool, optional
        Whether to show the non-null counts. By default, this is shown
        only if the frame is smaller than
        ``pandas.options.display.max_info_rows`` and
        ``pandas.options.display.max_info_columns``. A value of True always
        shows the counts, and False never shows the counts.

    Returns
    -------
    None
        This method prints a summary of a DataFrame and returns None.

    See Also
    --------
    DataFrame.describe: Generate descriptive statistics of DataFrame
        columns.
    DataFrame.memory_usage: Memory usage of DataFrame columns.

    Examples
    --------
    >>> int_values = [1, 2, 3, 4, 5]
    >>> text_values = ['alpha', 'beta', 'gamma', 'delta', 'epsilon']
    >>> float_values = [0.0, 0.25, 0.5, 0.75, 1.0]
    >>> df = pd.DataFrame({"int_col": int_values, "text_col": text_values,
    ...                   "float_col": float_values})
    >>> df
        int_col text_col  float_col
    0        1    alpha       0.00
    1        2     beta       0.25
    2        3    gamma       0.50
    3        4    delta       0.75
    4        5  epsilon       1.00

    Prints information of all columns:

    >>> df.info(verbose=True)
    <class 'pandas.core.frame.DataFrame'>
    RangeIndex: 5 entries, 0 to 4
    Data columns (total 3 columns):
        #   Column     Non-Null Count  Dtype
    ---  ------     --------------  -----
        0   int_col    5 non-null      int64
        1   text_col   5 non-null      object
        2   float_col  5 non-null      float64
    dtypes: float64(1), int64(1), object(1)
    memory usage: 248.0+ bytes

    Prints a summary of columns count and its dtypes but not per column
    information:

    >>> df.info(verbose=False)
    <class 'pandas.core.frame.DataFrame'>
    RangeIndex: 5 entries, 0 to 4
    Columns: 3 entries, int_col to float_col
    dtypes: float64(1), int64(1), object(1)
    memory usage: 248.0+ bytes

    Pipe output of DataFrame.info to buffer instead of sys.stdout, get
    buffer content and writes to a text file:

    >>> import io
    >>> buffer = io.StringIO()
    >>> df.info(buf=buffer)
    >>> s = buffer.getvalue()
    >>> with open("df_info.txt", "w",
    ...           encoding="utf-8") as f:  # doctest: +SKIP
    ...     f.write(s)
    260

    The `memory_usage` parameter allows deep introspection mode, specially
    useful for big DataFrames and fine-tune memory optimization:

    >>> random_strings_array = np.random.choice(['a', 'b', 'c'], 10 ** 6)
    >>> df = pd.DataFrame({
    ...     'column_1': np.random.choice(['a', 'b', 'c'], 10 ** 6),
    ...     'column_2': np.random.choice(['a', 'b', 'c'], 10 ** 6),
    ...     'column_3': np.random.choice(['a', 'b', 'c'], 10 ** 6)
    ... })
    >>> df.info()
    <class 'pandas.core.frame.DataFrame'>
    RangeIndex: 1000000 entries, 0 to 999999
    Data columns (total 3 columns):
        #   Column    Non-Null Count    Dtype
    ---  ------    --------------    -----
        0   column_1  1000000 non-null  object
        1   column_2  1000000 non-null  object
        2   column_3  1000000 non-null  object
    dtypes: object(3)
    memory usage: 22.9+ MB

    >>> df.info(memory_usage='deep')
    <class 'pandas.core.frame.DataFrame'>
    RangeIndex: 1000000 entries, 0 to 999999
    Data columns (total 3 columns):
        #   Column    Non-Null Count    Dtype
    ---  ------    --------------    -----
        0   column_1  1000000 non-null  object
        1   column_2  1000000 non-null  object
        2   column_3  1000000 non-null  object
    dtypes: object(3)
    memory usage: 188.8 MB
    """
    if buf is None:  # pragma: no cover
        buf = sys.stdout

    lines = []

    lines.append(str(type(data)))
    lines.append(data.index._summary())

    if len(data.columns) == 0:
        lines.append(f"Empty {type(data).__name__}")
        fmt.buffer_put_lines(buf, lines)
        return

    cols = data.columns
    col_count = len(data.columns)

    # hack
    if max_cols is None:
        max_cols = get_option("display.max_info_columns", len(data.columns) + 1)

    max_rows = get_option("display.max_info_rows", len(data) + 1)

    if null_counts is None:
        show_counts = (col_count <= max_cols) and (len(data) < max_rows)
    else:
        show_counts = null_counts
    exceeds_info_cols = col_count > max_cols

    def _verbose_repr():
        lines.append(f"Data columns (total {len(data.columns)} columns):")

        id_head = " # "
        column_head = "Column"
        col_space = 2

        max_col = max(len(pprint_thing(k)) for k in cols)
        len_column = len(pprint_thing(column_head))
        space = max(max_col, len_column) + col_space

        max_id = len(pprint_thing(col_count))
        len_id = len(pprint_thing(id_head))
        space_num = max(max_id, len_id) + col_space

        header = _put_str(id_head, space_num) + _put_str(column_head, space)
        if show_counts:
            counts = data.count()
            if len(cols) != len(counts):  # pragma: no cover
                raise AssertionError(
                    f"Columns must equal counts ({len(cols)} != {len(counts)})"
                )
            count_header = "Non-Null Count"
            len_count = len(count_header)
            non_null = " non-null"
            max_count = max(len(pprint_thing(k)) for k in counts) + len(non_null)
            space_count = max(len_count, max_count) + col_space
            count_temp = "{count}" + non_null
        else:
            count_header = ""
            space_count = len(count_header)
            len_count = space_count
            count_temp = "{count}"

        dtype_header = "Dtype"
        len_dtype = len(dtype_header)
        max_dtypes = max(len(pprint_thing(k)) for k in data.dtypes)
        space_dtype = max(len_dtype, max_dtypes)
        header += _put_str(count_header, space_count) + _put_str(
            dtype_header, space_dtype
        )

        lines.append(header)
        lines.append(
            _put_str("-" * len_id, space_num)
            + _put_str("-" * len_column, space)
            + _put_str("-" * len_count, space_count)
            + _put_str("-" * len_dtype, space_dtype)
        )

        for i, col in enumerate(data.columns):
            dtype = data.dtypes.iloc[i]
            col = pprint_thing(col)

            line_no = _put_str(f" {i}", space_num)
            count = ""
            if show_counts:
                count = counts.iloc[i]

            lines.append(
                line_no
                + _put_str(col, space)
                + _put_str(count_temp.format(count=count), space_count)
                + _put_str(dtype, space_dtype)
            )

    def _non_verbose_repr():
        lines.append(data.columns._summary(name="Columns"))

    def _sizeof_fmt(num, size_qualifier):
        # returns size in human readable format
        for x in ["bytes", "KB", "MB", "GB", "TB"]:
            if num < 1024.0:
                return f"{num:3.1f}{size_qualifier} {x}"
            num /= 1024.0
        return f"{num:3.1f}{size_qualifier} PB"

    if verbose:
        _verbose_repr()
    elif verbose is False:  # specifically set to False, not nesc None
        _non_verbose_repr()
    else:
        if exceeds_info_cols:
            _non_verbose_repr()
        else:
            _verbose_repr()

    # groupby dtype.name to collect e.g. Categorical columns
    counts = data.dtypes.value_counts().groupby(lambda x: x.name).sum()
    dtypes = [f"{k[0]}({k[1]:d})" for k in sorted(counts.items())]
    lines.append(f"dtypes: {', '.join(dtypes)}")

    if memory_usage is None:
        memory_usage = get_option("display.memory_usage")
    if memory_usage:
        # append memory usage of df to display
        size_qualifier = ""
        if memory_usage == "deep":
            deep = True
        else:
            # size_qualifier is just a best effort; not guaranteed to catch
            # all cases (e.g., it misses categorical data even with object
            # categories)
            deep = False
            if "object" in counts or data.index._is_memory_usage_qualified():
                size_qualifier = "+"
        mem_usage = data.memory_usage(index=True, deep=deep).sum()
        lines.append(f"memory usage: {_sizeof_fmt(mem_usage, size_qualifier)}\n")
    fmt.buffer_put_lines(buf, lines)
Exemple #37
0
    if _can_use_numexpr(None, "where", a, b, "where"):

        result = ne.evaluate(
            "where(cond_value, a_value, b_value)",
            local_dict={"cond_value": cond, "a_value": a, "b_value": b},
            casting="safe",
        )

    if result is None:
        result = _where_standard(cond, a, b)

    return result


# turn myself on
set_use_numexpr(get_option("compute.use_numexpr"))


def _has_bool_dtype(x):
    try:
        return x.dtype == bool
    except AttributeError:
        return isinstance(x, (bool, np.bool_))


def _bool_arith_check(
    op_str, a, b, not_allowed=frozenset(("/", "//", "**")), unsupported=None
):
    if unsupported is None:
        unsupported = {"+": "|", "*": "&", "-": "^"}
Exemple #38
0
    def info(self) -> None:
        """
        Print a concise summary of a %(klass)s.

        This method prints information about a %(klass)s including
        the index dtype%(type_sub)s, non-null values and memory usage.

        Parameters
        ----------
        data : %(klass)s
            %(klass)s to print information about.
        verbose : bool, optional
            Whether to print the full summary. By default, the setting in
            ``pandas.options.display.max_info_columns`` is followed.
        buf : writable buffer, defaults to sys.stdout
            Where to send the output. By default, the output is printed to
            sys.stdout. Pass a writable buffer if you need to further process
            the output.
        %(max_cols_sub)s
        memory_usage : bool, str, optional
            Specifies whether total memory usage of the %(klass)s
            elements (including the index) should be displayed. By default,
            this follows the ``pandas.options.display.memory_usage`` setting.

            True always show memory usage. False never shows memory usage.
            A value of 'deep' is equivalent to "True with deep introspection".
            Memory usage is shown in human-readable units (base-2
            representation). Without deep introspection a memory estimation is
            made based in column dtype and number of rows assuming values
            consume the same memory amount for corresponding dtypes. With deep
            memory introspection, a real memory usage calculation is performed
            at the cost of computational resources.
        null_counts : bool, optional
            Whether to show the non-null counts. By default, this is shown
            only if the %(klass)s is smaller than
            ``pandas.options.display.max_info_rows`` and
            ``pandas.options.display.max_info_columns``. A value of True always
            shows the counts, and False never shows the counts.

        Returns
        -------
        None
            This method prints a summary of a %(klass)s and returns None.

        See Also
        --------
        %(see_also_sub)s

        Examples
        --------
        %(examples_sub)s
        """
        lines = []

        lines.append(str(type(self.data)))
        lines.append(self.data.index._summary())

        ids, dtypes = self._get_ids_and_dtypes()
        col_count = len(ids)

        if col_count == 0:
            lines.append(f"Empty {type(self.data).__name__}")
            fmt.buffer_put_lines(self.buf, lines)
            return

        # hack
        max_cols = self.max_cols
        if max_cols is None:
            max_cols = get_option("display.max_info_columns", col_count + 1)

        max_rows = get_option("display.max_info_rows", len(self.data) + 1)

        if self.null_counts is None:
            show_counts = (col_count <= max_cols) and (len(self.data) <
                                                       max_rows)
        else:
            show_counts = self.null_counts
        exceeds_info_cols = col_count > max_cols

        if self.verbose:
            self._verbose_repr(lines, ids, dtypes, show_counts)
        elif self.verbose is False:  # specifically set to False, not necessarily None
            self._non_verbose_repr(lines, ids)
        else:
            if exceeds_info_cols:
                self._non_verbose_repr(lines, ids)
            else:
                self._verbose_repr(lines, ids, dtypes, show_counts)

        # groupby dtype.name to collect e.g. Categorical columns
        counts = dtypes.value_counts().groupby(lambda x: x.name).sum()
        collected_dtypes = [
            f"{k[0]}({k[1]:d})" for k in sorted(counts.items())
        ]
        lines.append(f"dtypes: {', '.join(collected_dtypes)}")

        if self.memory_usage:
            # append memory usage of df to display
            size_qualifier = ""
            if self.memory_usage == "deep":
                deep = True
            else:
                # size_qualifier is just a best effort; not guaranteed to catch
                # all cases (e.g., it misses categorical data even with object
                # categories)
                deep = False
                if "object" in counts or self.data.index._is_memory_usage_qualified(
                ):
                    size_qualifier = "+"
            mem_usage = self._get_mem_usage(deep=deep)
            lines.append(
                f"memory usage: {_sizeof_fmt(mem_usage, size_qualifier)}\n")
        fmt.buffer_put_lines(self.buf, lines)
Exemple #39
0
    def _translate(self):
        """
        Convert the DataFrame in `self.data` and the attrs from `_build_styles`
        into a dictionary of {head, body, uuid, cellstyle}.
        """
        table_styles = self.table_styles or []
        caption = self.caption
        ctx = self.ctx
        precision = self.precision
        hidden_index = self.hidden_index
        hidden_columns = self.hidden_columns
        uuid = self.uuid or str(uuid1()).replace("-", "_")
        ROW_HEADING_CLASS = "row_heading"
        COL_HEADING_CLASS = "col_heading"
        INDEX_NAME_CLASS = "index_name"

        DATA_CLASS = "data"
        BLANK_CLASS = "blank"
        BLANK_VALUE = ""

        def format_attr(pair):
            return "{key}={value}".format(**pair)

        # for sparsifying a MultiIndex
        idx_lengths = _get_level_lengths(self.index)
        col_lengths = _get_level_lengths(self.columns, hidden_columns)

        cell_context = dict()

        n_rlvls = self.data.index.nlevels
        n_clvls = self.data.columns.nlevels
        rlabels = self.data.index.tolist()
        clabels = self.data.columns.tolist()

        if n_rlvls == 1:
            rlabels = [[x] for x in rlabels]
        if n_clvls == 1:
            clabels = [[x] for x in clabels]
        clabels = list(zip(*clabels))

        cellstyle = []
        head = []

        for r in range(n_clvls):
            # Blank for Index columns...
            row_es = [
                {
                    "type": "th",
                    "value": BLANK_VALUE,
                    "display_value": BLANK_VALUE,
                    "is_visible": not hidden_index,
                    "class": " ".join([BLANK_CLASS]),
                }
            ] * (n_rlvls - 1)

            # ... except maybe the last for columns.names
            name = self.data.columns.names[r]
            cs = [
                BLANK_CLASS if name is None else INDEX_NAME_CLASS,
                "level{lvl}".format(lvl=r),
            ]
            name = BLANK_VALUE if name is None else name
            row_es.append(
                {
                    "type": "th",
                    "value": name,
                    "display_value": name,
                    "class": " ".join(cs),
                    "is_visible": not hidden_index,
                }
            )

            if clabels:
                for c, value in enumerate(clabels[r]):
                    cs = [
                        COL_HEADING_CLASS,
                        "level{lvl}".format(lvl=r),
                        "col{col}".format(col=c),
                    ]
                    cs.extend(
                        cell_context.get("col_headings", {}).get(r, {}).get(c, [])
                    )
                    es = {
                        "type": "th",
                        "value": value,
                        "display_value": value,
                        "class": " ".join(cs),
                        "is_visible": _is_visible(c, r, col_lengths),
                    }
                    colspan = col_lengths.get((r, c), 0)
                    if colspan > 1:
                        es["attributes"] = [
                            format_attr({"key": "colspan", "value": colspan})
                        ]
                    row_es.append(es)
                head.append(row_es)

        if (
            self.data.index.names
            and com.any_not_none(*self.data.index.names)
            and not hidden_index
        ):
            index_header_row = []

            for c, name in enumerate(self.data.index.names):
                cs = [INDEX_NAME_CLASS, "level{lvl}".format(lvl=c)]
                name = "" if name is None else name
                index_header_row.append(
                    {"type": "th", "value": name, "class": " ".join(cs)}
                )

            index_header_row.extend(
                [{"type": "th", "value": BLANK_VALUE, "class": " ".join([BLANK_CLASS])}]
                * (len(clabels[0]) - len(hidden_columns))
            )

            head.append(index_header_row)

        body = []
        for r, idx in enumerate(self.data.index):
            row_es = []
            for c, value in enumerate(rlabels[r]):
                rid = [
                    ROW_HEADING_CLASS,
                    "level{lvl}".format(lvl=c),
                    "row{row}".format(row=r),
                ]
                es = {
                    "type": "th",
                    "is_visible": (_is_visible(r, c, idx_lengths) and not hidden_index),
                    "value": value,
                    "display_value": value,
                    "id": "_".join(rid[1:]),
                    "class": " ".join(rid),
                }
                rowspan = idx_lengths.get((c, r), 0)
                if rowspan > 1:
                    es["attributes"] = [
                        format_attr({"key": "rowspan", "value": rowspan})
                    ]
                row_es.append(es)

            for c, col in enumerate(self.data.columns):
                cs = [DATA_CLASS, "row{row}".format(row=r), "col{col}".format(col=c)]
                cs.extend(cell_context.get("data", {}).get(r, {}).get(c, []))
                formatter = self._display_funcs[(r, c)]
                value = self.data.iloc[r, c]
                row_dict = {
                    "type": "td",
                    "value": value,
                    "class": " ".join(cs),
                    "display_value": formatter(value),
                    "is_visible": (c not in hidden_columns),
                }
                # only add an id if the cell has a style
                if self.cell_ids or not (len(ctx[r, c]) == 1 and ctx[r, c][0] == ""):
                    row_dict["id"] = "_".join(cs[1:])
                row_es.append(row_dict)
                props = []
                for x in ctx[r, c]:
                    # have to handle empty styles like ['']
                    if x.count(":"):
                        props.append(x.split(":"))
                    else:
                        props.append(["", ""])
                cellstyle.append(
                    {
                        "props": props,
                        "selector": "row{row}_col{col}".format(row=r, col=c),
                    }
                )
            body.append(row_es)

        table_attr = self.table_attributes
        use_mathjax = get_option("display.html.use_mathjax")
        if not use_mathjax:
            table_attr = table_attr or ""
            if 'class="' in table_attr:
                table_attr = table_attr.replace('class="', 'class="tex2jax_ignore ')
            else:
                table_attr += ' class="tex2jax_ignore"'

        return dict(
            head=head,
            cellstyle=cellstyle,
            body=body,
            uuid=uuid,
            precision=precision,
            table_styles=table_styles,
            caption=caption,
            table_attributes=table_attr,
        )
Exemple #40
0
def _initialize_memory_usage(
    memory_usage: Optional[Union[bool, str]] = None, ) -> Union[bool, str]:
    """Get memory usage based on inputs and display options."""
    if memory_usage is None:
        memory_usage = get_option("display.memory_usage")
    return memory_usage