Example #1
    def run_binary(self, df, other, assert_func, test_flex=False,
                   numexpr_ops={'gt', 'lt', 'ge', 'le', 'eq', 'ne'}):
        """
        tests solely that the result is the same whether or not numexpr is
        enabled.  Whether the operation itself is correct is tested elsewhere.
        """
        expr._MIN_ELEMENTS = 0
        expr.set_test_mode(True)
        operations = ['gt', 'lt', 'ge', 'le', 'eq', 'ne']

        for arith in operations:
            if test_flex:
                op = lambda x, y: getattr(df, arith)(y)
                op.__name__ = arith
            else:
                op = getattr(operator, arith)
            expr.set_use_numexpr(False)
            expected = op(df, other)
            expr.set_use_numexpr(True)
            expr.get_test_result()
            result = op(df, other)
            used_numexpr = expr.get_test_result()
            try:
                if arith in numexpr_ops:
                    assert used_numexpr, "Did not use numexpr as expected."
                else:
                    assert not used_numexpr, "Used numexpr unexpectedly."
                assert_func(expected, result)
            except Exception:
                pprint_thing("Failed test with operation %r" % arith)
                pprint_thing("test_flex was %r" % test_flex)
                raise
Example #2
    def run_arithmetic(self, df, other, assert_func, check_dtype=False,
                       test_flex=True):
        expr._MIN_ELEMENTS = 0
        operations = ['add', 'sub', 'mul', 'mod', 'truediv', 'floordiv']
        for arith in operations:

            operator_name = arith
            if arith == 'div':
                operator_name = 'truediv'

            if test_flex:
                op = lambda x, y: getattr(df, arith)(y)
                op.__name__ = arith
            else:
                op = getattr(operator, operator_name)
            expr.set_use_numexpr(False)
            expected = op(df, other)
            expr.set_use_numexpr(True)

            result = op(df, other)
            try:
                if check_dtype:
                    if arith == 'truediv':
                        assert expected.dtype.kind == 'f'
                assert_func(expected, result)
            except Exception:
                pprint_thing("Failed test with operator %r" % op.__name__)
                raise
Example #3
File: html.py Project: clham/pandas
    def _write_cell(self, s, kind='td', indent=0, tags=None):
        if tags is not None:
            start_tag = '<{kind} {tags}>'.format(kind=kind, tags=tags)
        else:
            start_tag = '<{kind}>'.format(kind=kind)

        if self.escape:
            # escape & first to prevent double escaping of &
            esc = OrderedDict([('&', r'&amp;'), ('<', r'&lt;'),
                               ('>', r'&gt;')])
        else:
            esc = {}

        rs = pprint_thing(s, escape_chars=esc).strip()

        if self.render_links and _is_url(rs):
            rs_unescaped = pprint_thing(s, escape_chars={}).strip()
            start_tag += '<a href="{url}" target="_blank">'.format(
                url=rs_unescaped)
            end_a = '</a>'
        else:
            end_a = ''

        self.write(u'{start}{rs}{end_a}</{kind}>'.format(
            start=start_tag, rs=rs, end_a=end_a, kind=kind), indent)
Example #4
File: ops.py Project: pydata/pandas
 def __unicode__(self):
     """Print a generic n-ary operator and its operands using infix
     notation"""
     # recurse over the operands
     parened = ('({0})'.format(pprint_thing(opr))
                for opr in self.operands)
     return pprint_thing(' {0} '.format(self.op).join(parened))
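
A minimal standalone sketch (not from the pandas source) of the infix string this __unicode__ builds; the operands and operator below are made up for illustration.

    # each operand is parenthesized, then the pieces are joined with the
    # operator surrounded by spaces
    operands = ['a + b', 'c']
    op = '&'
    parened = ('({0})'.format(opr) for opr in operands)
    print(' {0} '.format(op).join(parened))  # prints: (a + b) & (c)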
Example #5
 def _print(result, error=None):
     if error is not None:
         error = str(error)
     v = ("%-16.16s [%-16.16s]: [typ->%-8.8s,obj->%-8.8s,"
          "key1->(%-4.4s),key2->(%-4.4s),axis->%s] %s" %
          (name, result, t, o, method1, method2, a, error or ''))
     if _verbose:
         pprint_thing(v)
Example #6
    def test_copy_index_name_checking(self):
        # don't want to be able to modify the index stored elsewhere after
        # making a copy

        self.ts.index.name = None
        assert self.ts.index.name is None
        assert self.ts is self.ts

        cp = self.ts.copy()
        cp.index.name = 'foo'
        printing.pprint_thing(self.ts.index.name)
        assert self.ts.index.name is None
Example #7
def test_repr_binary_type():
    import string
    letters = string.ascii_letters
    try:
        raw = bytes(letters, encoding=cf.get_option('display.encoding'))
    except TypeError:
        raw = bytes(letters)
    b = str(compat.bytes_to_str(raw))
    res = printing.pprint_thing(b, quote_strings=True)
    assert res == repr(b)
    res = printing.pprint_thing(b, quote_strings=False)
    assert res == b
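
For context, a hedged sketch of the quote_strings behavior the test above relies on, assuming a pandas version where pprint_thing lives in pandas.io.formats.printing.

    from pandas.io.formats.printing import pprint_thing

    s = 'abc'
    print(pprint_thing(s, quote_strings=True))   # expected to match repr(s), i.e. quoted
    print(pprint_thing(s, quote_strings=False))  # expected to be the bare string: abc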
Example #8
 def check_extension(cls, ext):
     """checks that path's extension against the Writer's supported
     extensions.  If it isn't supported, raises UnsupportedFiletypeError."""
     if ext.startswith('.'):
         ext = ext[1:]
     if not any(ext in extension for extension in cls.supported_extensions):
         msg = (u("Invalid extension for engine '{engine}': '{ext}'")
                .format(engine=pprint_thing(cls.engine),
                        ext=pprint_thing(ext)))
         raise ValueError(msg)
     else:
         return True
Example #9
def _replot_ax(ax, freq, kwargs):
    data = getattr(ax, '_plot_data', None)

    # clear current axes and data
    ax._plot_data = []
    ax.clear()

    _decorate_axes(ax, freq, kwargs)

    lines = []
    labels = []
    if data is not None:
        for series, plotf, kwds in data:
            series = series.copy()
            idx = series.index.asfreq(freq, how='S')
            series.index = idx
            ax._plot_data.append((series, plotf, kwds))

            # for tsplot
            if isinstance(plotf, str):
                # XXX _plot_classes is private and shouldn't be imported
                # here. But as tsplot is deprecated, and we'll remove this
                # code soon, it's probably better to not overcomplicate
                # things, and just leave this the way it was implemented
                from pandas.plotting._core import _plot_classes
                plotf = _plot_classes()[plotf]._plot

            lines.append(plotf(ax, series.index._mpl_repr(),
                               series.values, **kwds)[0])
            labels.append(pprint_thing(series.name))

    return lines, labels
Example #10
    def _summary(self, name=None):
        """
        Return a summarized representation.

        Parameters
        ----------
        name : str
            name to use in the summary representation

        Returns
        -------
        String with a summarized representation of the index
        """
        formatter = self._formatter_func
        if len(self) > 0:
            index_summary = ', %s to %s' % (formatter(self[0]),
                                            formatter(self[-1]))
        else:
            index_summary = ''

        if name is None:
            name = type(self).__name__
        result = '%s: %s entries%s' % (printing.pprint_thing(name),
                                       len(self), index_summary)
        if self.freq:
            result += '\nFreq: %s' % self.freqstr

        # display as values, not quoted
        result = result.replace("'", "")
        return result
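
As a rough illustration only, a hedged sketch of the kind of string _summary builds, using a small DatetimeIndex; _summary is private API and the exact rendering may differ between pandas versions.

    import pandas as pd

    idx = pd.date_range('2020-01-01', periods=3, freq='D')
    # output is expected to resemble:
    # DatetimeIndex: 3 entries, 2020-01-01 to 2020-01-03
    # Freq: D
    print(idx._summary())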
Example #11
File: eval.py Project: Axik/pandas
def _convert_expression(expr):
    """Convert an object to an expression.

    This function converts an object to an expression (a unicode string) and
    checks to make sure it isn't empty after conversion. This is used to
    convert operators to their string representation for recursive calls to
    :func:`~pandas.eval`.

    Parameters
    ----------
    expr : object
        The object to be converted to a string.

    Returns
    -------
    s : unicode
        The string representation of an object.

    Raises
    ------
    ValueError
      * If the expression is empty.
    """
    s = pprint_thing(expr)
    _check_expression(s)
    return s
Example #12
 def check_keys_split(self, decoded):
     "checks that dict has only the appropriate keys for orient='split'"
     bad_keys = set(decoded.keys()).difference(set(self._split_keys))
     if bad_keys:
         bad_keys = ", ".join(bad_keys)
         raise ValueError(u("JSON data had unexpected key(s): {bad_keys}")
                          .format(bad_keys=pprint_thing(bad_keys)))
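
A standalone sketch of the same validation; the key set below is an assumption about the orient='split' layout, not taken from the excerpt.

    split_keys = {'columns', 'index', 'data'}  # assumed keys for orient='split'
    decoded = {'columns': [], 'index': [], 'data': [], 'extra': 1}
    bad_keys = set(decoded) - split_keys
    if bad_keys:
        print('JSON data had unexpected key(s): ' + ', '.join(sorted(bad_keys)))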
Example #13
def _replot_ax(ax, freq, kwargs):
    data = getattr(ax, '_plot_data', None)

    # clear current axes and data
    ax._plot_data = []
    ax.clear()

    _decorate_axes(ax, freq, kwargs)

    lines = []
    labels = []
    if data is not None:
        for series, plotf, kwds in data:
            series = series.copy()
            idx = series.index.asfreq(freq, how='S')
            series.index = idx
            ax._plot_data.append((series, plotf, kwds))

            # for tsplot
            if isinstance(plotf, compat.string_types):
                from pandas.plotting._core import _plot_klass
                plotf = _plot_klass[plotf]._plot

            lines.append(plotf(ax, series.index._mpl_repr(),
                               series.values, **kwds)[0])
            labels.append(pprint_thing(series.name))

    return lines, labels
Example #14
 def __repr__(self):
     """
     Return a string representation for this object.
     """
     prepr = pprint_thing(self, escape_chars=('\t', '\r', '\n'),
                          quote_strings=True)
     return "%s(%s, dtype='%s')" % (type(self).__name__, prepr, self.dtype)
Example #15
def _grouped_plot(plotf, data, column=None, by=None, numeric_only=True,
                  figsize=None, sharex=True, sharey=True, layout=None,
                  rot=0, ax=None, **kwargs):

    if figsize == 'default':
        # allowed to specify mpl default with 'default'
        warnings.warn("figsize='default' is deprecated. Specify figure "
                      "size by tuple instead", FutureWarning, stacklevel=5)
        figsize = None

    grouped = data.groupby(by)
    if column is not None:
        grouped = grouped[column]

    naxes = len(grouped)
    fig, axes = _subplots(naxes=naxes, figsize=figsize,
                          sharex=sharex, sharey=sharey, ax=ax,
                          layout=layout)

    _axes = _flatten(axes)

    for i, (key, group) in enumerate(grouped):
        ax = _axes[i]
        if numeric_only and isinstance(group, ABCDataFrame):
            group = group._get_numeric_data()
        plotf(group, ax, **kwargs)
        ax.set_title(pprint_thing(key))

    return fig, axes
Example #16
    def parameter_mismatches(self):
        errs = []
        signature_params = self.signature_parameters
        doc_params = tuple(self.doc_parameters)
        missing = set(signature_params) - set(doc_params)
        if missing:
            errs.append(error('PR01', missing_params=pprint_thing(missing)))
        extra = set(doc_params) - set(signature_params)
        if extra:
            errs.append(error('PR02', unknown_params=pprint_thing(extra)))
        if (not missing and not extra and signature_params != doc_params
                and not (not signature_params and not doc_params)):
            errs.append(error('PR03',
                              actual_params=signature_params,
                              documented_params=doc_params))

        return errs
Example #17
    def parameter_mismatches(self):
        errs = []
        signature_params = self.signature_parameters
        doc_params = tuple(self.doc_parameters)
        missing = set(signature_params) - set(doc_params)
        if missing:
            errs.append(
                'Parameters {} not documented'.format(pprint_thing(missing)))
        extra = set(doc_params) - set(signature_params)
        if extra:
            errs.append('Unknown parameters {}'.format(pprint_thing(extra)))
        if (not missing and not extra and signature_params != doc_params
                and not (not signature_params and not doc_params)):
            errs.append('Wrong parameters order. ' +
                        'Actual: {!r}. '.format(signature_params) +
                        'Documented: {!r}'.format(doc_params))

        return errs
Example #18
    def __unicode__(self):
        """
        Return a string representation for this object.

        Invoked by unicode(df) in py2 only. Yields a Unicode String in both
        py2/py3.
        """
        prepr = pprint_thing(self, escape_chars=('\t', '\r', '\n'),
                             quote_strings=True)
        return "%s(%s, dtype='%s')" % (type(self).__name__, prepr, self.dtype)
Example #19
def andrews_curves(frame, class_column, ax=None, samples=200, color=None,
                   colormap=None, **kwds):
    from math import sqrt, pi

    def function(amplitudes):
        def f(t):
            x1 = amplitudes[0]
            result = x1 / sqrt(2.0)

            # Take the rest of the coefficients and resize them
            # appropriately. Take a copy of amplitudes as otherwise numpy
            # deletes the element from amplitudes itself.
            coeffs = np.delete(np.copy(amplitudes), 0)
            coeffs.resize(int((coeffs.size + 1) / 2), 2)

            # Generate the harmonics and arguments for the sin and cos
            # functions.
            harmonics = np.arange(0, coeffs.shape[0]) + 1
            trig_args = np.outer(harmonics, t)

            result += np.sum(coeffs[:, 0, np.newaxis] * np.sin(trig_args) +
                             coeffs[:, 1, np.newaxis] * np.cos(trig_args),
                             axis=0)
            return result
        return f

    n = len(frame)
    class_col = frame[class_column]
    classes = frame[class_column].drop_duplicates()
    df = frame.drop(class_column, axis=1)
    t = np.linspace(-pi, pi, samples)
    used_legends = set()

    color_values = _get_standard_colors(num_colors=len(classes),
                                        colormap=colormap, color_type='random',
                                        color=color)
    colors = dict(zip(classes, color_values))
    if ax is None:
        ax = plt.gca(xlim=(-pi, pi))
    for i in range(n):
        row = df.iloc[i].values
        f = function(row)
        y = f(t)
        kls = class_col.iat[i]
        label = pprint_thing(kls)
        if label not in used_legends:
            used_legends.add(label)
            ax.plot(t, y, color=colors[kls], label=label, **kwds)
        else:
            ax.plot(t, y, color=colors[kls], **kwds)

    ax.legend(loc='upper right')
    ax.grid()
    return ax
Example #20
    def _write_cell(self, s, kind='td', indent=0, tags=None):
        if tags is not None:
            start_tag = '<{kind} {tags}>'.format(kind=kind, tags=tags)
        else:
            start_tag = '<{kind}>'.format(kind=kind)

        if self.escape:
            # escape & first to prevent double escaping of &
            esc = OrderedDict([('&', r'&amp;'), ('<', r'&lt;'),
                               ('>', r'&gt;')])
        else:
            esc = {}
        rs = pprint_thing(s, escape_chars=esc).strip()
        self.write(u'{start}{rs}</{kind}>'
                   .format(start=start_tag, rs=rs, kind=kind), indent)
Example #21
    def run_arithmetic(self, df, other, assert_func, check_dtype=False,
                       test_flex=True):
        expr._MIN_ELEMENTS = 0
        operations = ['add', 'sub', 'mul', 'mod', 'truediv', 'floordiv', 'pow']
        if not compat.PY3:
            operations.append('div')
        for arith in operations:

            # numpy >= 1.11 doesn't handle integers
            # raised to integer powers
            # https://github.com/pandas-dev/pandas/issues/15363
            if arith == 'pow' and not _np_version_under1p11:
                continue

            operator_name = arith
            if arith == 'div':
                operator_name = 'truediv'

            if test_flex:
                op = lambda x, y: getattr(df, arith)(y)
                op.__name__ = arith
            else:
                op = getattr(operator, operator_name)
            expr.set_use_numexpr(False)
            expected = op(df, other)
            expr.set_use_numexpr(True)

            result = op(df, other)
            try:
                if check_dtype:
                    if arith == 'truediv':
                        assert expected.dtype.kind == 'f'
                assert_func(expected, result)
            except Exception:
                pprint_thing("Failed test with operator %r" % op.__name__)
                raise
Example #22
        def writerow(self, row):
            def _check_as_is(x):
                return (self.quoting == csv.QUOTE_NONNUMERIC and
                        is_number(x)) or isinstance(x, str)

            row = [x if _check_as_is(x)
                   else pprint_thing(x).encode("utf-8") for x in row]

            self.writer.writerow([s for s in row])
            # Fetch UTF-8 output from the queue ...
            data = self.queue.getvalue()
            data = data.decode("utf-8")
            # ... and re-encode it into the target encoding
            data = self.encoder.encode(data)
            # write to the target stream
            self.stream.write(data)
            # empty queue
            self.queue.truncate(0)
Example #23
    def _make_plot(self):
        colors = self._get_colors()
        stacking_id = self._get_stacking_id()

        for i, (label, y) in enumerate(self._iter_data()):
            ax = self._get_ax(i)

            kwds = self.kwds.copy()

            label = pprint_thing(label)
            kwds['label'] = label

            style, kwds = self._apply_style_colors(colors, kwds, i, label)
            if style is not None:
                kwds['style'] = style

            kwds = self._make_plot_keywords(kwds, y)
            artists = self._plot(ax, y, column_num=i,
                                 stacking_id=stacking_id, **kwds)
            self._add_legend_handle(artists[0], label, index=i)
Example #24
    def apply_series_generator(self):
        series_gen = self.series_generator
        res_index = self.result_index

        i = None
        keys = []
        results = {}
        if self.ignore_failures:
            successes = []
            for i, v in enumerate(series_gen):
                try:
                    results[i] = self.f(v)
                    keys.append(v.name)
                    successes.append(i)
                except Exception:
                    pass

            # so will work with MultiIndex
            if len(successes) < len(res_index):
                res_index = res_index.take(successes)

        else:
            try:
                for i, v in enumerate(series_gen):
                    results[i] = self.f(v)
                    keys.append(v.name)
            except Exception as e:
                if hasattr(e, 'args'):

                    # make sure i is defined
                    if i is not None:
                        k = res_index[i]
                        e.args = e.args + ('occurred at index %s' %
                                           pprint_thing(k), )
                raise

        self.results = results
        self.res_index = res_index
        self.res_columns = self.result_columns
Example #25
 def _gen_dtypes(self) -> Iterator[str]:
     """Iterator with string representation of column dtypes."""
     for dtype in self.dtypes:
         yield pprint_thing(dtype)
Example #26
 def __str__(self):
     return printing.pprint_thing(self.terms)
Example #27
def _print_as_set(s):
    return '{%s}' % ', '.join([pprint_thing(el) for el in s])
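
A tiny standalone sketch of the helper above; str() stands in for pprint_thing so the snippet runs without pandas internals.

    def print_as_set_sketch(s):
        # hypothetical stand-in for _print_as_set
        return '{%s}' % ', '.join(str(el) for el in s)

    print(print_as_set_sketch({'only'}))  # prints: {only}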
Example #28
    def _make_plot(self):
        import matplotlib as mpl

        colors = self._get_colors()
        ncolors = len(colors)

        pos_prior = neg_prior = np.zeros(len(self.data))
        K = self.nseries

        for i, (label, y) in enumerate(self._iter_data(fillna=0)):
            ax = self._get_ax(i)
            kwds = self.kwds.copy()
            if self._is_series:
                kwds["color"] = colors
            else:
                kwds["color"] = colors[i % ncolors]

            errors = self._get_errorbars(label=label, index=i)
            kwds = dict(kwds, **errors)

            label = pprint_thing(label)

            if (("yerr" in kwds) or
                ("xerr" in kwds)) and (kwds.get("ecolor") is None):
                kwds["ecolor"] = mpl.rcParams["xtick.color"]

            start = 0
            if self.log and (y >= 1).all():
                start = 1
            start = start + self._start_base

            if self.subplots:
                w = self.bar_width / 2
                rect = self._plot(ax,
                                  self.ax_pos + w,
                                  y,
                                  self.bar_width,
                                  start=start,
                                  label=label,
                                  log=self.log,
                                  **kwds)
                ax.set_title(label)
            elif self.stacked:
                mask = y > 0
                start = np.where(mask, pos_prior, neg_prior) + self._start_base
                w = self.bar_width / 2
                rect = self._plot(ax,
                                  self.ax_pos + w,
                                  y,
                                  self.bar_width,
                                  start=start,
                                  label=label,
                                  log=self.log,
                                  **kwds)
                pos_prior = pos_prior + np.where(mask, y, 0)
                neg_prior = neg_prior + np.where(mask, 0, y)
            else:
                w = self.bar_width / K
                rect = self._plot(ax,
                                  self.ax_pos + (i + 0.5) * w,
                                  y,
                                  w,
                                  start=start,
                                  label=label,
                                  log=self.log,
                                  **kwds)
            self._add_legend_handle(rect, label, index=i)
Example #29
 def get_label(i):
     try:
         return pprint_thing(data.index[i])
     except Exception:
         return ""
Example #30
 def write(self, s: Any, indent: int = 0) -> None:
     rs = pprint_thing(s)
     self.elements.append(" " * indent + rs)
Example #31
 def _post_plot_logic(self, ax, data):
     x, y = self.x, self.y
     ax.set_ylabel(pprint_thing(y))
     ax.set_xlabel(pprint_thing(x))
Example #32
    def __init__(
        self,
        index: Index,
        grouper=None,
        obj: NDFrame | None = None,
        level=None,
        sort: bool = True,
        observed: bool = False,
        in_axis: bool = False,
        dropna: bool = True,
    ) -> None:
        self.level = level
        self._orig_grouper = grouper
        self.grouping_vector = _convert_grouper(index, grouper)
        self._all_grouper = None
        self._index = index
        self._sort = sort
        self.obj = obj
        self._observed = observed
        self.in_axis = in_axis
        self._dropna = dropna

        self._passed_categorical = False

        # we have a single grouper which may be a myriad of things,
        # some of which are dependent on the passing in level

        ilevel = self._ilevel
        if ilevel is not None:
            mapper = self.grouping_vector
            # In extant tests, the new self.grouping_vector matches
            #  `index.get_level_values(ilevel)` whenever
            #  mapper is None and isinstance(index, MultiIndex)
            (
                self.grouping_vector,  # Index
                self._codes,
                self._group_index,
            ) = index._get_grouper_for_level(mapper, level=ilevel)

        # a passed Grouper like, directly get the grouper in the same way
        # as single grouper groupby, use the group_info to get codes
        elif isinstance(self.grouping_vector, Grouper):
            # get the new grouper; we already have disambiguated
            # what key/level refer to exactly, don't need to
            # check again as we have by this point converted these
            # to an actual value (rather than a pd.Grouper)
            assert self.obj is not None  # for mypy
            _, newgrouper, newobj = self.grouping_vector._get_grouper(
                self.obj, validate=False)
            self.obj = newobj

            ng = newgrouper._get_grouper()
            if isinstance(newgrouper, ops.BinGrouper):
                # in this case we have `ng is newgrouper`
                self.grouping_vector = ng
            else:
                # ops.BaseGrouper
                # use Index instead of ndarray so we can recover the name
                self.grouping_vector = Index(ng,
                                             name=newgrouper.result_index.name)

        elif is_categorical_dtype(self.grouping_vector):
            # a passed Categorical
            self._passed_categorical = True

            self.grouping_vector, self._all_grouper = recode_for_groupby(
                self.grouping_vector, sort, observed)

        elif not isinstance(self.grouping_vector,
                            (Series, Index, ExtensionArray, np.ndarray)):
            # no level passed
            if getattr(self.grouping_vector, "ndim", 1) != 1:
                t = self.name or str(type(self.grouping_vector))
                raise ValueError(f"Grouper for '{t}' not 1-dimensional")

            self.grouping_vector = index.map(self.grouping_vector)

            if not (hasattr(self.grouping_vector, "__len__")
                    and len(self.grouping_vector) == len(index)):
                grper = pprint_thing(self.grouping_vector)
                errmsg = ("Grouper result violates len(labels) == "
                          f"len(data)\nresult: {grper}")
                self.grouping_vector = None  # Try for sanity
                raise AssertionError(errmsg)

        if isinstance(self.grouping_vector, np.ndarray):
            # if we have a date/time-like grouper, make sure that we have
            # Timestamps like
            self.grouping_vector = sanitize_to_nanoseconds(
                self.grouping_vector)
Example #33
    def test_arith_flex_frame(self):
        ops = ['add', 'sub', 'mul', 'div', 'truediv', 'pow', 'floordiv', 'mod']
        if not compat.PY3:
            aliases = {}
        else:
            aliases = {'div': 'truediv'}

        for op in ops:
            try:
                alias = aliases.get(op, op)
                f = getattr(operator, alias)
                result = getattr(self.frame, op)(2 * self.frame)
                exp = f(self.frame, 2 * self.frame)
                assert_frame_equal(result, exp)

                # vs mix float
                result = getattr(self.mixed_float, op)(2 * self.mixed_float)
                exp = f(self.mixed_float, 2 * self.mixed_float)
                assert_frame_equal(result, exp)
                _check_mixed_float(result, dtype=dict(C=None))

                # vs mix int
                if op in ['add', 'sub', 'mul']:
                    result = getattr(self.mixed_int, op)(2 + self.mixed_int)
                    exp = f(self.mixed_int, 2 + self.mixed_int)

                    # no overflow in the uint
                    dtype = None
                    if op in ['sub']:
                        dtype = dict(B='uint64', C=None)
                    elif op in ['add', 'mul']:
                        dtype = dict(C=None)
                    assert_frame_equal(result, exp)
                    _check_mixed_int(result, dtype=dtype)

                    # rops
                    r_f = lambda x, y: f(y, x)
                    result = getattr(self.frame, 'r' + op)(2 * self.frame)
                    exp = r_f(self.frame, 2 * self.frame)
                    assert_frame_equal(result, exp)

                    # vs mix float
                    result = getattr(self.mixed_float,
                                     op)(2 * self.mixed_float)
                    exp = f(self.mixed_float, 2 * self.mixed_float)
                    assert_frame_equal(result, exp)
                    _check_mixed_float(result, dtype=dict(C=None))

                    result = getattr(self.intframe, op)(2 * self.intframe)
                    exp = f(self.intframe, 2 * self.intframe)
                    assert_frame_equal(result, exp)

                    # vs mix int
                    if op in ['add', 'sub', 'mul']:
                        result = getattr(self.mixed_int,
                                         op)(2 + self.mixed_int)
                        exp = f(self.mixed_int, 2 + self.mixed_int)

                        # no overflow in the uint
                        dtype = None
                        if op in ['sub']:
                            dtype = dict(B='uint64', C=None)
                        elif op in ['add', 'mul']:
                            dtype = dict(C=None)
                        assert_frame_equal(result, exp)
                        _check_mixed_int(result, dtype=dtype)
            except Exception:
                printing.pprint_thing("Failing operation %r" % op)
                raise

            # ndim >= 3
            ndim_5 = np.ones(self.frame.shape + (3, 4, 5))
            msg = "Unable to coerce to Series/DataFrame"
            with tm.assert_raises_regex(ValueError, msg):
                f(self.frame, ndim_5)

            with tm.assert_raises_regex(ValueError, msg):
                getattr(self.frame, op)(ndim_5)

        # res_add = self.frame.add(self.frame)
        # res_sub = self.frame.sub(self.frame)
        # res_mul = self.frame.mul(self.frame)
        # res_div = self.frame.div(2 * self.frame)

        # assert_frame_equal(res_add, self.frame + self.frame)
        # assert_frame_equal(res_sub, self.frame - self.frame)
        # assert_frame_equal(res_mul, self.frame * self.frame)
        # assert_frame_equal(res_div, self.frame / (2 * self.frame))

        const_add = self.frame.add(1)
        assert_frame_equal(const_add, self.frame + 1)

        # corner cases
        result = self.frame.add(self.frame[:0])
        assert_frame_equal(result, self.frame * np.nan)

        result = self.frame[:0].add(self.frame)
        assert_frame_equal(result, self.frame * np.nan)
        with tm.assert_raises_regex(NotImplementedError, 'fill_value'):
            self.frame.add(self.frame.iloc[0], fill_value=3)
        with tm.assert_raises_regex(NotImplementedError, 'fill_value'):
            self.frame.add(self.frame.iloc[0], axis='index', fill_value=3)
Example #34
    def __init__(
        self,
        index: Index,
        grouper=None,
        obj: Optional[FrameOrSeries] = None,
        name=None,
        level=None,
        sort: bool = True,
        observed: bool = False,
        in_axis: bool = False,
        dropna: bool = True,
    ):
        self.name = name
        self.level = level
        self.grouper = _convert_grouper(index, grouper)
        self.all_grouper = None
        self.index = index
        self.sort = sort
        self.obj = obj
        self.observed = observed
        self.in_axis = in_axis
        self.dropna = dropna

        # right place for this?
        if isinstance(grouper, (Series, Index)) and name is None:
            self.name = grouper.name

        if isinstance(grouper, MultiIndex):
            self.grouper = grouper._values

        # we have a single grouper which may be a myriad of things,
        # some of which are dependent on the passing in level

        if level is not None:
            if not isinstance(level, int):
                if level not in index.names:
                    raise AssertionError(f"Level {level} not in index")
                level = index.names.index(level)

            if self.name is None:
                self.name = index.names[level]

            (
                self.grouper,
                self._codes,
                self._group_index,
            ) = index._get_grouper_for_level(self.grouper, level)

        # a passed Grouper like, directly get the grouper in the same way
        # as single grouper groupby, use the group_info to get codes
        elif isinstance(self.grouper, Grouper):
            # get the new grouper; we already have disambiguated
            # what key/level refer to exactly, don't need to
            # check again as we have by this point converted these
            # to an actual value (rather than a pd.Grouper)
            _, grouper, _ = self.grouper._get_grouper(self.obj, validate=False)
            if self.name is None:
                self.name = grouper.result_index.name
            self.obj = self.grouper.obj
            self.grouper = grouper._get_grouper()

        else:
            if self.grouper is None and self.name is not None and self.obj is not None:
                self.grouper = self.obj[self.name]

            elif isinstance(self.grouper, (list, tuple)):
                self.grouper = com.asarray_tuplesafe(self.grouper)

            # a passed Categorical
            elif is_categorical_dtype(self.grouper):

                self.grouper, self.all_grouper = recode_for_groupby(
                    self.grouper, self.sort, observed
                )
                categories = self.grouper.categories

                # we make a CategoricalIndex out of the cat grouper
                # preserving the categories / ordered attributes
                self._codes = self.grouper.codes
                if observed:
                    codes = algorithms.unique1d(self.grouper.codes)
                    codes = codes[codes != -1]
                    if sort or self.grouper.ordered:
                        codes = np.sort(codes)
                else:
                    codes = np.arange(len(categories))

                self._group_index = CategoricalIndex(
                    Categorical.from_codes(
                        codes=codes, categories=categories, ordered=self.grouper.ordered
                    ),
                    name=self.name,
                )

            # we are done
            if isinstance(self.grouper, Grouping):
                self.grouper = self.grouper.grouper

            # no level passed
            elif not isinstance(
                self.grouper, (Series, Index, ExtensionArray, np.ndarray)
            ):
                if getattr(self.grouper, "ndim", 1) != 1:
                    t = self.name or str(type(self.grouper))
                    raise ValueError(f"Grouper for '{t}' not 1-dimensional")
                self.grouper = self.index.map(self.grouper)
                if not (
                    hasattr(self.grouper, "__len__")
                    and len(self.grouper) == len(self.index)
                ):
                    grper = pprint_thing(self.grouper)
                    errmsg = (
                        "Grouper result violates len(labels) == "
                        f"len(data)\nresult: {grper}"
                    )
                    self.grouper = None  # Try for sanity
                    raise AssertionError(errmsg)

        # if we have a date/time-like grouper, make sure that we have
        # Timestamps like
        if getattr(self.grouper, "dtype", None) is not None:
            if is_datetime64_dtype(self.grouper):
                self.grouper = self.grouper.astype("datetime64[ns]")
            elif is_timedelta64_dtype(self.grouper):

                self.grouper = self.grouper.astype("timedelta64[ns]")
Example #35
def _print_as_set(s):
    return '{%s}' % ', '.join([pprint_thing(el) for el in s])
Example #36
def parallel_coordinates(
    frame: DataFrame,
    class_column,
    cols=None,
    ax: Optional[Axes] = None,
    color=None,
    use_columns=False,
    xticks=None,
    colormap=None,
    axvlines: bool = True,
    axvlines_kwds=None,
    sort_labels: bool = False,
    **kwds,
) -> Axes:
    import matplotlib.pyplot as plt

    if axvlines_kwds is None:
        axvlines_kwds = {"linewidth": 1, "color": "black"}

    n = len(frame)
    classes = frame[class_column].drop_duplicates()
    class_col = frame[class_column]

    if cols is None:
        df = frame.drop(class_column, axis=1)
    else:
        df = frame[cols]

    used_legends: Set[str] = set()

    ncols = len(df.columns)

    # determine values to use for xticks
    if use_columns is True:
        if not np.all(np.isreal(list(df.columns))):
            raise ValueError("Columns must be numeric to be used as xticks")
        x = df.columns
    elif xticks is not None:
        if not np.all(np.isreal(xticks)):
            raise ValueError("xticks specified must be numeric")
        elif len(xticks) != ncols:
            raise ValueError("Length of xticks must match number of columns")
        x = xticks
    else:
        x = list(range(ncols))

    if ax is None:
        ax = plt.gca()

    color_values = get_standard_colors(num_colors=len(classes),
                                       colormap=colormap,
                                       color_type="random",
                                       color=color)

    if sort_labels:
        classes = sorted(classes)
        color_values = sorted(color_values)
    colors = dict(zip(classes, color_values))

    for i in range(n):
        y = df.iloc[i].values
        kls = class_col.iat[i]
        label = pprint_thing(kls)
        if label not in used_legends:
            used_legends.add(label)
            ax.plot(x, y, color=colors[kls], label=label, **kwds)
        else:
            ax.plot(x, y, color=colors[kls], **kwds)

    if axvlines:
        for i in x:
            ax.axvline(i, **axvlines_kwds)

    ax.set_xticks(x)
    ax.set_xticklabels(df.columns)
    ax.set_xlim(x[0], x[-1])
    ax.legend(loc="upper right")
    ax.grid()
    return ax
Example #37
def andrews_curves(
    frame: DataFrame,
    class_column,
    ax: Optional[Axes] = None,
    samples: int = 200,
    color=None,
    colormap=None,
    **kwds,
) -> Axes:
    import matplotlib.pyplot as plt

    def function(amplitudes):
        def f(t):
            x1 = amplitudes[0]
            result = x1 / np.sqrt(2.0)

            # Take the rest of the coefficients and resize them
            # appropriately. Take a copy of amplitudes as otherwise numpy
            # deletes the element from amplitudes itself.
            coeffs = np.delete(np.copy(amplitudes), 0)
            coeffs.resize(int((coeffs.size + 1) / 2), 2)

            # Generate the harmonics and arguments for the sin and cos
            # functions.
            harmonics = np.arange(0, coeffs.shape[0]) + 1
            trig_args = np.outer(harmonics, t)

            result += np.sum(
                coeffs[:, 0, np.newaxis] * np.sin(trig_args) +
                coeffs[:, 1, np.newaxis] * np.cos(trig_args),
                axis=0,
            )
            return result

        return f

    n = len(frame)
    class_col = frame[class_column]
    classes = frame[class_column].drop_duplicates()
    df = frame.drop(class_column, axis=1)
    t = np.linspace(-np.pi, np.pi, samples)
    used_legends: Set[str] = set()

    color_values = get_standard_colors(num_colors=len(classes),
                                       colormap=colormap,
                                       color_type="random",
                                       color=color)
    colors = dict(zip(classes, color_values))
    if ax is None:
        ax = plt.gca()
        ax.set_xlim(-np.pi, np.pi)
    for i in range(n):
        row = df.iloc[i].values
        f = function(row)
        y = f(t)
        kls = class_col.iat[i]
        label = pprint_thing(kls)
        if label not in used_legends:
            used_legends.add(label)
            ax.plot(t, y, color=colors[kls], label=label, **kwds)
        else:
            ax.plot(t, y, color=colors[kls], **kwds)

    ax.legend(loc="upper right")
    ax.grid()
    return ax
Example #38
def radviz(
    frame: DataFrame,
    class_column,
    ax: Optional[Axes] = None,
    color=None,
    colormap=None,
    **kwds,
) -> Axes:
    import matplotlib.pyplot as plt
    import matplotlib.patches as patches

    def normalize(series):
        a = min(series)
        b = max(series)
        return (series - a) / (b - a)

    n = len(frame)
    classes = frame[class_column].drop_duplicates()
    class_col = frame[class_column]
    df = frame.drop(class_column, axis=1).apply(normalize)

    if ax is None:
        ax = plt.gca()
        ax.set_xlim(-1, 1)
        ax.set_ylim(-1, 1)

    to_plot: Dict[Hashable, List[List]] = {}
    colors = get_standard_colors(num_colors=len(classes),
                                 colormap=colormap,
                                 color_type="random",
                                 color=color)

    for kls in classes:
        to_plot[kls] = [[], []]

    m = len(frame.columns) - 1
    s = np.array([(np.cos(t), np.sin(t))
                  for t in [2 * np.pi * (i / m) for i in range(m)]])

    for i in range(n):
        row = df.iloc[i].values
        row_ = np.repeat(np.expand_dims(row, axis=1), 2, axis=1)
        y = (s * row_).sum(axis=0) / row.sum()
        kls = class_col.iat[i]
        to_plot[kls][0].append(y[0])
        to_plot[kls][1].append(y[1])

    for i, kls in enumerate(classes):
        ax.scatter(
            to_plot[kls][0],
            to_plot[kls][1],
            color=colors[i],
            label=pprint_thing(kls),
            **kwds,
        )
    ax.legend()

    ax.add_patch(patches.Circle((0.0, 0.0), radius=1.0, facecolor="none"))

    for xy, name in zip(s, df.columns):

        ax.add_patch(patches.Circle(xy, radius=0.025, facecolor="gray"))

        if xy[0] < 0.0 and xy[1] < 0.0:
            ax.text(xy[0] - 0.025,
                    xy[1] - 0.025,
                    name,
                    ha="right",
                    va="top",
                    size="small")
        elif xy[0] < 0.0 and xy[1] >= 0.0:
            ax.text(
                xy[0] - 0.025,
                xy[1] + 0.025,
                name,
                ha="right",
                va="bottom",
                size="small",
            )
        elif xy[0] >= 0.0 and xy[1] < 0.0:
            ax.text(xy[0] + 0.025,
                    xy[1] - 0.025,
                    name,
                    ha="left",
                    va="top",
                    size="small")
        elif xy[0] >= 0.0 and xy[1] >= 0.0:
            ax.text(xy[0] + 0.025,
                    xy[1] + 0.025,
                    name,
                    ha="left",
                    va="bottom",
                    size="small")

    ax.axis("equal")
    return ax
Example #39
 def raiseException(df):
     pprint_thing("----------------------------------------")
     pprint_thing(df.to_string())
     raise TypeError("test")
Example #40
 def __unicode__(self):
     return '%s\nFill: %s\n%s' % (printing.pprint_thing(self),
                                  printing.pprint_thing(self.fill_value),
                                  printing.pprint_thing(self.sp_index))
Example #41
def _print_as_set(s) -> str:
    arg = ", ".join(pprint_thing(el) for el in s)
    return f"{{{arg}}}"
Example #42
 def __repr__(self):
     if self.terms is not None:
         return pprint_thing(self.terms)
     return pprint_thing(self.expr)
Example #43
def _print_as_set(s):
    return ('{' + '{arg}'.format(arg=', '.join(
        pprint_thing(el) for el in s)) + '}')
Example #44
 def __repr__(self):
     return pprint_thing(
         "[Condition : [{cond}]]".format(cond=self.condition))
Example #45
 def __repr__(self) -> str:
     operands = map(str, self.operands)
     return pprint_thing("{0}({1})".format(self.op, ",".join(operands)))
Example #46
def andrews_curves(frame, class_column, ax=None, samples=200, color=None,
                   colormap=None, **kwds):
    """
    Generates a matplotlib plot of Andrews curves, for visualising clusters of
    multivariate data.

    Andrews curves have the functional form:

    f(t) = x_1/sqrt(2) + x_2 sin(t) + x_3 cos(t) +
           x_4 sin(2t) + x_5 cos(2t) + ...

    Where x coefficients correspond to the values of each dimension and t is
    linearly spaced between -pi and +pi. Each row of frame then corresponds to
    a single curve.

    Parameters
    ----------
    frame : DataFrame
        Data to be plotted, preferably normalized to (0.0, 1.0)
    class_column : Name of the column containing class names
    ax : matplotlib axes object, default None
    samples : Number of points to plot in each curve
    color: list or tuple, optional
        Colors to use for the different classes
    colormap : str or matplotlib colormap object, default None
        Colormap to select colors from. If string, load colormap with that name
        from matplotlib.
    kwds: keywords
        Options to pass to matplotlib plotting method

    Returns
    -------
    ax: Matplotlib axis object

    """
    from math import sqrt, pi
    import matplotlib.pyplot as plt

    def function(amplitudes):
        def f(t):
            x1 = amplitudes[0]
            result = x1 / sqrt(2.0)

            # Take the rest of the coefficients and resize them
            # appropriately. Take a copy of amplitudes as otherwise numpy
            # deletes the element from amplitudes itself.
            coeffs = np.delete(np.copy(amplitudes), 0)
            coeffs.resize(int((coeffs.size + 1) / 2), 2)

            # Generate the harmonics and arguments for the sin and cos
            # functions.
            harmonics = np.arange(0, coeffs.shape[0]) + 1
            trig_args = np.outer(harmonics, t)

            result += np.sum(coeffs[:, 0, np.newaxis] * np.sin(trig_args) +
                             coeffs[:, 1, np.newaxis] * np.cos(trig_args),
                             axis=0)
            return result
        return f

    n = len(frame)
    class_col = frame[class_column]
    classes = frame[class_column].drop_duplicates()
    df = frame.drop(class_column, axis=1)
    t = np.linspace(-pi, pi, samples)
    used_legends = set([])

    color_values = _get_standard_colors(num_colors=len(classes),
                                        colormap=colormap, color_type='random',
                                        color=color)
    colors = dict(zip(classes, color_values))
    if ax is None:
        ax = plt.gca(xlim=(-pi, pi))
    for i in range(n):
        row = df.iloc[i].values
        f = function(row)
        y = f(t)
        kls = class_col.iat[i]
        label = pprint_thing(kls)
        if label not in used_legends:
            used_legends.add(label)
            ax.plot(t, y, color=colors[kls], label=label, **kwds)
        else:
            ax.plot(t, y, color=colors[kls], **kwds)

    ax.legend(loc='upper right')
    ax.grid()
    return ax
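
A hedged usage sketch for the function above via the public pandas.plotting entry point; the tiny synthetic frame and its column names are made up for illustration.

    import pandas as pd
    import matplotlib.pyplot as plt

    df = pd.DataFrame({'x1': [0.1, 0.9, 0.5],
                       'x2': [0.3, 0.7, 0.2],
                       'Name': ['a', 'b', 'a']})
    ax = pd.plotting.andrews_curves(df, 'Name')
    plt.show()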
Example #47
 def __repr__(self) -> str:
     return pprint_thing("{0}({1})".format(self.op, self.operand))
Example #48
 def __repr__(self) -> str:
     return pprint_thing(self.name)
Example #49
 def __unicode__(self):
     return '{self}\nFill: {fill}\n{index}'.format(
         self=printing.pprint_thing(self),
         fill=printing.pprint_thing(self.fill_value),
         index=printing.pprint_thing(self.sp_index))
Example #50
 def __unicode__(self):
     return pprint_thing(self, quote_strings=True,
                         escape_chars=('\t', '\r', '\n'))
Example #51
 def raiseException(df):
     pprint_thing('----------------------------------------')
     pprint_thing(df.to_string())
     raise TypeError('test')
Example #52
 def raw(self) -> str:
     return pprint_thing("{0}(name={1!r}, type={2})"
                         "".format(self.__class__.__name__, self.name,
                                   self.type))
Example #53
def radviz(frame, class_column, ax=None, color=None, colormap=None, **kwds):
    """
    Plot a multidimensional dataset in 2D.

    Each Series in the DataFrame is represented as an evenly distributed
    slice on a circle. Each data point is rendered in the circle according to
    the value on each Series. Highly correlated `Series` in the `DataFrame`
    are placed closer on the unit circle.

    RadViz allows projecting an N-dimensional data set into a 2D space where the
    influence of each dimension can be interpreted as a balance between the
    influence of all dimensions.

    More info available at the `original article
    <http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.135.889>`_
    describing RadViz.

    Parameters
    ----------
    frame : `DataFrame`
        Pandas object holding the data.
    class_column : str
        Column name containing the name of the data point category.
    ax : :class:`matplotlib.axes.Axes`, optional
        A plot instance to which to add the information.
    color : list[str] or tuple[str], optional
        Assign a color to each category. Example: ['blue', 'green'].
    colormap : str or :class:`matplotlib.colors.Colormap`, default None
        Colormap to select colors from. If string, load colormap with that
        name from matplotlib.
    kwds : optional
        Options to pass to matplotlib scatter plotting method.

    Returns
    -------
    axes : :class:`matplotlib.axes.Axes`

    See Also
    --------
    pandas.plotting.andrews_curves : Plot clustering visualization

    Examples
    --------
    .. plot::
        :context: close-figs

        >>> df = pd.DataFrame({
        ...         'SepalLength': [6.5, 7.7, 5.1, 5.8, 7.6, 5.0, 5.4, 4.6,
        ...                         6.7, 4.6],
        ...         'SepalWidth': [3.0, 3.8, 3.8, 2.7, 3.0, 2.3, 3.0, 3.2,
        ...                        3.3, 3.6],
        ...         'PetalLength': [5.5, 6.7, 1.9, 5.1, 6.6, 3.3, 4.5, 1.4,
        ...                         5.7, 1.0],
        ...         'PetalWidth': [1.8, 2.2, 0.4, 1.9, 2.1, 1.0, 1.5, 0.2,
        ...                        2.1, 0.2],
        ...         'Category': ['virginica', 'virginica', 'setosa',
        ...                      'virginica', 'virginica', 'versicolor',
        ...                      'versicolor', 'setosa', 'virginica',
        ...                      'setosa']
        ...     })
        >>> rad_viz = pd.plotting.radviz(df, 'Category')
    """
    import matplotlib.pyplot as plt
    import matplotlib.patches as patches

    def normalize(series):
        a = min(series)
        b = max(series)
        return (series - a) / (b - a)

    n = len(frame)
    classes = frame[class_column].drop_duplicates()
    class_col = frame[class_column]
    df = frame.drop(class_column, axis=1).apply(normalize)

    if ax is None:
        ax = plt.gca(xlim=[-1, 1], ylim=[-1, 1])

    to_plot = {}
    colors = _get_standard_colors(num_colors=len(classes), colormap=colormap,
                                  color_type='random', color=color)

    for kls in classes:
        to_plot[kls] = [[], []]

    m = len(frame.columns) - 1
    s = np.array([(np.cos(t), np.sin(t))
                  for t in [2.0 * np.pi * (i / float(m))
                            for i in range(m)]])

    for i in range(n):
        row = df.iloc[i].values
        row_ = np.repeat(np.expand_dims(row, axis=1), 2, axis=1)
        y = (s * row_).sum(axis=0) / row.sum()
        kls = class_col.iat[i]
        to_plot[kls][0].append(y[0])
        to_plot[kls][1].append(y[1])

    for i, kls in enumerate(classes):
        ax.scatter(to_plot[kls][0], to_plot[kls][1], color=colors[i],
                   label=pprint_thing(kls), **kwds)
    ax.legend()

    ax.add_patch(patches.Circle((0.0, 0.0), radius=1.0, facecolor='none'))

    for xy, name in zip(s, df.columns):

        ax.add_patch(patches.Circle(xy, radius=0.025, facecolor='gray'))

        if xy[0] < 0.0 and xy[1] < 0.0:
            ax.text(xy[0] - 0.025, xy[1] - 0.025, name,
                    ha='right', va='top', size='small')
        elif xy[0] < 0.0 and xy[1] >= 0.0:
            ax.text(xy[0] - 0.025, xy[1] + 0.025, name,
                    ha='right', va='bottom', size='small')
        elif xy[0] >= 0.0 and xy[1] < 0.0:
            ax.text(xy[0] + 0.025, xy[1] - 0.025, name,
                    ha='left', va='top', size='small')
        elif xy[0] >= 0.0 and xy[1] >= 0.0:
            ax.text(xy[0] + 0.025, xy[1] + 0.025, name,
                    ha='left', va='bottom', size='small')

    ax.axis('equal')
    return ax
Example #54
 def _gen_columns(self) -> Iterator[str]:
     """Iterator with string representation of column names."""
     for col in self.ids:
         yield pprint_thing(col)
Example #55
def parallel_coordinates(frame, class_column, cols=None, ax=None, color=None,
                         use_columns=False, xticks=None, colormap=None,
                         axvlines=True, axvlines_kwds=None, sort_labels=False,
                         **kwds):
    """Parallel coordinates plotting.

    Parameters
    ----------
    frame: DataFrame
    class_column: str
        Column name containing class names
    cols: list, optional
        A list of column names to use
    ax: matplotlib.axis, optional
        matplotlib axis object
    color: list or tuple, optional
        Colors to use for the different classes
    use_columns: bool, optional
        If true, columns will be used as xticks
    xticks: list or tuple, optional
        A list of values to use for xticks
    colormap: str or matplotlib colormap, default None
        Colormap to use for line colors.
    axvlines: bool, optional
        If true, vertical lines will be added at each xtick
    axvlines_kwds: keywords, optional
        Options to be passed to axvline method for vertical lines
    sort_labels: bool, default False
        Sort class_column labels, useful when assigning colors

        .. versionadded:: 0.20.0

    kwds: keywords
        Options to pass to matplotlib plotting method

    Returns
    -------
    ax: matplotlib axis object

    Examples
    --------
    >>> from matplotlib import pyplot as plt
    >>> df = pd.read_csv('https://raw.github.com/pandas-dev/pandas/master'
    ...                  '/pandas/tests/data/iris.csv')
    >>> pd.plotting.parallel_coordinates(
    ...     df, 'Name',
    ...     color=('#556270', '#4ECDC4', '#C7F464'))
    >>> plt.show()
    """
    if axvlines_kwds is None:
        axvlines_kwds = {'linewidth': 1, 'color': 'black'}
    import matplotlib.pyplot as plt

    n = len(frame)
    classes = frame[class_column].drop_duplicates()
    class_col = frame[class_column]

    if cols is None:
        df = frame.drop(class_column, axis=1)
    else:
        df = frame[cols]

    used_legends = set([])

    ncols = len(df.columns)

    # determine values to use for xticks
    if use_columns is True:
        if not np.all(np.isreal(list(df.columns))):
            raise ValueError('Columns must be numeric to be used as xticks')
        x = df.columns
    elif xticks is not None:
        if not np.all(np.isreal(xticks)):
            raise ValueError('xticks specified must be numeric')
        elif len(xticks) != ncols:
            raise ValueError('Length of xticks must match number of columns')
        x = xticks
    else:
        x = lrange(ncols)

    if ax is None:
        ax = plt.gca()

    color_values = _get_standard_colors(num_colors=len(classes),
                                        colormap=colormap, color_type='random',
                                        color=color)

    if sort_labels:
        classes = sorted(classes)
        color_values = sorted(color_values)
    colors = dict(zip(classes, color_values))

    for i in range(n):
        y = df.iloc[i].values
        kls = class_col.iat[i]
        label = pprint_thing(kls)
        if label not in used_legends:
            used_legends.add(label)
            ax.plot(x, y, color=colors[kls], label=label, **kwds)
        else:
            ax.plot(x, y, color=colors[kls], **kwds)

    if axvlines:
        for i in x:
            ax.axvline(i, **axvlines_kwds)

    ax.set_xticks(x)
    ax.set_xticklabels(df.columns)
    ax.set_xlim(x[0], x[-1])
    ax.legend(loc='upper right')
    ax.grid()
    return ax
Example #56
 def __repr__(self):
     return pprint_thing("[Filter : [{lhs}] -> [{op}]".format(
         lhs=self.filter[0], op=self.filter[1]))
Example #57
 def f():
     if obj1:
         printing.pprint_thing("this works and shouldn't")
Example #58
    def __repr__(self) -> str:
        return printing.pprint_thing(self.terms)
Example #59
0
    def _write_col_header(self, indent: int) -> None:
        is_truncated_horizontally = self.fmt.is_truncated_horizontally
        if isinstance(self.columns, MultiIndex):
            template = 'colspan="{span:d}" halign="left"'

            if self.fmt.sparsify:
                # GH3547
                sentinel = lib.no_default
            else:
                sentinel = False
            levels = self.columns.format(sparsify=sentinel,
                                         adjoin=False,
                                         names=False)
            level_lengths = get_level_lengths(levels, sentinel)
            inner_lvl = len(level_lengths) - 1
            for lnum, (records,
                       values) in enumerate(zip(level_lengths, levels)):
                if is_truncated_horizontally:
                    # modify the header lines
                    ins_col = self.fmt.tr_col_num
                    if self.fmt.sparsify:
                        recs_new = {}
                        # Increment tags after ... col.
                        for tag, span in list(records.items()):
                            if tag >= ins_col:
                                recs_new[tag + 1] = span
                            elif tag + span > ins_col:
                                recs_new[tag] = span + 1
                                if lnum == inner_lvl:
                                    values = (values[:ins_col] + ("...", ) +
                                              values[ins_col:])
                                else:
                                    # sparse col headers do not receive a ...
                                    values = (values[:ins_col] +
                                              (values[ins_col - 1], ) +
                                              values[ins_col:])
                            else:
                                recs_new[tag] = span
                            # if ins_col lies between tags, all col headers
                            # get ...
                            if tag + span == ins_col:
                                recs_new[ins_col] = 1
                                values = values[:ins_col] + (
                                    "...", ) + values[ins_col:]
                        records = recs_new
                        inner_lvl = len(level_lengths) - 1
                        if lnum == inner_lvl:
                            records[ins_col] = 1
                    else:
                        recs_new = {}
                        for tag, span in list(records.items()):
                            if tag >= ins_col:
                                recs_new[tag + 1] = span
                            else:
                                recs_new[tag] = span
                        recs_new[ins_col] = 1
                        records = recs_new
                        values = values[:ins_col] + ["..."] + values[ins_col:]

                # see gh-22579
                # Column Offset Bug with to_html(index=False) with
                # MultiIndex Columns and Index.
                # Initially fill row with blank cells before column names.
                # TODO: Refactor to remove code duplication with code
                # block below for standard columns index.
                row = [""] * (self.row_levels - 1)
                if self.fmt.index or self.show_col_idx_names:
                    # see gh-22747
                    # If to_html(index_names=False) do not show columns
                    # index names.
                    # TODO: Refactor to use _get_column_name_list from
                    # DataFrameFormatter class and create a
                    # _get_formatted_column_labels function for code
                    # parity with DataFrameFormatter class.
                    if self.fmt.show_index_names:
                        name = self.columns.names[lnum]
                        row.append(pprint_thing(name or ""))
                    else:
                        row.append("")

                tags = {}
                j = len(row)
                for i, v in enumerate(values):
                    if i in records:
                        if records[i] > 1:
                            tags[j] = template.format(span=records[i])
                    else:
                        continue
                    j += 1
                    row.append(v)
                self.write_tr(row,
                              indent,
                              self.indent_delta,
                              tags=tags,
                              header=True)
        else:
            # see gh-22579
            # Column misalignment also occurs for
            # a standard index when the columns index is named.
            # Initially fill row with blank cells before column names.
            # TODO: Refactor to remove code duplication with code block
            # above for columns MultiIndex.
            row = [""] * (self.row_levels - 1)
            if self.fmt.index or self.show_col_idx_names:
                # see gh-22747
                # If to_html(index_names=False) do not show columns
                # index names.
                # TODO: Refactor to use _get_column_name_list from
                # DataFrameFormatter class.
                if self.fmt.show_index_names:
                    row.append(self.columns.name or "")
                else:
                    row.append("")
            row.extend(self._get_columns_formatted_values())
            align = self.fmt.justify

            if is_truncated_horizontally:
                ins_col = self.row_levels + self.fmt.tr_col_num
                row.insert(ins_col, "...")

            self.write_tr(row,
                          indent,
                          self.indent_delta,
                          header=True,
                          align=align)
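A simplified sketch of the colspan bookkeeping used above (an illustration only, not the pandas implementation; the helper name spans_for_level is invented): each sparsified header level is reduced to a dict mapping the starting column of every run of repeated labels to the run length, and runs spanning more than one cell become colspan attributes. This mirrors the shape of the records dicts iterated over in _write_col_header.

def spans_for_level(labels):
    # Map the starting position of each run of equal labels to its length.
    records = {}
    start = 0
    for i in range(1, len(labels) + 1):
        if i == len(labels) or labels[i] != labels[start]:
            records[start] = i - start
            start = i
    return records

level = ['a', 'a', 'b', 'b', 'b']   # one sparsified column-header level
records = spans_for_level(level)    # {0: 2, 2: 3}
cells = [(level[pos], 'colspan="{:d}"'.format(span) if span > 1 else '')
         for pos, span in sorted(records.items())]
# -> [('a', 'colspan="2"'), ('b', 'colspan="3"')]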
Example #60
0
    def test_arith_flex_frame(self):
        seriesd = tm.getSeriesData()
        frame = pd.DataFrame(seriesd).copy()

        mixed_float = pd.DataFrame({'A': frame['A'].copy().astype('float32'),
                                    'B': frame['B'].copy().astype('float32'),
                                    'C': frame['C'].copy().astype('float16'),
                                    'D': frame['D'].copy().astype('float64')})

        intframe = pd.DataFrame({k: v.astype(int)
                                 for k, v in seriesd.items()})
        mixed_int = pd.DataFrame({'A': intframe['A'].copy().astype('int32'),
                                  'B': np.ones(len(intframe), dtype='uint64'),
                                  'C': intframe['C'].copy().astype('uint8'),
                                  'D': intframe['D'].copy().astype('int64')})

        # force these all to int64 to avoid platform testing issues
        intframe = pd.DataFrame({c: s for c, s in intframe.items()},
                                dtype=np.int64)

        ops = ['add', 'sub', 'mul', 'div', 'truediv', 'pow', 'floordiv', 'mod']
        if not PY3:
            aliases = {}
        else:
            aliases = {'div': 'truediv'}

        for op in ops:
            try:
                alias = aliases.get(op, op)
                f = getattr(operator, alias)
                result = getattr(frame, op)(2 * frame)
                exp = f(frame, 2 * frame)
                tm.assert_frame_equal(result, exp)

                # vs mix float
                result = getattr(mixed_float, op)(2 * mixed_float)
                exp = f(mixed_float, 2 * mixed_float)
                tm.assert_frame_equal(result, exp)
                _check_mixed_float(result, dtype=dict(C=None))

                # vs mix int
                if op in ['add', 'sub', 'mul']:
                    result = getattr(mixed_int, op)(2 + mixed_int)
                    exp = f(mixed_int, 2 + mixed_int)

                    # no overflow in the uint
                    dtype = None
                    if op in ['sub']:
                        dtype = dict(B='uint64', C=None)
                    elif op in ['add', 'mul']:
                        dtype = dict(C=None)
                    tm.assert_frame_equal(result, exp)
                    _check_mixed_int(result, dtype=dtype)

                    # rops
                    r_f = lambda x, y: f(y, x)
                    result = getattr(frame, 'r' + op)(2 * frame)
                    exp = r_f(frame, 2 * frame)
                    tm.assert_frame_equal(result, exp)

                    # vs mix float
                    result = getattr(mixed_float, op)(2 * mixed_float)
                    exp = f(mixed_float, 2 * mixed_float)
                    tm.assert_frame_equal(result, exp)
                    _check_mixed_float(result, dtype=dict(C=None))

                    result = getattr(intframe, op)(2 * intframe)
                    exp = f(intframe, 2 * intframe)
                    tm.assert_frame_equal(result, exp)

                    # vs mix int
                    if op in ['add', 'sub', 'mul']:
                        result = getattr(mixed_int, op)(2 + mixed_int)
                        exp = f(mixed_int, 2 + mixed_int)

                        # no overflow in the uint
                        dtype = None
                        if op in ['sub']:
                            dtype = dict(B='uint64', C=None)
                        elif op in ['add', 'mul']:
                            dtype = dict(C=None)
                        tm.assert_frame_equal(result, exp)
                        _check_mixed_int(result, dtype=dtype)
            except Exception:
                printing.pprint_thing("Failing operation %r" % op)
                raise

            # ndim >= 3
            ndim_5 = np.ones(frame.shape + (3, 4, 5))
            msg = "Unable to coerce to Series/DataFrame"
            with tm.assert_raises_regex(ValueError, msg):
                f(frame, ndim_5)

            with tm.assert_raises_regex(ValueError, msg):
                getattr(frame, op)(ndim_5)

        # res_add = frame.add(frame)
        # res_sub = frame.sub(frame)
        # res_mul = frame.mul(frame)
        # res_div = frame.div(2 * frame)

        # tm.assert_frame_equal(res_add, frame + frame)
        # tm.assert_frame_equal(res_sub, frame - frame)
        # tm.assert_frame_equal(res_mul, frame * frame)
        # tm.assert_frame_equal(res_div, frame / (2 * frame))

        const_add = frame.add(1)
        tm.assert_frame_equal(const_add, frame + 1)

        # corner cases
        result = frame.add(frame[:0])
        tm.assert_frame_equal(result, frame * np.nan)

        result = frame[:0].add(frame)
        tm.assert_frame_equal(result, frame * np.nan)
        with tm.assert_raises_regex(NotImplementedError, 'fill_value'):
            frame.add(frame.iloc[0], fill_value=3)
        with tm.assert_raises_regex(NotImplementedError, 'fill_value'):
            frame.add(frame.iloc[0], axis='index', fill_value=3)
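For reference, a minimal sketch of the flex-versus-operator equivalence the test above exercises (using a toy frame rather than the tm.getSeriesData() fixture):

import operator

import numpy as np
import pandas as pd
import pandas.util.testing as tm

frame = pd.DataFrame({'A': [1.0, 2.0, 3.0], 'B': [4.0, 5.0, 6.0]})

# frame.add(other) should match operator.add(frame, other), and the reversed
# method frame.radd(other) should match operator.add(other, frame).
tm.assert_frame_equal(frame.add(2 * frame), operator.add(frame, 2 * frame))
tm.assert_frame_equal(frame.radd(2 * frame), operator.add(2 * frame, frame))

# Adding an empty slice aligns on the union of the indexes, so every value
# is missing -- the same corner case checked at the end of the test.
tm.assert_frame_equal(frame.add(frame[:0]), frame * np.nan)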