예제 #1
0
def create_nd_panel_factory(klass_name,
                            axis_orders,
                            axis_slices,
                            slicer,
                            axis_aliases=None,
                            stat_axis=2):
    """Manufacture an n-dimensional panel class.

    Parameters
    ----------
    klass_name : str
        Name given to the generated class.
    axis_orders : sequence of str
        The names of the axes in order (highest to lowest).
    axis_slices : dict
        Defines how the axes map to the axes of the sliced object.
    slicer : class or str
        The class representing a slice of this panel; it is also used as
        the base class of the generated class.  A string is looked up as an
        attribute of the top-level ``pandas`` package.
    axis_aliases : dict, optional
        Aliases for the various axes.  An empty dict is used when omitted.
    stat_axis : int, default 2
        The default statistic axis.

    Returns
    -------
    type
        A class object representing this panel.

    Raises
    ------
    Exception
        If ``slicer`` is a string that does not name a pandas attribute.

    Notes
    -----
    The het (info) axis is not configurable here; it is always axis 0.
    """

    # if slicer is a name, get the object
    if isinstance(slicer, basestring):
        import pandas
        try:
            slicer = getattr(pandas, slicer)
        except AttributeError:
            # only a failed lookup is translated; anything else propagates
            raise Exception("cannot create this slicer [%s]" % slicer)

    # build the klass
    klass = type(klass_name, (slicer, ), {})

    # add the class variables
    klass._AXIS_ORDERS = axis_orders
    klass._AXIS_NUMBERS = dict((a, i) for i, a in enumerate(axis_orders))
    klass._AXIS_ALIASES = axis_aliases or dict()
    klass._AXIS_NAMES = dict((i, a) for i, a in enumerate(axis_orders))
    klass._AXIS_SLICEMAP = axis_slices
    klass._AXIS_LEN = len(axis_orders)
    klass._default_stat_axis = stat_axis
    klass._het_axis = 0
    klass._info_axis = axis_orders[klass._het_axis]

    klass._constructor_sliced = slicer

    # add the axes
    for i, a in enumerate(axis_orders):
        setattr(klass, a, lib.AxisProperty(i))

    #### define the methods ####
    def __init__(self, *args, **kwargs):
        # Compare against None instead of truth-testing the data: a
        # multi-element ndarray passed as data= has no truth value.
        if kwargs.get('data') is None and not args:
            raise Exception("must supply at least a data argument to [%s]" %
                            klass_name)
        kwargs.setdefault('copy', False)
        kwargs.setdefault('dtype', None)
        self._init_data(*args, **kwargs)

    klass.__init__ = __init__

    def _get_plane_axes(self, axis):
        """Return the axis objects of every axis except ``axis``."""
        axis = self._get_axis_name(axis)
        index = self._AXIS_ORDERS.index(axis)

        # slicing past the end yields an empty sequence, so no bounds
        # checks are needed around the excluded position
        planes = list(self._AXIS_ORDERS[:index])
        planes.extend(self._AXIS_ORDERS[index + 1:])

        return [getattr(self, p) for p in planes]

    klass._get_plane_axes = _get_plane_axes

    def _combine(self, other, func, axis=0):
        # same-class operands are aligned on all axes; everything else is
        # delegated to the slicer base class
        if isinstance(other, klass):
            return self._combine_with_constructor(other, func)
        return super(klass, self)._combine(other, func, axis=axis)

    klass._combine = _combine

    def _combine_with_constructor(self, other, func):

        # combine labels to form new axes
        new_axes = [getattr(self, a) + getattr(other, a)
                    for a in self._AXIS_ORDERS]

        # reindex: could check that everything's the same size, but forget it
        d = dict(zip(self._AXIS_ORDERS, new_axes))
        d['copy'] = False
        this = self.reindex(**d)
        other = other.reindex(**d)

        result_values = func(this.values, other.values)

        return self._constructor(result_values, **d)

    klass._combine_with_constructor = _combine_with_constructor

    # set as NonImplemented operations which we don't support
    def func(self, *args, **kwargs):
        raise NotImplementedError

    for f in [
            'to_frame', 'to_excel', 'to_sparse', 'groupby', 'join', 'filter',
            'dropna', 'shift'
    ]:
        setattr(klass, f, func)

    # add the aggregate operations
    klass._add_aggregate_operations()

    return klass
예제 #2
0
파일: panelnd.py 프로젝트: MarkyV/pandas
def create_nd_panel_factory(klass_name,
                            axis_orders,
                            axis_slices,
                            slicer,
                            axis_aliases=None,
                            stat_axis=2):
    """Manufacture an n-dimensional panel class.

    Parameters
    ----------
    klass_name : str
        Name given to the generated class.
    axis_orders : sequence of str
        The names of the axes in order (highest to lowest).
    axis_slices : dict
        Defines how the axes map to the axes of the sliced object.
    slicer : class
        The class representing a slice of this panel; it is also used as
        the base class of the generated class.
    axis_aliases : dict, optional
        Aliases for the various axes.  An empty dict is used when omitted.
    stat_axis : int, default 2
        The default statistic axis.

    Returns
    -------
    type
        A class object representing this panel.

    Notes
    -----
    The het (info) axis is not configurable here; it is always axis 0.
    """

    # build the klass
    klass = type(klass_name, (slicer, ), {})

    # add the class variables
    klass._AXIS_ORDERS = axis_orders
    klass._AXIS_NUMBERS = dict((a, i) for i, a in enumerate(axis_orders))
    klass._AXIS_ALIASES = axis_aliases or dict()
    klass._AXIS_NAMES = dict((i, a) for i, a in enumerate(axis_orders))
    klass._AXIS_SLICEMAP = axis_slices
    klass._AXIS_LEN = len(axis_orders)
    klass._default_stat_axis = stat_axis
    klass._het_axis = 0
    klass._info_axis = axis_orders[klass._het_axis]
    klass._constructor_sliced = slicer

    # add the axes
    for i, a in enumerate(axis_orders):
        setattr(klass, a, lib.AxisProperty(i))

    # define the __init__
    def __init__(self, *args, **kwargs):
        # Compare against None instead of truth-testing the data: a
        # multi-element ndarray passed as data= has no truth value.
        if kwargs.get('data') is None and not args:
            raise Exception("must supply at least a data argument to [%s]" %
                            klass_name)
        kwargs.setdefault('copy', False)
        kwargs.setdefault('dtype', None)
        self._init_data(*args, **kwargs)

    klass.__init__ = __init__

    # define _get_plane_axes
    def _get_plane_axes(self, axis):
        """Return the axis objects of every axis except ``axis``."""
        axis = self._get_axis_name(axis)
        index = self._AXIS_ORDERS.index(axis)

        # skip position ``index`` itself; slicing past the end is empty
        planes = list(self._AXIS_ORDERS[:index])
        planes.extend(self._AXIS_ORDERS[index + 1:])

        # hand back the axis objects rather than their names
        return [getattr(self, p) for p in planes]

    # the original statement here was a bare attribute access, which left
    # the method unattached
    klass._get_plane_axes = _get_plane_axes

    # remove these operations
    def to_frame(self, *args, **kwargs):
        raise NotImplementedError

    klass.to_frame = to_frame

    def to_excel(self, *args, **kwargs):
        raise NotImplementedError

    klass.to_excel = to_excel

    return klass
예제 #3
0
파일: panel.py 프로젝트: donroy81/pandas
class Panel(NDFrame):
    # axis name -> positional axis number
    _AXIS_NUMBERS = {'items': 0, 'major_axis': 1, 'minor_axis': 2}

    # short alias -> full axis name
    _AXIS_ALIASES = {'major': 'major_axis', 'minor': 'minor_axis'}

    # positional axis number -> axis name (inverse of _AXIS_NUMBERS)
    _AXIS_NAMES = {0: 'items', 1: 'major_axis', 2: 'minor_axis'}

    # major
    _default_stat_axis = 1
    _het_axis = 0

    # one AxisProperty per axis, each built with that axis' number
    items = lib.AxisProperty(0)
    major_axis = lib.AxisProperty(1)
    minor_axis = lib.AxisProperty(2)

    # binary operators, built by _arith_method from the operator module
    __add__ = _arith_method(operator.add, '__add__')
    __sub__ = _arith_method(operator.sub, '__sub__')
    __truediv__ = _arith_method(operator.truediv, '__truediv__')
    __floordiv__ = _arith_method(operator.floordiv, '__floordiv__')
    __mul__ = _arith_method(operator.mul, '__mul__')
    __pow__ = _arith_method(operator.pow, '__pow__')

    # reflected operators; for the non-commutative ones the lambda swaps
    # the operand order
    __radd__ = _arith_method(operator.add, '__radd__')
    __rmul__ = _arith_method(operator.mul, '__rmul__')
    __rsub__ = _arith_method(lambda x, y: y - x, '__rsub__')
    __rtruediv__ = _arith_method(lambda x, y: y / x, '__rtruediv__')
    __rfloordiv__ = _arith_method(lambda x, y: y // x, '__rfloordiv__')
    __rpow__ = _arith_method(lambda x, y: y**x, '__rpow__')

    # the classic division operators are only defined when not on Python 3
    if not py3compat.PY3:
        __div__ = _arith_method(operator.div, '__div__')
        __rdiv__ = _arith_method(lambda x, y: y / x, '__rdiv__')

    def __init__(self,
                 data=None,
                 items=None,
                 major_axis=None,
                 minor_axis=None,
                 copy=False,
                 dtype=None):
        """
        Represents wide format panel data, stored as 3-dimensional array

        Parameters
        ----------
        data : ndarray (items x major x minor), or dict of DataFrames
            A BlockManager is also accepted.  ``None`` is treated as an
            empty dict.
        items : Index or array-like
            axis=0
        major_axis : Index or array-like
            axis=1
        minor_axis : Index or array-like
            axis=2
        dtype : dtype, default None
            Data type to force, otherwise infer
        copy : boolean, default False
            Copy data from inputs. Only affects DataFrame / 2d ndarray input

        Raises
        ------
        PandasError
            If ``data`` is none of the supported types.
        """
        if data is None:
            data = {}

        passed_axes = [items, major_axis, minor_axis]
        axes = None

        if isinstance(data, BlockManager):
            mgr = data
            # explicitly passed axes override the manager's own, per position
            if not all(ax is None for ax in passed_axes):
                axes = [
                    given if given is not None else existing
                    for given, existing in zip(passed_axes, data.axes)
                ]
        elif isinstance(data, dict):
            mgr = self._init_dict(data, passed_axes, dtype=dtype)
            # dtype was already handed to the helper above
            copy, dtype = False, None
        elif isinstance(data, (np.ndarray, list)):
            mgr = self._init_matrix(data, passed_axes, dtype=dtype, copy=copy)
            # copy and dtype were already handed to the helper above
            copy, dtype = False, None
        else:  # pragma: no cover
            raise PandasError('Panel constructor not properly called!')

        NDFrame.__init__(self, mgr, axes=axes, copy=copy, dtype=dtype)

    @classmethod
    def _from_axes(cls, data, axes):
        """Construct an instance from ``data`` and a sequence of three axes.

        ``axes`` is ignored when ``data`` is a BlockManager; otherwise it is
        unpacked as (items, major, minor) and passed on with ``copy=False``.
        """
        if not isinstance(data, BlockManager):
            items, major, minor = axes
            return cls(data,
                       items=items,
                       major_axis=major,
                       minor_axis=minor,
                       copy=False)

        # for construction from BlockManager
        return cls(data)

    def _init_dict(self, data, axes, dtype=None):
        """Build a consolidated BlockManager from a dict of frames.

        Parameters
        ----------
        data : dict
            Maps item label to a DataFrame, a dict (converted to a
            DataFrame) or some other value, which is used as-is.
        axes : sequence
            ``(items, major, minor)``; any entry may be None, in which case
            it is derived from ``data``.
        dtype : dtype, default None
            If given, DataFrame values are cast to it, and it is used for
            the NaN-filled array created for items missing from ``data``.

        Returns
        -------
        BlockManager

        Notes
        -----
        The dict passed by the caller is never modified.
        """
        items, major, minor = axes

        # prefilter if items passed
        if items is not None:
            items = _ensure_index(items)
            data = dict((k, v) for k, v in data.iteritems() if k in items)
        else:
            items = Index(_try_sort(data.keys()))
            # shallow copy, so that replacing nested dicts below does not
            # write into the caller's dict
            data = data.copy()

        # only values are replaced, so the dict does not change size while
        # it is being iterated
        for k, v in data.iteritems():
            if isinstance(v, dict):
                data[k] = DataFrame(v)

        if major is None:
            major = _extract_axis(data, axis=0)

        if minor is None:
            minor = _extract_axis(data, axis=1)

        axes = [items, major, minor]
        reshaped_data = data.copy()  # shallow

        item_shape = len(major), len(minor)
        for item in items:
            v = values = data.get(item)
            if v is None:
                # item requested but absent: all-NaN placeholder
                values = np.empty(item_shape, dtype=dtype)
                values.fill(np.nan)
            elif isinstance(v, DataFrame):
                v = v.reindex(index=major, columns=minor, copy=False)
                if dtype is not None:
                    v = v.astype(dtype)
                values = v.values
            reshaped_data[item] = values

        # segregates dtypes and forms blocks matching to columns
        blocks = form_blocks(reshaped_data, axes)
        mgr = BlockManager(blocks, axes).consolidate()
        return mgr

    @property
    def shape(self):
        """Lengths of the items, major and minor axes, as a 3-tuple."""
        axes = (self.items, self.major_axis, self.minor_axis)
        return tuple(len(ax) for ax in axes)

    @classmethod
    def from_dict(cls, data, intersect=False, orient='items', dtype=None):
        """
        Construct Panel from dict of DataFrame objects

        Parameters
        ----------
        data : dict
            {field : DataFrame}
        intersect : boolean
            Intersect indexes of input DataFrames
        orient : {'items', 'minor'}, default 'items'
            The "orientation" of the data. If the keys of the passed dict
            should be the items of the result panel, pass 'items'
            (default). Otherwise if the columns of the values of the passed
            DataFrame objects should be the items (which in the case of
            mixed-dtype data you should do), instead pass 'minor'.
            Matching is case-insensitive.
        dtype : dtype, default None
            Passed through to the homogenizing step.

        Returns
        -------
        Panel

        Raises
        ------
        ValueError
            If ``orient`` is neither 'items' nor 'minor'.
        """
        from collections import defaultdict

        orient = orient.lower()
        if orient not in ('items', 'minor'):  # pragma: no cover
            raise ValueError('only recognize items or minor for orientation')

        if orient == 'minor':
            # swap the nesting: {col: {item: s}} becomes {item: {col: s}}
            regrouped = defaultdict(dict)
            for col, df in data.iteritems():
                for item, s in df.iteritems():
                    regrouped[item][col] = s
            data = regrouped

        data, index, columns = _homogenize_dict(data,
                                                intersect=intersect,
                                                dtype=dtype)
        items = Index(sorted(data.keys()))
        return cls(data, items, index, columns)

    def __getitem__(self, key):
        """Look up ``key``, using the multi-level path when the items axis
        is a MultiIndex and the parent implementation otherwise."""
        if not isinstance(self.items, MultiIndex):
            return super(Panel, self).__getitem__(key)
        return self._getitem_multilevel(key)

    def _getitem_multilevel(self, key):
        """Select ``key`` from the items axis.

        When the location of ``key`` is a slice or an ndarray, a new Panel is
        built from the selected values; otherwise the cached item is returned.
        """
        loc = self.items.get_loc(key)
        if not isinstance(loc, (slice, np.ndarray)):
            return self._get_item_cache(key)

        selected = self.items[loc]
        result_index = _maybe_droplevels(selected, key)
        new_values = self.values[loc, :, :]
        return Panel(new_values,
                     items=result_index,
                     major_axis=self.major_axis,
                     minor_axis=self.minor_axis)

    def _init_matrix(self, data, axes, dtype=None, copy=False):
        """Build a single-block BlockManager from array-like ``data``.

        Axes given as None are replaced by a default index of the matching
        length; the others are passed through ``_ensure_index``.

        Raises
        ------
        ValueError
            If the values cannot be cast to ``dtype``.
        """
        values = _prep_ndarray(data, copy=copy)

        if dtype is not None:
            try:
                values = values.astype(dtype)
            except Exception:
                raise ValueError('failed to cast to %s' % dtype)

        shape = values.shape
        fixed_axes = [
            _default_index(shape[i]) if ax is None else _ensure_index(ax)
            for i, ax in enumerate(axes)
        ]

        items = fixed_axes[0]
        return BlockManager([make_block(values, items, items)], fixed_axes)

    #----------------------------------------------------------------------
    # Array interface

    def __array__(self, dtype=None):
        """Return ``self.values``; ``dtype`` is accepted but not used here."""
        values = self.values
        return values

    def __array_wrap__(self, result):
        """Wrap ``result`` in a new object that reuses this panel's axes,
        without copying."""
        build = self._constructor
        own_axes = dict(items=self.items,
                        major_axis=self.major_axis,
                        minor_axis=self.minor_axis)
        return build(result, copy=False, **own_axes)

    #----------------------------------------------------------------------
    # Magic methods

    def __repr__(self):
        """Five-line summary: class, dimensions, then the first and last
        label of the items, major and minor axes (or ``None`` if empty)."""
        items_ax = self.items
        major_ax = self.major_axis
        minor_ax = self.minor_axis

        class_name = str(self.__class__)
        dims = 'Dimensions: %d (items) x %d (major) x %d (minor)' % (
            len(items_ax), len(major_ax), len(minor_ax))

        if len(items_ax) > 0:
            items = 'Items: %s to %s' % (items_ax[0], items_ax[-1])
        else:
            items = 'Items: None'

        if len(major_ax) > 0:
            major = 'Major axis: %s to %s' % (major_ax[0], major_ax[-1])
        else:
            major = 'Major axis: None'

        if len(minor_ax) > 0:
            minor = 'Minor axis: %s to %s' % (minor_ax[0], minor_ax[-1])
        else:
            minor = 'Minor axis: None'

        return '%s\n%s\n%s\n%s\n%s' % (class_name, dims, items, major, minor)

    def __iter__(self):
        """Iterate over the labels of the items axis."""
        labels = self.items
        return iter(labels)

    def iteritems(self):
        """Generate ``(label, self[label])`` pairs, one per label of the
        items axis."""
        for label in self.items:
            pair = (label, self[label])
            yield pair

    # Alias of iteritems under a name that 2to3 will not automatically
    # rewrite to ``items``; ``items`` is already in use for the first axis.
    iterkv = iteritems

    def _get_plane_axes(self, axis):
        """Return the ``(index, columns)`` axes paired with ``axis``.

        ``axis`` is first resolved with ``_get_axis_name``.  The pair is
        (minor_axis, items) for the major axis, (major_axis, items) for the
        minor axis and (major_axis, minor_axis) for items.
        """
        axis = self._get_axis_name(axis)

        if axis == 'major_axis':
            index, columns = self.minor_axis, self.items
        elif axis == 'minor_axis':
            index, columns = self.major_axis, self.items
        elif axis == 'items':
            index, columns = self.major_axis, self.minor_axis

        return index, columns

    @property
    def _constructor(self):
        """The type of this instance, used to build new objects like it."""
        own_type = type(self)
        return own_type

    # Fancy indexing
    # Class-level default for the indexer; None until one is assigned.
    _ix = None

    @property
    def ix(self):
        """Return the ``_NDFrameIndexer`` for this object, creating and
        storing it on first access."""
        if self._ix is not None:
            return self._ix

        self._ix = _NDFrameIndexer(self)
        return self._ix

    def _wrap_array(self, arr, axes, copy=False):
        """Build a new object from ``arr`` labelled with the three ``axes``
        (items, major, minor)."""
        items, major, minor = axes
        labels = dict(items=items, major_axis=major, minor_axis=minor)
        return self._constructor(arr, copy=copy, **labels)

    # camelCase alias of from_dict
    fromDict = from_dict

    def to_sparse(self, fill_value=None, kind='block'):
        """
        Convert to SparsePanel

        Parameters
        ----------
        fill_value : float, default NaN
            Passed on as ``default_fill_value``
        kind : {'block', 'integer'}
            Passed on as ``default_kind``

        Returns
        -------
        y : SparsePanel
        """
        from pandas.core.sparse import SparsePanel

        frames = dict(self.iterkv())
        options = dict(items=self.items,
                       major_axis=self.major_axis,
                       minor_axis=self.minor_axis,
                       default_kind=kind,
                       default_fill_value=fill_value)
        return SparsePanel(frames, **options)

    def to_excel(self, path, na_rep=''):
        """
        Write each DataFrame in Panel to a separate excel sheet

        Parameters
        ----------
        path : string
            Handed to ``ExcelWriter`` to create the writer
        na_rep : string, default ''
            Missing data representation

        Notes
        -----
        Each sheet is named ``str(item)``.  The writer is saved once, after
        all items have been written.
        """
        from pandas.io.parsers import ExcelWriter

        writer = ExcelWriter(path)
        for item, df in self.iteritems():
            sheet_name = str(item)
            df.to_excel(writer, sheet_name, na_rep=na_rep)
        writer.save()

    # TODO: needed?
    def keys(self):
        """Return the labels of the items axis as a list."""
        labels = self.items
        return list(labels)

    def _get_values(self):
        """Consolidate the internal data in place, then return it as a
        single matrix."""
        self._consolidate_inplace()
        matrix = self._data.as_matrix()
        return matrix

    # read-only property backed by _get_values
    values = property(fget=_get_values)

    #----------------------------------------------------------------------
    # Getting and setting elements

    def get_value(self, item, major, minor):
        """
        Quickly retrieve single value at (item, major, minor) location

        Parameters
        ----------
        item : item label (panel item)
        major : major axis label (panel item row)
        minor : minor axis label (panel item column)

        Returns
        -------
        value : scalar value
        """
        # two steps: fetch the cached item, then ask it for the cell
        return self._get_item_cache(item).get_value(major, minor)

    def set_value(self, item, major, minor, value):
        """
        Quickly set single value at (item, major, minor) location

        Parameters
        ----------
        item : item label (panel item)
        major : major axis label (panel item row)
        minor : minor axis label (panel item column)
        value : scalar

        Returns
        -------
        panel : Panel
            If label combo is contained, will be reference to calling Panel,
            otherwise a new object
        """
        try:
            # direct path: set the value on the cached item in place
            frame = self._get_item_cache(item)
            frame.set_value(major, minor, value)
            return self
        except KeyError:
            # A KeyError from either call above lands here: build the
            # expanded axes, reindex onto them, and set the value on the
            # reindexed result instead of on self.
            ax1, ax2, ax3 = self._expand_axes((item, major, minor))
            result = self.reindex(items=ax1, major=ax2, minor=ax3, copy=False)

            likely_dtype = com._infer_dtype(value)
            # the items axis differs from the current one
            made_bigger = not np.array_equal(ax1, self.items)
            # how to make this logic simpler?
            if made_bigger:
                com._possibly_cast_item(result, item, likely_dtype)

            return result.set_value(item, major, minor, value)

    def _box_item_values(self, key, values):
        """Wrap ``values`` in a DataFrame indexed by the major axis with the
        minor axis as columns; ``key`` is not used here."""
        index, columns = self.major_axis, self.minor_axis
        return DataFrame(values, index=index, columns=columns)

    def __getattr__(self, name):
        """After regular attribute access, try looking up the name of an item.
        This allows simpler access to items for interactive use.

        Raises AttributeError when ``name`` is not among the items."""
        if name not in self.items:
            raise AttributeError("'%s' object has no attribute '%s'" %
                                 (type(self).__name__, name))
        return self[name]

    def _slice(self, slobj, axis=0):
        """Slice the internal data with ``slobj`` along ``axis`` and wrap
        the result in a new object."""
        sliced = self._data.get_slice(slobj, axis=axis)
        return self._constructor(sliced)

    def __setitem__(self, key, value):
        """Set the item ``key`` from a DataFrame, an ndarray or a scalar.

        Parameters
        ----------
        key : object
            Item label.
        value : DataFrame, ndarray or scalar
            A DataFrame is reindexed to this panel's major/minor axes; an
            ndarray must already have shape (len(major), len(minor)); a
            scalar is broadcast to that shape.

        Raises
        ------
        TypeError
            If ``value`` is none of the supported types.
        """
        _, N, K = self.shape
        if isinstance(value, DataFrame):
            value = value.reindex(index=self.major_axis,
                                  columns=self.minor_axis)
            mat = value.values
        elif isinstance(value, np.ndarray):
            assert (value.shape == (N, K))
            mat = np.asarray(value)
        elif np.isscalar(value):
            dtype = _infer_dtype(value)
            mat = np.empty((N, K), dtype=dtype)
            mat.fill(value)
        else:
            # without this branch ``mat`` would be unbound below and the
            # caller would see an unrelated UnboundLocalError
            raise TypeError('Cannot set item of type: %s' % str(type(value)))

        # add the leading items dimension before handing over
        mat = mat.reshape((1, N, K))
        NDFrame._set_item(self, key, mat)

    def pop(self, item):
        """
        Return item slice from panel and delete from panel

        Parameters
        ----------
        item : object
            Must be contained in panel's items

        Returns
        -------
        y : DataFrame
        """
        popped = NDFrame.pop(self, item)
        return popped

    def __getstate__(self):
        """Return the state to pickle: the internal data object."""
        state = self._data
        return state

    def __setstate__(self, state):
        """Restore from pickled ``state`` and reset the item cache.

        ``state`` is either a BlockManager or a length-4 sequence in the old
        Panel pickle layout; anything else raises ValueError.
        """
        if not isinstance(state, BlockManager):
            if len(state) != 4:  # pragma: no cover
                raise ValueError('unrecognized pickle')
            # old Panel pickle
            self._unpickle_panel_compat(state)  # pragma: no cover
        else:
            self._data = state
        self._item_cache = {}

    def _unpickle_panel_compat(self, state):  # pragma: no cover
        """Restore ``self._data`` from a ``(vals, items, major, minor)``
        state tuple by unpickling each part and building a Panel from it."""
        _unpickle = com._unpickle_array
        vals, items, major, minor = state

        items, major, minor = [_unpickle(ax) for ax in (items, major, minor)]
        values = _unpickle(vals)

        rebuilt = Panel(values, items, major, minor)
        self._data = rebuilt._data

    def conform(self, frame, axis='items'):
        """
        Conform input DataFrame to align with chosen axis pair.

        Parameters
        ----------
        frame : DataFrame
        axis : {'items', 'major', 'minor'}

            Axis the input corresponds to. E.g., if axis='major', then
            the frame's columns would be items, and the index would be
            values of the minor axis

        Returns
        -------
        DataFrame
        """
        index, columns = self._get_plane_axes(axis)
        target = dict(index=index, columns=columns)
        return frame.reindex(**target)

    def reindex(self,
                major=None,
                items=None,
                minor=None,
                method=None,
                major_axis=None,
                minor_axis=None,
                copy=True):
        """
        Conform panel to new axis or axes

        Parameters
        ----------
        major : Index or sequence, default None
            Can also use 'major_axis' keyword
        items : Index or sequence, default None
        minor : Index or sequence, default None
            Can also use 'minor_axis' keyword
        method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
            Method to use for filling holes in reindexed Series

            pad / ffill: propagate last valid observation forward to next valid
            backfill / bfill: use NEXT valid observation to fill gap
        copy : boolean, default True
            Return a new object, even if the passed indexes are the same

        Returns
        -------
        Panel (new object)

        Raises
        ------
        ValueError
            If ``copy`` is true and the result is still this same object.
        """
        major = _mut_exclusive(major, major_axis)
        minor = _mut_exclusive(minor, minor_axis)

        # all three axes given, no fill method, single dtype: reindex in one go
        if (method is None and not self._is_mixed_type
                and com._count_not_none(items, major, minor) == 3):
            return self._reindex_multi(items, major, minor)

        # otherwise one axis at a time: major, then minor, then items
        result = self
        for labels, axis_number in ((major, 1), (minor, 2), (items, 0)):
            if labels is not None:
                result = result._reindex_axis(labels, method, axis_number,
                                              copy)

        if result is self and copy:
            raise ValueError('Must specify at least one axis')

        return result

    def _reindex_multi(self, items, major, minor):
        """Reindex all three axes at once and return a new Panel.

        The output array is preallocated with the dtype of the current
        values and filled one item at a time with ``com.take_2d_multi``.
        """
        a0, a1, a2 = len(items), len(major), len(minor)

        values = self.values
        new_values = np.empty((a0, a1, a2), dtype=values.dtype)

        new_items, indexer0 = self.items.reindex(items)
        new_major, indexer1 = self.major_axis.reindex(major)
        new_minor, indexer2 = self.minor_axis.reindex(minor)

        # a None indexer is replaced by the identity positions
        if indexer0 is None:
            indexer0 = range(len(new_items))

        if indexer1 is None:
            indexer1 = range(len(new_major))

        if indexer2 is None:
            indexer2 = range(len(new_minor))

        # NOTE(review): if indexer0 can hold -1 for labels that are missing
        # from the current items, values[ind] would pick the last item
        # rather than produce a missing one -- confirm against
        # Index.reindex and take_2d_multi.
        for i, ind in enumerate(indexer0):
            com.take_2d_multi(values[ind],
                              indexer1,
                              indexer2,
                              out=new_values[i])

        return Panel(new_values,
                     items=new_items,
                     major_axis=new_major,
                     minor_axis=new_minor)

    def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True):
        """Conform Panel to new labels along a single axis.

        Parameters
        ----------
        labels : array-like
            New labels / index to conform to. Preferably an Index object to
            avoid duplicating data
        axis : axis identifier, default 0
            Axis to reindex; forwarded to ``_reindex_axis``
        method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
            Method to use for filling holes in the reindexed object
            pad / ffill: propagate last valid observation forward to next valid
            backfill / bfill: use NEXT valid observation to fill gap
        copy : boolean, default True
            Return a new object, even if the passed indexes are the same
        level : int or name
            Accepted for signature compatibility; not used in this method

        Returns
        -------
        reindexed : Panel
        """
        # consolidate the internal data before reindexing
        self._consolidate_inplace()
        reindexed = self._reindex_axis(labels, method, axis, copy)
        return reindexed

    def reindex_like(self, other, method=None):
        """
        Reindex Panel to match indices of another Panel

        Parameters
        ----------
        other : Panel
        method : string or None

        Returns
        -------
        reindexed : Panel
        """
        # todo: object columns
        target_axes = dict(major=other.major_axis,
                           items=other.items,
                           minor=other.minor_axis)
        return self.reindex(method=method, **target_axes)

    def dropna(self, axis=0, how='any'):
        """
        Drop 2D from panel, holding passed axis constant

        Parameters
        ----------
        axis : int, default 0
            Axis to hold constant. E.g. axis=1 will drop major_axis entries
            having a certain amount of NA data
        how : {'all', 'any'}, default 'any'
            'any': one or more values are NA in the DataFrame along the
            axis. For 'all' they all must be.

        Returns
        -------
        dropped : Panel
        """
        axis = self._get_axis_number(axis)

        values = self.values
        mask = com.notnull(values)

        # Sum the not-null mask over the two other axes, highest axis number
        # first so the remaining axis number stays valid after each sum.
        # What is left is a per-label count of non-NA cells along ``axis``.
        for ax in reversed(sorted(set(range(3)) - set([axis]))):
            mask = mask.sum(ax)

        # number of cells in one 2-D slice taken along ``axis``
        per_slice = np.prod(values.shape[:axis] + values.shape[axis + 1:])

        if how == 'all':
            # keep labels that have at least one non-NA cell
            cond = mask > 0
        else:
            # keep labels whose slice has no NA cell at all
            cond = mask == per_slice

        new_ax = self._get_axis(axis)[cond]
        return self.reindex_axis(new_ax, axis=axis)

    def _combine(self, other, func, axis=0):
        """Apply the binary ``func`` to this panel and ``other``.

        Parameters
        ----------
        other : Panel, DataFrame or scalar
        func : callable
            Binary function applied to the underlying values.
        axis : axis identifier, default 0
            Only used when ``other`` is a DataFrame.

        Returns
        -------
        A new object built by ``_combine_panel``, ``_combine_frame`` or
        ``self._constructor``, depending on the type of ``other``.

        Raises
        ------
        NotImplementedError
            If ``other`` is none of the supported types.
        """
        if isinstance(other, Panel):
            return self._combine_panel(other, func)
        elif isinstance(other, DataFrame):
            return self._combine_frame(other, func, axis=axis)
        elif np.isscalar(other):
            new_values = func(self.values, other)
            return self._constructor(new_values, self.items, self.major_axis,
                                     self.minor_axis)

        # falling off the end here used to return None silently
        raise NotImplementedError(
            '%s is not supported in combine operation with %s' %
            (str(type(other)), str(type(self))))

    def __neg__(self):
        """Unary minus, computed as ``-1 * self``."""
        negated = -1 * self
        return negated

    def _combine_frame(self, other, func, axis=0):
        """Apply the binary ``func`` between this panel's values and a
        DataFrame that is first conformed to the plane axes of ``axis``.

        The result is wrapped with this panel's own three axes.
        """
        index, columns = self._get_plane_axes(axis)
        axis = self._get_axis_number(axis)

        other = other.reindex(index=index, columns=columns)

        # presumably self.values is laid out items x major x minor; the
        # swaps below move the chosen axis to the front for ``func`` and
        # then move it back -- TODO confirm the broadcasting intent
        if axis == 0:
            new_values = func(self.values, other.values)
        elif axis == 1:
            new_values = func(self.values.swapaxes(0, 1), other.values.T)
            new_values = new_values.swapaxes(0, 1)
        elif axis == 2:
            new_values = func(self.values.swapaxes(0, 2), other.values)
            new_values = new_values.swapaxes(0, 2)

        return self._constructor(new_values, self.items, self.major_axis,
                                 self.minor_axis)

    def _combine_panel(self, other, func):
        """Apply the binary ``func`` to two panels after reindexing both to
        the combined (``+``) axes of the pair."""
        items = self.items + other.items
        major = self.major_axis + other.major_axis
        minor = self.minor_axis + other.minor_axis
        combined_axes = dict(items=items, major=major, minor=minor)

        # could check that everything's the same size, but forget it
        this = self.reindex(**combined_axes)
        other = other.reindex(**combined_axes)

        return self._constructor(func(this.values, other.values), items,
                                 major, minor)

    def fillna(self, value=None, method='pad'):
        """
        Fill NaN values using the specified method.

        When ``value`` is None each item is filled separately with
        ``method``; otherwise ``value`` is applied to the internal data and
        ``method`` is not consulted.

        Parameters
        ----------
        value : any kind (should be same type as array)
            Value to use to fill holes (e.g. 0)

        method : {'backfill', 'bfill', 'pad', 'ffill', None}, default 'pad'
            Method to use for filling holes in reindexed Series

            pad / ffill: propagate last valid observation forward to next valid
            backfill / bfill: use NEXT valid observation to fill gap

        Returns
        -------
        y : object built by ``self._constructor``

        See also
        --------
        DataFrame.reindex, DataFrame.asfreq
        """
        if value is not None:
            filled_data = self._data.fillna(value)
            return self._constructor(filled_data)

        filled = {}
        for col, s in self.iterkv():
            filled[col] = s.fillna(method=method, value=value)

        return self._constructor.from_dict(filled)

    # Arithmetic methods, each produced by _panel_arith_method from the
    # matching operator function. The long and short names are bound to the
    # same object.
    add = _panel_arith_method(operator.add, 'add')
    subtract = sub = _panel_arith_method(operator.sub, 'subtract')
    multiply = mul = _panel_arith_method(operator.mul, 'multiply')

    # operator.div only exists on Python 2; looking it up raises
    # AttributeError on Python 3, where true division is used instead.
    try:
        divide = div = _panel_arith_method(operator.div, 'divide')
    except AttributeError:  # pragma: no cover
        # Python 3
        divide = div = _panel_arith_method(operator.truediv, 'divide')

    def major_xs(self, key, copy=True):
        """
        Return the cross-section of the panel at one major axis label

        Parameters
        ----------
        key : object
            Major axis label
        copy : boolean, default True
            Copy data

        Returns
        -------
        y : DataFrame
            index -> minor axis, columns -> items
        """
        # axis 1 is the major axis
        cross_section = self.xs(key, axis=1, copy=copy)
        return cross_section

    def minor_xs(self, key, copy=True):
        """
        Return the cross-section of the panel at one minor axis label

        Parameters
        ----------
        key : object
            Minor axis label
        copy : boolean, default True
            Copy data

        Returns
        -------
        y : DataFrame
            index -> major axis, columns -> items
        """
        # axis 2 is the minor axis
        cross_section = self.xs(key, axis=2, copy=copy)
        return cross_section

    def xs(self, key, axis=1, copy=True):
        """
        Return slice of panel along selected axis

        Parameters
        ----------
        key : object
            Label
        axis : {'items', 'major', 'minor'} or {0, 1, 2}, default 1/'major'
        copy : boolean, default True
            Copy data

        Returns
        -------
        y : DataFrame
        """
        # Resolve the axis before choosing a path, so that the name 'items'
        # takes the same item-lookup shortcut as the number 0 instead of
        # falling through to the block-manager slice below.
        axis_number = self._get_axis_number(axis)
        if axis_number == 0:
            data = self[key]
            if copy:
                data = data.copy()
            return data

        self._consolidate_inplace()
        new_data = self._data.xs(key, axis=axis_number, copy=copy)
        return DataFrame(new_data)

    def _ixs(self, i, axis=0):
        # for compatibility with .ix indexing
        # Won't work with hierarchical indexing yet
        key = self._get_axis(axis)[i]
        return self.xs(key, axis=axis)

    def groupby(self, function, axis='major'):
        """
        Group data on given axis, returning GroupBy object

        Parameters
        ----------
        function : callable
            Mapping function for the chosen axis
        axis : {'major', 'minor', 'items'}, default 'major'

        Returns
        -------
        grouped : PanelGroupBy
        """
        # imported inside the method rather than at module level
        from pandas.core.groupby import PanelGroupBy

        axis_number = self._get_axis_number(axis)
        return PanelGroupBy(self, function, axis=axis_number)

    def swapaxes(self, axis1='major', axis2='minor', copy=True):
        """
        Interchange axes and swap values axes appropriately

        Parameters
        ----------
        axis1 : axis name or number, default 'major'
        axis2 : axis name or number, default 'minor'
        copy : boolean, default True
            Copy the swapped values before building the result

        Returns
        -------
        y : Panel (new object)

        Raises
        ------
        ValueError
            If both arguments resolve to the same axis
        """
        i = self._get_axis_number(axis1)
        j = self._get_axis_number(axis2)

        if i == j:
            raise ValueError('Cannot specify the same axis')

        mapping = {i: j, j: i}

        # Walk every dimension of the data instead of a hard-coded three, so
        # an object with more axes gets a complete set of axes back.
        values = self.values
        new_axes = [self._get_axis(mapping.get(k, k))
                    for k in range(values.ndim)]
        new_values = values.swapaxes(i, j)
        if copy:
            new_values = new_values.copy()

        return self._constructor(new_values, *new_axes)

    def transpose(self,
                  items='items',
                  major='major',
                  minor='minor',
                  copy=False):
        """
        Permute the dimensions of the Panel

        Each argument names the existing axis that should end up in that
        position of the result.

        Parameters
        ----------
        items : int or one of {'items', 'major', 'minor'}
        major : int or one of {'items', 'major', 'minor'}
        minor : int or one of {'items', 'major', 'minor'}
        copy : boolean, default False
            Make a copy of the underlying data. Mixed-dtype data will
            always result in a copy

        Examples
        --------
        >>> p.transpose(2, 0, 1)
        >>> p.transpose(2, 0, 1, copy=True)

        Returns
        -------
        y : Panel (new object)
        """
        order = [self._get_axis_number(ax) for ax in (items, major, minor)]

        # any repeated axis number collapses the set below three entries
        if len(set(order)) != 3:
            raise ValueError('Must specify 3 unique axes')

        permuted_axes = [self._get_axis(number) for number in order]
        permuted_values = self.values.transpose(tuple(order))
        if copy:
            permuted_values = permuted_values.copy()
        return self._constructor(permuted_values, *permuted_axes)

    def to_frame(self, filter_observations=True):
        """
        Transform wide format into long (stacked) format as DataFrame

        Every item becomes a column and the rows are indexed by a
        two-level (major, minor) MultiIndex.

        Parameters
        ----------
        filter_observations : boolean, default True
            Drop (major, minor) pairs without a complete set of observations
            across all the items

        Returns
        -------
        y : DataFrame
        """
        _, n_major, n_minor = self.shape

        if filter_observations:
            # keep only the (major, minor) cells that are non-null in
            # every item
            complete = com.notnull(self.values).all(axis=0)
            selector = complete.ravel()
        else:
            selector = slice(None, None)

        data = dict((item, self[item].values.ravel()[selector])
                    for item in self.items)

        # Positions matching the flattened values: each major position is
        # repeated once per minor position, and the run of minor positions
        # is repeated once per major position.
        major_labels = np.arange(n_major).repeat(n_minor)[selector]
        minor_labels = np.tile(np.arange(n_minor), n_major)[selector]

        index = MultiIndex(levels=[self.major_axis, self.minor_axis],
                           labels=[major_labels, minor_labels],
                           names=['major', 'minor'])

        return DataFrame(data, index=index, columns=self.items)

    # Older names for to_frame, each wrapped by deprecate() under the old
    # name (presumably so that callers get a deprecation notice -- confirm
    # against the deprecate helper).
    to_long = deprecate('to_long', to_frame)
    toLong = deprecate('toLong', to_frame)

    def filter(self, items):
        """
        Restrict items in panel to input list

        Only the requested items that the panel actually has are kept.

        Parameters
        ----------
        items : sequence

        Returns
        -------
        y : Panel
        """
        return self.reindex(items=self.items.intersection(items))

    def apply(self, func, axis='major'):
        """
        Apply a function along one axis of the panel's values

        Parameters
        ----------
        func : numpy function
            Signature should match numpy.{sum, mean, var, std} etc.
        axis : {'major', 'minor', 'items'}

        Returns
        -------
        result : object returned by ``self._wrap_result``
        """
        axis_number = self._get_axis_number(axis)
        applied = np.apply_along_axis(func, axis_number, self.values)
        return self._wrap_result(applied, axis=axis)

    def _reduce(self, op, axis=0, skipna=True):
        axis_name = self._get_axis_name(axis)
        axis_number = self._get_axis_number(axis_name)
        f = lambda x: op(x, axis=axis_number, skipna=skipna)

        result = f(self.values)

        index, columns = self._get_plane_axes(axis_name)
        if axis_name != 'items':
            result = result.T

        return DataFrame(result, index=index, columns=columns)

    def _wrap_result(self, result, axis):
        axis = self._get_axis_name(axis)
        index, columns = self._get_plane_axes(axis)

        if axis != 'items':
            result = result.T

        return DataFrame(result, index=index, columns=columns)

    def count(self, axis='major'):
        """
        Return number of observations over requested axis.

        An entry counts as an observation when ``np.isfinite`` is true for
        it.

        Parameters
        ----------
        axis : {'items', 'major', 'minor'} or {0, 1, 2}

        Returns
        -------
        count : DataFrame
        """
        axis_number = self._get_axis_number(axis)
        observed = np.isfinite(self.values)
        return self._wrap_result(observed.sum(axis=axis_number), axis)

    @Substitution(desc='sum', outname='sum')
    @Appender(_agg_doc)
    def sum(self, axis='major', skipna=True):
        # the arithmetic lives in nanops; _reduce applies it along the axis
        # and labels the result
        reducer = nanops.nansum
        return self._reduce(reducer, axis=axis, skipna=skipna)

    @Substitution(desc='mean', outname='mean')
    @Appender(_agg_doc)
    def mean(self, axis='major', skipna=True):
        # the arithmetic lives in nanops; _reduce applies it along the axis
        # and labels the result
        reducer = nanops.nanmean
        return self._reduce(reducer, axis=axis, skipna=skipna)

    @Substitution(desc='unbiased variance', outname='variance')
    @Appender(_agg_doc)
    def var(self, axis='major', skipna=True):
        # the arithmetic lives in nanops; _reduce applies it along the axis
        # and labels the result
        reducer = nanops.nanvar
        return self._reduce(reducer, axis=axis, skipna=skipna)

    @Substitution(desc='unbiased standard deviation', outname='stdev')
    @Appender(_agg_doc)
    def std(self, axis='major', skipna=True):
        # standard deviation is the square root of the variance computed
        # by var() with the same arguments
        variance = self.var(axis=axis, skipna=skipna)
        return variance.apply(np.sqrt)

    @Substitution(desc='unbiased skewness', outname='skew')
    @Appender(_agg_doc)
    def skew(self, axis='major', skipna=True):
        # the arithmetic lives in nanops; _reduce applies it along the axis
        # and labels the result
        reducer = nanops.nanskew
        return self._reduce(reducer, axis=axis, skipna=skipna)

    @Substitution(desc='product', outname='prod')
    @Appender(_agg_doc)
    def prod(self, axis='major', skipna=True):
        # the arithmetic lives in nanops; _reduce applies it along the axis
        # and labels the result
        reducer = nanops.nanprod
        return self._reduce(reducer, axis=axis, skipna=skipna)

    @Substitution(desc='compounded percentage', outname='compounded')
    @Appender(_agg_doc)
    def compound(self, axis='major', skipna=True):
        # multiply the growth factors (1 + value) along the axis, then
        # subtract 1 to turn the total factor back into a percentage change
        growth = (1 + self).prod(axis=axis, skipna=skipna)
        return growth - 1

    @Substitution(desc='median', outname='median')
    @Appender(_agg_doc)
    def median(self, axis='major', skipna=True):
        # the arithmetic lives in nanops; _reduce applies it along the axis
        # and labels the result
        reducer = nanops.nanmedian
        return self._reduce(reducer, axis=axis, skipna=skipna)

    @Substitution(desc='maximum', outname='maximum')
    @Appender(_agg_doc)
    def max(self, axis='major', skipna=True):
        # the arithmetic lives in nanops; _reduce applies it along the axis
        # and labels the result
        reducer = nanops.nanmax
        return self._reduce(reducer, axis=axis, skipna=skipna)

    @Substitution(desc='minimum', outname='minimum')
    @Appender(_agg_doc)
    def min(self, axis='major', skipna=True):
        # the arithmetic lives in nanops; _reduce applies it along the axis
        # and labels the result
        reducer = nanops.nanmin
        return self._reduce(reducer, axis=axis, skipna=skipna)

    def shift(self, lags, axis='major'):
        """
        Shift major or minor axis by specified number of lags. Drops periods

        The last ``lags`` observations along the axis are dropped and the
        remaining ones are paired with the axis labels starting at
        position ``lags``.

        Parameters
        ----------
        lags : int
            Needs to be a non-negative number currently; 0 keeps every
            observation and label
        axis : {'major', 'minor'}

        Returns
        -------
        shifted : Panel

        Raises
        ------
        ValueError
            If axis is neither 'major' nor 'minor'
        """
        values = self.values
        items = self.items
        major_axis = self.major_axis
        minor_axis = self.minor_axis

        # ``[:-0]`` is an empty slice, so a zero lag needs an open-ended
        # stop; otherwise all values would be dropped while the axis keeps
        # every label.
        stop = -lags if lags else None

        if axis == 'major':
            values = values[:, :stop, :]
            major_axis = major_axis[lags:]
        elif axis == 'minor':
            values = values[:, :, :stop]
            minor_axis = minor_axis[lags:]
        else:
            raise ValueError('Invalid axis')

        return self._constructor(values,
                                 items=items,
                                 major_axis=major_axis,
                                 minor_axis=minor_axis)

    def truncate(self, before=None, after=None, axis='major'):
        """Keep only the part of a sorted Panel lying between two labels
        on the requested axis

        Parameters
        ----------
        before : date
            Left boundary
        after : date
            Right boundary
        axis : {'major', 'minor', 'items'}

        Returns
        -------
        Panel
        """
        axis_name = self._get_axis_name(axis)
        labels = self._get_axis(axis_name)

        # positions bracketing the labels that fall inside the boundaries
        start, stop = labels.slice_locs(before, after)

        return self.reindex(**{axis_name: labels[start:stop]})

    def join(self, other, how='left', lsuffix='', rsuffix=''):
        """
        Join the items of one or more other panels onto this one, aligning
        on the major and minor axes

        Parameters
        ----------
        other : Panel or list of Panels
            Panel(s) whose items are added
        how : {'left', 'right', 'outer', 'inner'}
            How to handle indexes of the two objects. Default: 'left'
            * left: use calling panel's axes
            * right: use input panel's axes
            * outer: form union of axes
            * inner: use intersection of axes
        lsuffix : string
            Suffix to use from left panel's overlapping items
        rsuffix : string
            Suffix to use from right panel's overlapping items

        Returns
        -------
        joined : Panel

        Raises
        ------
        ValueError
            When several panels are passed together with a suffix or with
            how='right'
        """
        from pandas.tools.merge import concat

        if isinstance(other, Panel):
            join_major, join_minor = self._get_join_index(other, how)
            left = self.reindex(major=join_major, minor=join_minor)
            right = other.reindex(major=join_major, minor=join_minor)
            return self._constructor(
                left._data.merge(right._data, lsuffix, rsuffix))

        # from here on ``other`` is treated as a sequence of panels
        if lsuffix or rsuffix:
            raise ValueError('Suffixes not supported when passing multiple '
                             'panels')
        if how == 'right':
            raise ValueError('Right join not supported with multiple '
                             'panels')

        join_axes = None
        if how == 'left':
            # a left join is done as an outer concat pinned to this
            # panel's own major and minor axes
            how = 'outer'
            join_axes = [self.major_axis, self.minor_axis]

        return concat([self] + list(other),
                      axis=0,
                      join=how,
                      join_axes=join_axes,
                      verify_integrity=True)

    def _get_join_index(self, other, how):
        if how == 'left':
            join_major, join_minor = self.major_axis, self.minor_axis
        elif how == 'right':
            join_major, join_minor = other.major_axis, other.minor_axis
        elif how == 'inner':
            join_major = self.major_axis.intersection(other.major_axis)
            join_minor = self.minor_axis.intersection(other.minor_axis)
        elif how == 'outer':
            join_major = self.major_axis.union(other.major_axis)
            join_minor = self.minor_axis.union(other.minor_axis)
        return join_major, join_minor
예제 #4
0
파일: panel4d.py 프로젝트: MarkyV/pandas
class Panel4D(Panel):
    # Axis bookkeeping: axis names from axis 0 upwards, lookups in both
    # directions, and the short aliases accepted for the last two axes.
    _AXIS_ORDERS = ['labels', 'items', 'major_axis', 'minor_axis']
    _AXIS_NUMBERS = dict(zip(_AXIS_ORDERS, range(len(_AXIS_ORDERS))))
    _AXIS_ALIASES = {'major': 'major_axis', 'minor': 'minor_axis'}
    _AXIS_NAMES = dict(enumerate(_AXIS_ORDERS))
    # maps each axis of a slice to the axis of this object it comes from
    _AXIS_SLICEMAP = dict(items='items',
                          major_axis='major_axis',
                          minor_axis='minor_axis')
    _AXIS_LEN = len(_AXIS_ORDERS)

    # default axis for statistics: axis 2 is major_axis
    _default_stat_axis = 2

    # info axis: axis 0 is labels
    _het_axis = 0
    _info_axis = _AXIS_ORDERS[_het_axis]

    # one property per axis, numbered as in _AXIS_ORDERS
    labels = lib.AxisProperty(0)
    items = lib.AxisProperty(1)
    major_axis = lib.AxisProperty(2)
    minor_axis = lib.AxisProperty(3)

    # type of the object obtained by slicing a Panel4D
    _constructor_sliced = Panel

    def __init__(self,
                 data=None,
                 labels=None,
                 items=None,
                 major_axis=None,
                 minor_axis=None,
                 copy=False,
                 dtype=None):
        """
        Represents a 4-dimensional labeled data structure

        All arguments are forwarded unchanged to ``self._init_data``.

        Parameters
        ----------
        data : ndarray (labels x items x major x minor), or dict of Panels

        labels : Index or array-like : axis=0
        items  : Index or array-like : axis=1
        major_axis : Index or array-like: axis=2
        minor_axis : Index or array-like: axis=3

        dtype : dtype, default None
            Data type to force, otherwise infer
        copy : boolean, default False
            Copy data from inputs. Only affects DataFrame / 2d ndarray input
        """
        axes = dict(labels=labels,
                    items=items,
                    major_axis=major_axis,
                    minor_axis=minor_axis)
        self._init_data(data=data, copy=copy, dtype=dtype, **axes)

    def _get_plane_axes(self, axis):
        """
        Return the three axes left over once ``axis`` is taken out.

        The remaining axes are returned as a tuple in their usual order
        (labels, items, major_axis, minor_axis, minus the one requested).
        """
        name = self._get_axis_name(axis)

        if name == 'labels':
            planes = (self.items, self.major_axis, self.minor_axis)
        elif name == 'items':
            planes = (self.labels, self.major_axis, self.minor_axis)
        elif name == 'major_axis':
            planes = (self.labels, self.items, self.minor_axis)
        elif name == 'minor_axis':
            planes = (self.labels, self.items, self.major_axis)

        return planes

    def _combine(self, other, func, axis=0):
        """
        Combine with ``other`` through ``func``.

        A Panel4D operand is handled by ``_combine_panel4d``; anything
        else is delegated to the parent class.
        """
        if not isinstance(other, Panel4D):
            return super(Panel4D, self)._combine(other, func, axis=axis)
        return self._combine_panel4d(other, func)

    def _combine_panel4d(self, other, func):
        """
        Combine this Panel4D with another one through a binary function.

        Each pair of matching axes is merged with ``+``, both operands are
        reindexed onto the merged axes, and ``func`` is applied to the two
        resulting ``values`` arrays.

        Parameters
        ----------
        other : Panel4D
        func : callable
            Called as ``func(self_values, other_values)``

        Returns
        -------
        y : object built by ``self._constructor`` from the combined values
            and the merged axes
        """
        aligned = dict(labels=self.labels + other.labels,
                       items=self.items + other.items,
                       major=self.major_axis + other.major_axis,
                       minor=self.minor_axis + other.minor_axis)

        # no size check is made here; both sides are simply reindexed onto
        # the merged axes
        lhs = self.reindex(**aligned)
        rhs = other.reindex(**aligned)

        return self._constructor(func(lhs.values, rhs.values),
                                 aligned['labels'],
                                 aligned['items'],
                                 aligned['major'],
                                 aligned['minor'])

    def join(self, other, how='left', lsuffix='', rsuffix=''):
        """
        Join with another Panel4D on the major and minor axes.

        Both operands are reindexed onto the axes chosen by
        ``_get_join_index`` and their data are merged, with the suffixes
        passed along. Any other kind of ``other`` is delegated to the
        parent class.
        """
        if not isinstance(other, Panel4D):
            return super(Panel4D, self).join(other=other,
                                             how=how,
                                             lsuffix=lsuffix,
                                             rsuffix=rsuffix)

        join_major, join_minor = self._get_join_index(other, how)
        left = self.reindex(major=join_major, minor=join_minor)
        right = other.reindex(major=join_major, minor=join_minor)
        return self._constructor(
            left._data.merge(right._data, lsuffix, rsuffix))

    ### remove operations ####
    def to_frame(self, *args, **kwargs):
        """Not available on Panel4D; always raises NotImplementedError."""
        raise NotImplementedError()

    def to_excel(self, *args, **kwargs):
        """Not available on Panel4D; always raises NotImplementedError."""
        raise NotImplementedError()