def create_nd_panel_factory(klass_name, axis_orders, axis_slices, slicer,
                            axis_aliases=None, stat_axis=2):
    """
    Manufacture an n-d panel class.

    The returned class is created with ``type(klass_name, (slicer,), {})``,
    i.e. it subclasses ``slicer``, and is then decorated with axis metadata,
    axis properties and a handful of methods defined below.

    Parameters
    ----------
    klass_name : the klass name
    axis_orders : the names of the axes in order (highest to lowest)
    axis_slices : a dictionary that defines how the axes map to the sliced
        axis
    slicer : the class representing a slice of this panel, or a string naming
        an attribute of the ``pandas`` package to use as that class
    axis_aliases : a dictionary defining aliases for various axes
        (an empty dict is used when nothing is passed)
    stat_axis : the default statistic axis, default = 2

    Notes
    -----
    The info axis is not configurable: ``_het_axis`` is always 0 and
    ``_info_axis`` is ``axis_orders[0]``.

    Returns
    -------
    a class object representing this panel
    """

    # if slicer is a name, get the object
    if isinstance(slicer, basestring):
        import pandas
        try:
            slicer = getattr(pandas, slicer)
        except AttributeError:
            # only a missing attribute means "unknown slicer"; anything else
            # (e.g. KeyboardInterrupt) must propagate untouched
            raise Exception("cannot create this slicer [%s]" % slicer)

    # build the klass
    klass = type(klass_name, (slicer,), {})

    # add the class variables
    klass._AXIS_ORDERS = axis_orders
    klass._AXIS_NUMBERS = dict((a, i) for i, a in enumerate(axis_orders))
    klass._AXIS_ALIASES = axis_aliases or dict()
    klass._AXIS_NAMES = dict((i, a) for i, a in enumerate(axis_orders))
    klass._AXIS_SLICEMAP = axis_slices
    klass._AXIS_LEN = len(axis_orders)
    klass._default_stat_axis = stat_axis
    klass._het_axis = 0
    klass._info_axis = axis_orders[klass._het_axis]
    klass._constructor_sliced = slicer

    # add the axes
    for i, a in enumerate(axis_orders):
        setattr(klass, a, lib.AxisProperty(i))

    #### define the methods ####
    def __init__(self, *args, **kwargs):
        # Test for presence with ``is None``: truth-testing the data itself
        # raises ValueError for a multi-element ndarray.
        if kwargs.get('data') is None and not args:
            raise Exception("must supply at least a data argument to [%s]"
                            % klass_name)
        if 'copy' not in kwargs:
            kwargs['copy'] = False
        if 'dtype' not in kwargs:
            kwargs['dtype'] = None
        self._init_data(*args, **kwargs)
    klass.__init__ = __init__

    def _get_plane_axes(self, axis):
        """Return the axis objects of every axis except ``axis``, in order."""
        axis = self._get_axis_name(axis)
        index = self._AXIS_ORDERS.index(axis)

        # slicing copes with index == 0 and index == last on its own
        planes = list(self._AXIS_ORDERS[:index])
        planes.extend(self._AXIS_ORDERS[index + 1:])

        return [getattr(self, p) for p in planes]
    klass._get_plane_axes = _get_plane_axes

    def _combine(self, other, func, axis=0):
        """Combine with ``other``; same-class operands are aligned first."""
        if isinstance(other, klass):
            return self._combine_with_constructor(other, func)
        return super(klass, self)._combine(other, func, axis=axis)
    klass._combine = _combine

    def _combine_with_constructor(self, other, func):
        """Reindex both operands to combined axes, then apply ``func``."""

        # combine labels to form new axes
        new_axes = [getattr(self, a) + getattr(other, a)
                    for a in self._AXIS_ORDERS]

        # reindex: could check that everything's the same size, but forget it
        d = dict(zip(self._AXIS_ORDERS, new_axes))
        d['copy'] = False
        this = self.reindex(**d)
        other = other.reindex(**d)

        result_values = func(this.values, other.values)

        return self._constructor(result_values, **d)
    klass._combine_with_constructor = _combine_with_constructor

    # set as NonImplemented operations which we don't support
    def func(self, *args, **kwargs):
        raise NotImplementedError

    for f in ['to_frame', 'to_excel', 'to_sparse', 'groupby', 'join',
              'filter', 'dropna', 'shift']:
        setattr(klass, f, func)

    # add the aggregate operations
    klass._add_aggregate_operations()

    return klass
def create_nd_panel_factory(klass_name, axis_orders, axis_slices, slicer,
                            axis_aliases=None, stat_axis=2):
    """
    Manufacture an n-d panel class.

    The returned class is created with ``type(klass_name, (slicer,), {})``,
    i.e. it subclasses ``slicer``, and is then decorated with axis metadata,
    axis properties and the methods defined below.

    Parameters
    ----------
    klass_name : the klass name
    axis_orders : the names of the axes in order (highest to lowest)
    axis_slices : a dictionary that defines how the axes map to the sliced
        axis
    slicer : the class representing a slice of this panel
    axis_aliases : a dictionary defining aliases for various axes
        (an empty dict is used when nothing is passed)
    stat_axis : the default statistic axis, default = 2

    Notes
    -----
    The info axis is not configurable: ``_het_axis`` is always 0 and
    ``_info_axis`` is ``axis_orders[0]``.

    Returns
    -------
    a class object representing this panel
    """

    # build the klass
    klass = type(klass_name, (slicer,), {})

    # add the class variables
    klass._AXIS_ORDERS = axis_orders
    klass._AXIS_NUMBERS = dict((a, i) for i, a in enumerate(axis_orders))
    klass._AXIS_ALIASES = axis_aliases or dict()
    klass._AXIS_NAMES = dict((i, a) for i, a in enumerate(axis_orders))
    klass._AXIS_SLICEMAP = axis_slices
    klass._AXIS_LEN = len(axis_orders)
    klass._default_stat_axis = stat_axis
    klass._het_axis = 0
    klass._info_axis = axis_orders[klass._het_axis]
    klass._constructor_sliced = slicer

    # add the axes
    for i, a in enumerate(axis_orders):
        setattr(klass, a, lib.AxisProperty(i))

    # define the __init__
    def __init__(self, *args, **kwargs):
        # Test for presence with ``is None``: truth-testing the data itself
        # raises ValueError for a multi-element ndarray.
        if kwargs.get('data') is None and not args:
            raise Exception("must supply at least a data argument to [%s]"
                            % klass_name)
        if 'copy' not in kwargs:
            kwargs['copy'] = False
        if 'dtype' not in kwargs:
            kwargs['dtype'] = None
        self._init_data(*args, **kwargs)
    klass.__init__ = __init__

    # define _get_plane_axes
    def _get_plane_axes(self, axis):
        """Return the axis objects of every axis except ``axis``, in order."""
        axis = self._get_axis_name(axis)
        index = self._AXIS_ORDERS.index(axis)

        # everything before the axis plus everything after it; the axis
        # itself (position ``index``) is excluded
        planes = list(self._AXIS_ORDERS[:index])
        planes.extend(self._AXIS_ORDERS[index + 1:])

        return [getattr(self, p) for p in planes]
    # actually attach the method (a bare attribute access is a no-op)
    klass._get_plane_axes = _get_plane_axes

    # remove these operations
    def to_frame(self, *args, **kwargs):
        raise NotImplementedError
    klass.to_frame = to_frame

    def to_excel(self, *args, **kwargs):
        raise NotImplementedError
    klass.to_excel = to_excel

    return klass
class Panel(NDFrame):
    """
    Wide format panel data stored as a 3-dimensional array with the labeled
    axes ``items`` (0), ``major_axis`` (1) and ``minor_axis`` (2).
    """
    _AXIS_NUMBERS = {'items': 0, 'major_axis': 1, 'minor_axis': 2}
    _AXIS_ALIASES = {'major': 'major_axis', 'minor': 'minor_axis'}
    _AXIS_NAMES = {0: 'items', 1: 'major_axis', 2: 'minor_axis'}

    # major
    _default_stat_axis = 1
    _het_axis = 0

    items = lib.AxisProperty(0)
    major_axis = lib.AxisProperty(1)
    minor_axis = lib.AxisProperty(2)

    __add__ = _arith_method(operator.add, '__add__')
    __sub__ = _arith_method(operator.sub, '__sub__')
    __truediv__ = _arith_method(operator.truediv, '__truediv__')
    __floordiv__ = _arith_method(operator.floordiv, '__floordiv__')
    __mul__ = _arith_method(operator.mul, '__mul__')
    __pow__ = _arith_method(operator.pow, '__pow__')

    __radd__ = _arith_method(operator.add, '__radd__')
    __rmul__ = _arith_method(operator.mul, '__rmul__')
    __rsub__ = _arith_method(lambda x, y: y - x, '__rsub__')
    __rtruediv__ = _arith_method(lambda x, y: y / x, '__rtruediv__')
    __rfloordiv__ = _arith_method(lambda x, y: y // x, '__rfloordiv__')
    __rpow__ = _arith_method(lambda x, y: y ** x, '__rpow__')

    if not py3compat.PY3:
        __div__ = _arith_method(operator.div, '__div__')
        __rdiv__ = _arith_method(lambda x, y: y / x, '__rdiv__')

    def __init__(self, data=None, items=None, major_axis=None, minor_axis=None,
                 copy=False, dtype=None):
        """
        Represents wide format panel data, stored as 3-dimensional array

        Parameters
        ----------
        data : ndarray (items x major x minor), or dict of DataFrames
        items : Index or array-like
            axis=0
        major_axis : Index or array-like
            axis=1
        minor_axis : Index or array-like
            axis=2
        dtype : dtype, default None
            Data type to force, otherwise infer
        copy : boolean, default False
            Copy data from inputs. Only affects DataFrame / 2d ndarray input
        """
        if data is None:
            data = {}

        passed_axes = [items, major_axis, minor_axis]
        axes = None
        if isinstance(data, BlockManager):
            # explicitly passed axes override the manager's own axes
            if any(x is not None for x in passed_axes):
                axes = [x if x is not None else y
                        for x, y in zip(passed_axes, data.axes)]
            mgr = data
        elif isinstance(data, dict):
            mgr = self._init_dict(data, passed_axes, dtype=dtype)
            copy = False
            dtype = None
        elif isinstance(data, (np.ndarray, list)):
            mgr = self._init_matrix(data, passed_axes, dtype=dtype, copy=copy)
            copy = False
            dtype = None
        else:  # pragma: no cover
            raise PandasError('Panel constructor not properly called!')

        NDFrame.__init__(self, mgr, axes=axes, copy=copy, dtype=dtype)

    @classmethod
    def _from_axes(cls, data, axes):
        """Build an instance from ``data`` and a 3-sequence of axes."""
        # for construction from BlockManager
        if isinstance(data, BlockManager):
            return cls(data)
        else:
            items, major, minor = axes
            return cls(data, items=items, major_axis=major,
                       minor_axis=minor, copy=False)

    def _init_dict(self, data, axes, dtype=None):
        """Build a consolidated BlockManager from a dict of frames/dicts."""
        items, major, minor = axes

        # prefilter if items passed
        if items is not None:
            items = _ensure_index(items)
            data = dict((k, v) for k, v in data.iteritems() if k in items)
        else:
            items = Index(_try_sort(data.keys()))
            # work on a shallow copy: nested dicts are replaced by DataFrames
            # below and the caller's dict must not be modified
            data = data.copy()

        for k, v in data.iteritems():
            if isinstance(v, dict):
                data[k] = DataFrame(v)

        if major is None:
            major = _extract_axis(data, axis=0)

        if minor is None:
            minor = _extract_axis(data, axis=1)

        axes = [items, major, minor]
        reshaped_data = data.copy()  # shallow

        item_shape = len(major), len(minor)

        for item in items:
            v = values = data.get(item)
            if v is None:
                # item requested but not supplied: fill with NaN
                values = np.empty(item_shape, dtype=dtype)
                values.fill(np.nan)
            elif isinstance(v, DataFrame):
                v = v.reindex(index=major, columns=minor, copy=False)
                if dtype is not None:
                    v = v.astype(dtype)
                values = v.values
            reshaped_data[item] = values

        # segregates dtypes and forms blocks matching to columns
        blocks = form_blocks(reshaped_data, axes)
        mgr = BlockManager(blocks, axes).consolidate()
        return mgr

    @property
    def shape(self):
        return len(self.items), len(self.major_axis), len(self.minor_axis)

    @classmethod
    def from_dict(cls, data, intersect=False, orient='items', dtype=None):
        """
        Construct Panel from dict of DataFrame objects

        Parameters
        ----------
        data : dict
            {field : DataFrame}
        intersect : boolean
            Intersect indexes of input DataFrames
        orient : {'items', 'minor'}, default 'items'
            The "orientation" of the data. If the keys of the passed dict
            should be the items of the result panel, pass 'items'
            (default). Otherwise if the columns of the values of the passed
            DataFrame objects should be the items (which in the case of
            mixed-dtype data you should do), instead pass 'minor'

        Returns
        -------
        Panel
        """
        from collections import defaultdict

        orient = orient.lower()
        if orient == 'minor':
            # regroup so that the frames' columns become the dict keys
            new_data = defaultdict(dict)
            for col, df in data.iteritems():
                for item, s in df.iteritems():
                    new_data[item][col] = s
            data = new_data
        elif orient != 'items':  # pragma: no cover
            raise ValueError('only recognize items or minor for orientation')

        data, index, columns = _homogenize_dict(data, intersect=intersect,
                                                dtype=dtype)
        items = Index(sorted(data.keys()))
        return cls(data, items, index, columns)

    def __getitem__(self, key):
        if isinstance(self.items, MultiIndex):
            return self._getitem_multilevel(key)
        return super(Panel, self).__getitem__(key)

    def _getitem_multilevel(self, key):
        """Item lookup when ``items`` is a MultiIndex."""
        loc = self.items.get_loc(key)
        if isinstance(loc, (slice, np.ndarray)):
            new_index = self.items[loc]
            result_index = _maybe_droplevels(new_index, key)
            new_values = self.values[loc, :, :]
            result = Panel(new_values,
                           items=result_index,
                           major_axis=self.major_axis,
                           minor_axis=self.minor_axis)
            return result
        else:
            return self._get_item_cache(key)

    def _init_matrix(self, data, axes, dtype=None, copy=False):
        """Build a single-block BlockManager from array-like ``data``."""
        values = _prep_ndarray(data, copy=copy)

        if dtype is not None:
            try:
                values = values.astype(dtype)
            except Exception:
                raise ValueError('failed to cast to %s' % dtype)

        shape = values.shape
        fixed_axes = []
        for i, ax in enumerate(axes):
            if ax is None:
                ax = _default_index(shape[i])
            else:
                ax = _ensure_index(ax)
            fixed_axes.append(ax)

        items = fixed_axes[0]
        block = make_block(values, items, items)
        return BlockManager([block], fixed_axes)

    #----------------------------------------------------------------------
    # Array interface

    def __array__(self, dtype=None):
        # NOTE(review): ``dtype`` is accepted but not applied here
        return self.values

    def __array_wrap__(self, result):
        return self._constructor(result, items=self.items,
                                 major_axis=self.major_axis,
                                 minor_axis=self.minor_axis, copy=False)

    #----------------------------------------------------------------------
    # Magic methods

    def __repr__(self):
        class_name = str(self.__class__)

        I, N, K = len(self.items), len(self.major_axis), len(self.minor_axis)

        dims = 'Dimensions: %d (items) x %d (major) x %d (minor)' % (I, N, K)

        if len(self.major_axis) > 0:
            major = 'Major axis: %s to %s' % (self.major_axis[0],
                                              self.major_axis[-1])
        else:
            major = 'Major axis: None'

        if len(self.minor_axis) > 0:
            minor = 'Minor axis: %s to %s' % (self.minor_axis[0],
                                              self.minor_axis[-1])
        else:
            minor = 'Minor axis: None'

        if len(self.items) > 0:
            items = 'Items: %s to %s' % (self.items[0], self.items[-1])
        else:
            items = 'Items: None'

        output = '%s\n%s\n%s\n%s\n%s' % (class_name, dims, items, major, minor)

        return output

    def __iter__(self):
        return iter(self.items)

    def iteritems(self):
        for item in self.items:
            yield item, self[item]

    # Name that won't get automatically converted to items by 2to3. items is
    # already in use for the first axis.
    iterkv = iteritems

    def _get_plane_axes(self, axis):
        """
        Return the (index, columns) axes of the 2D plane left over when
        ``axis`` is held fixed.
        """
        axis = self._get_axis_name(axis)

        if axis == 'major_axis':
            index = self.minor_axis
            columns = self.items
        elif axis == 'minor_axis':
            index = self.major_axis
            columns = self.items
        elif axis == 'items':
            index = self.major_axis
            columns = self.minor_axis
        else:
            # previously fell through to an UnboundLocalError
            raise ValueError('Unrecognized axis: %s' % axis)

        return index, columns

    @property
    def _constructor(self):
        return type(self)

    # Fancy indexing
    _ix = None

    @property
    def ix(self):
        # the indexer is created on first access and then reused
        if self._ix is None:
            self._ix = _NDFrameIndexer(self)

        return self._ix

    def _wrap_array(self, arr, axes, copy=False):
        items, major, minor = axes
        return self._constructor(arr, items=items, major_axis=major,
                                 minor_axis=minor, copy=copy)

    fromDict = from_dict

    def to_sparse(self, fill_value=None, kind='block'):
        """
        Convert to SparsePanel

        Parameters
        ----------
        fill_value : float, default NaN
        kind : {'block', 'integer'}

        Returns
        -------
        y : SparsePanel
        """
        from pandas.core.sparse import SparsePanel
        frames = dict(self.iterkv())
        return SparsePanel(frames, items=self.items,
                           major_axis=self.major_axis,
                           minor_axis=self.minor_axis,
                           default_kind=kind,
                           default_fill_value=fill_value)

    def to_excel(self, path, na_rep=''):
        """
        Write each DataFrame in Panel to a separate excel sheet

        Parameters
        ----------
        path : string
            Passed to the ExcelWriter constructor
        na_rep : string, default ''
            Missing data representation
        """
        from pandas.io.parsers import ExcelWriter
        writer = ExcelWriter(path)
        for item, df in self.iteritems():
            name = str(item)
            df.to_excel(writer, name, na_rep=na_rep)
        writer.save()

    # TODO: needed?
    def keys(self):
        return list(self.items)

    def _get_values(self):
        self._consolidate_inplace()
        return self._data.as_matrix()

    values = property(fget=_get_values)

    #----------------------------------------------------------------------
    # Getting and setting elements

    def get_value(self, item, major, minor):
        """
        Quickly retrieve single value at (item, major, minor) location

        Parameters
        ----------
        item : item label (panel item)
        major : major axis label (panel item row)
        minor : minor axis label (panel item column)

        Returns
        -------
        value : scalar value
        """
        # hm, two layers to the onion
        frame = self._get_item_cache(item)
        return frame.get_value(major, minor)

    def set_value(self, item, major, minor, value):
        """
        Quickly set single value at (item, major, minor) location

        Parameters
        ----------
        item : item label (panel item)
        major : major axis label (panel item row)
        minor : minor axis label (panel item column)
        value : scalar

        Returns
        -------
        panel : Panel
            If label combo is contained, will be reference to calling Panel,
            otherwise a new object
        """
        try:
            frame = self._get_item_cache(item)
            frame.set_value(major, minor, value)
            return self
        except KeyError:
            # label not present: enlarge via reindex and set on the result
            ax1, ax2, ax3 = self._expand_axes((item, major, minor))
            result = self.reindex(items=ax1, major=ax2, minor=ax3, copy=False)

            likely_dtype = com._infer_dtype(value)
            made_bigger = not np.array_equal(ax1, self.items)
            # how to make this logic simpler?
            if made_bigger:
                com._possibly_cast_item(result, item, likely_dtype)

            return result.set_value(item, major, minor, value)

    def _box_item_values(self, key, values):
        return DataFrame(values, index=self.major_axis,
                         columns=self.minor_axis)

    def __getattr__(self, name):
        """After regular attribute access, try looking up the name of an item.
        This allows simpler access to items for interactive use."""
        if name in self.items:
            return self[name]
        raise AttributeError("'%s' object has no attribute '%s'" %
                             (type(self).__name__, name))

    def _slice(self, slobj, axis=0):
        new_data = self._data.get_slice(slobj, axis=axis)
        return self._constructor(new_data)

    def __setitem__(self, key, value):
        """
        Set item ``key`` from a DataFrame (reindexed to this panel's major
        and minor axes), an ndarray of shape (N, K), or a scalar.

        Raises
        ------
        TypeError
            If ``value`` is none of the supported kinds
        """
        _, N, K = self.shape
        if isinstance(value, DataFrame):
            value = value.reindex(index=self.major_axis,
                                  columns=self.minor_axis)
            mat = value.values
        elif isinstance(value, np.ndarray):
            assert (value.shape == (N, K))
            mat = np.asarray(value)
        elif np.isscalar(value):
            dtype = _infer_dtype(value)
            mat = np.empty((N, K), dtype=dtype)
            mat.fill(value)
        else:
            # previously fell through to an UnboundLocalError on ``mat``
            raise TypeError('Cannot set item of type: %s' % str(type(value)))

        mat = mat.reshape((1, N, K))
        NDFrame._set_item(self, key, mat)

    def pop(self, item):
        """
        Return item slice from panel and delete from panel

        Parameters
        ----------
        item : object
            Must be contained in panel's items

        Returns
        -------
        y : DataFrame
        """
        return NDFrame.pop(self, item)

    def __getstate__(self):
        "Returned pickled representation of the panel"
        return self._data

    def __setstate__(self, state):
        # old Panel pickle
        if isinstance(state, BlockManager):
            self._data = state
        elif len(state) == 4:  # pragma: no cover
            self._unpickle_panel_compat(state)
        else:  # pragma: no cover
            raise ValueError('unrecognized pickle')
        self._item_cache = {}

    def _unpickle_panel_compat(self, state):  # pragma: no cover
        "Unpickle the panel"
        _unpickle = com._unpickle_array
        vals, items, major, minor = state

        items = _unpickle(items)
        major = _unpickle(major)
        minor = _unpickle(minor)
        values = _unpickle(vals)
        wp = Panel(values, items, major, minor)
        self._data = wp._data

    def conform(self, frame, axis='items'):
        """
        Conform input DataFrame to align with chosen axis pair.

        Parameters
        ----------
        frame : DataFrame
        axis : {'items', 'major', 'minor'}
            Axis the input corresponds to. E.g., if axis='major', then
            the frame's columns would be items, and the index would be
            values of the minor axis

        Returns
        -------
        DataFrame
        """
        index, columns = self._get_plane_axes(axis)
        return frame.reindex(index=index, columns=columns)

    def reindex(self, major=None, items=None, minor=None, method=None,
                major_axis=None, minor_axis=None, copy=True):
        """
        Conform panel to new axis or axes

        Parameters
        ----------
        major : Index or sequence, default None
            Can also use 'major_axis' keyword
        items : Index or sequence, default None
        minor : Index or sequence, default None
            Can also use 'minor_axis' keyword
        method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
            Method to use for filling holes in reindexed Series

            pad / ffill: propagate last valid observation forward to next valid
            backfill / bfill: use NEXT valid observation to fill gap
        copy : boolean, default True
            Return a new object, even if the passed indexes are the same

        Returns
        -------
        Panel (new object)

        Raises
        ------
        ValueError
            If no axis is given and ``copy`` is True
        """
        result = self

        major = _mut_exclusive(major, major_axis)
        minor = _mut_exclusive(minor, minor_axis)

        # all three axes given, no fill method, single dtype: one-pass path
        if (method is None and not self._is_mixed_type and
                com._count_not_none(items, major, minor) == 3):
            return self._reindex_multi(items, major, minor)

        if major is not None:
            result = result._reindex_axis(major, method, 1, copy)

        if minor is not None:
            result = result._reindex_axis(minor, method, 2, copy)

        if items is not None:
            result = result._reindex_axis(items, method, 0, copy)

        if result is self and copy:
            raise ValueError('Must specify at least one axis')

        return result

    def _reindex_multi(self, items, major, minor):
        """Reindex all three axes at once into a freshly allocated array."""
        a0, a1, a2 = len(items), len(major), len(minor)

        values = self.values
        new_values = np.empty((a0, a1, a2), dtype=values.dtype)

        new_items, indexer0 = self.items.reindex(items)
        new_major, indexer1 = self.major_axis.reindex(major)
        new_minor, indexer2 = self.minor_axis.reindex(minor)

        # a None indexer is replaced by the identity mapping
        if indexer0 is None:
            indexer0 = range(len(new_items))

        if indexer1 is None:
            indexer1 = range(len(new_major))

        if indexer2 is None:
            indexer2 = range(len(new_minor))

        for i, ind in enumerate(indexer0):
            com.take_2d_multi(values[ind], indexer1, indexer2,
                              out=new_values[i])

        return Panel(new_values, items=new_items, major_axis=new_major,
                     minor_axis=new_minor)

    def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True):
        """Conform Panel to new index with optional filling logic, placing
        NA/NaN in locations having no value in the previous index. A new object
        is produced unless the new index is equivalent to the current one and
        copy=False

        Parameters
        ----------
        labels : array-like
            New labels / index to conform to. Preferably an Index object to
            avoid duplicating data
        axis : axis to reindex, default 0
        method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
            Method to use for filling holes in reindexed DataFrame

            pad / ffill: propagate last valid observation forward to next valid
            backfill / bfill: use NEXT valid observation to fill gap
        copy : boolean, default True
            Return a new object, even if the passed indexes are the same
        level : int or name
            Accepted for signature compatibility; not used by this
            implementation

        Returns
        -------
        reindexed : Panel
        """
        self._consolidate_inplace()
        return self._reindex_axis(labels, method, axis, copy)

    def reindex_like(self, other, method=None):
        """
        Reindex Panel to match indices of another Panel

        Parameters
        ----------
        other : Panel
        method : string or None

        Returns
        -------
        reindexed : Panel
        """
        # todo: object columns

        return self.reindex(major=other.major_axis, items=other.items,
                            minor=other.minor_axis, method=method)

    def dropna(self, axis=0, how='any'):
        """
        Drop 2D from panel, holding passed axis constant

        Parameters
        ----------
        axis : int, default 0
            Axis to hold constant. E.g. axis=1 will drop major_axis entries
            having a certain amount of NA data
        how : {'all', 'any'}, default 'any'
            'any': one or more values are NA in the DataFrame along the
            axis. For 'all' they all must be.

        Returns
        -------
        dropped : Panel
        """
        axis = self._get_axis_number(axis)

        values = self.values
        mask = com.notnull(values)

        # collapse the other two axes (highest first so numbering stays
        # valid), leaving a count of non-null values per label on ``axis``
        for ax in reversed(sorted(set(range(3)) - set([axis]))):
            mask = mask.sum(ax)

        per_slice = np.prod(values.shape[:axis] + values.shape[axis + 1:])

        if how == 'all':
            cond = mask > 0
        else:
            cond = mask == per_slice

        new_ax = self._get_axis(axis)[cond]
        return self.reindex_axis(new_ax, axis=axis)

    def _combine(self, other, func, axis=0):
        """
        Apply binary ``func`` between this panel and a Panel, DataFrame or
        scalar.

        Raises
        ------
        NotImplementedError
            If ``other`` is none of the supported kinds
        """
        if isinstance(other, Panel):
            return self._combine_panel(other, func)
        elif isinstance(other, DataFrame):
            return self._combine_frame(other, func, axis=axis)
        elif np.isscalar(other):
            new_values = func(self.values, other)
            return self._constructor(new_values, self.items, self.major_axis,
                                     self.minor_axis)
        else:
            # previously returned None silently
            raise NotImplementedError(
                '%s is not supported in combine operation with %s'
                % (str(type(other)), str(type(self))))

    def __neg__(self):
        return -1 * self

    def _combine_frame(self, other, func, axis=0):
        index, columns = self._get_plane_axes(axis)
        axis = self._get_axis_number(axis)

        other = other.reindex(index=index, columns=columns)

        if axis == 0:
            new_values = func(self.values, other.values)
        elif axis == 1:
            new_values = func(self.values.swapaxes(0, 1), other.values.T)
            new_values = new_values.swapaxes(0, 1)
        elif axis == 2:
            new_values = func(self.values.swapaxes(0, 2), other.values)
            new_values = new_values.swapaxes(0, 2)

        return self._constructor(new_values, self.items, self.major_axis,
                                 self.minor_axis)

    def _combine_panel(self, other, func):
        items = self.items + other.items
        major = self.major_axis + other.major_axis
        minor = self.minor_axis + other.minor_axis

        # could check that everything's the same size, but forget it
        this = self.reindex(items=items, major=major, minor=minor)
        other = other.reindex(items=items, major=major, minor=minor)

        result_values = func(this.values, other.values)

        return self._constructor(result_values, items, major, minor)

    def fillna(self, value=None, method='pad'):
        """
        Fill NaN values using the specified method.

        Member Series / TimeSeries are filled separately.

        Parameters
        ----------
        value : any kind (should be same type as array)
            Value to use to fill holes (e.g. 0)

        method : {'backfill', 'bfill', 'pad', 'ffill', None}, default 'pad'
            Method to use for filling holes in reindexed Series

            pad / ffill: propagate last valid observation forward to next valid
            backfill / bfill: use NEXT valid observation to fill gap

        Returns
        -------
        y : Panel

        See also
        --------
        DataFrame.reindex, DataFrame.asfreq
        """
        if value is None:
            # method-based fill is delegated to each item's DataFrame
            result = {}
            for col, s in self.iterkv():
                result[col] = s.fillna(method=method, value=value)

            return self._constructor.from_dict(result)
        else:
            new_data = self._data.fillna(value)
            return self._constructor(new_data)

    add = _panel_arith_method(operator.add, 'add')
    subtract = sub = _panel_arith_method(operator.sub, 'subtract')
    multiply = mul = _panel_arith_method(operator.mul, 'multiply')

    try:
        divide = div = _panel_arith_method(operator.div, 'divide')
    except AttributeError:  # pragma: no cover
        # Python 3
        divide = div = _panel_arith_method(operator.truediv, 'divide')

    def major_xs(self, key, copy=True):
        """
        Return slice of panel along major axis

        Parameters
        ----------
        key : object
            Major axis label
        copy : boolean, default True
            Copy data

        Returns
        -------
        y : DataFrame
            index -> minor axis, columns -> items
        """
        return self.xs(key, axis=1, copy=copy)

    def minor_xs(self, key, copy=True):
        """
        Return slice of panel along minor axis

        Parameters
        ----------
        key : object
            Minor axis label
        copy : boolean, default True
            Copy data

        Returns
        -------
        y : DataFrame
            index -> major axis, columns -> items
        """
        return self.xs(key, axis=2, copy=copy)

    def xs(self, key, axis=1, copy=True):
        """
        Return slice of panel along selected axis

        Parameters
        ----------
        key : object
            Label
        axis : {'items', 'major', 'minor}, default 1/'major'
        copy : boolean, default True
            Copy data

        Returns
        -------
        y : DataFrame
        """
        # normalise first so axis='items' takes the same path as axis=0
        axis = self._get_axis_number(axis)
        if axis == 0:
            data = self[key]
            if copy:
                data = data.copy()
            return data

        self._consolidate_inplace()
        new_data = self._data.xs(key, axis=axis, copy=copy)
        return DataFrame(new_data)

    def _ixs(self, i, axis=0):
        # for compatibility with .ix indexing
        # Won't work with hierarchical indexing yet
        key = self._get_axis(axis)[i]
        return self.xs(key, axis=axis)

    def groupby(self, function, axis='major'):
        """
        Group data on given axis, returning GroupBy object

        Parameters
        ----------
        function : callable
            Mapping function for chosen access
        axis : {'major', 'minor', 'items'}, default 'major'

        Returns
        -------
        grouped : PanelGroupBy
        """
        from pandas.core.groupby import PanelGroupBy
        axis = self._get_axis_number(axis)
        return PanelGroupBy(self, function, axis=axis)

    def swapaxes(self, axis1='major', axis2='minor', copy=True):
        """
        Interchange axes and swap values axes appropriately

        Returns
        -------
        y : Panel (new object)
        """
        i = self._get_axis_number(axis1)
        j = self._get_axis_number(axis2)

        if i == j:
            raise ValueError('Cannot specify the same axis')

        mapping = {i: j, j: i}

        new_axes = (self._get_axis(mapping.get(k, k))
                    for k in range(3))
        new_values = self.values.swapaxes(i, j)
        if copy:
            new_values = new_values.copy()

        return self._constructor(new_values, *new_axes)

    def transpose(self, items='items', major='major', minor='minor',
                  copy=False):
        """
        Permute the dimensions of the Panel

        Parameters
        ----------
        items : int or one of {'items', 'major', 'minor'}
        major : int or one of {'items', 'major', 'minor'}
        minor : int or one of {'items', 'major', 'minor'}
        copy : boolean, default False
            Make a copy of the underlying data. Mixed-dtype data will
            always result in a copy

        Examples
        --------
        >>> p.transpose(2, 0, 1)
        >>> p.transpose(2, 0, 1, copy=True)

        Returns
        -------
        y : Panel (new object)
        """
        i, j, k = [self._get_axis_number(x) for x in [items, major, minor]]

        if i == j or i == k or j == k:
            raise ValueError('Must specify 3 unique axes')

        new_axes = [self._get_axis(x) for x in [i, j, k]]
        new_values = self.values.transpose((i, j, k))
        if copy:
            new_values = new_values.copy()
        return self._constructor(new_values, *new_axes)

    def to_frame(self, filter_observations=True):
        """
        Transform wide format into long (stacked) format as DataFrame

        Parameters
        ----------
        filter_observations : boolean, default True
            Drop (major, minor) pairs without a complete set of observations
            across all the items

        Returns
        -------
        y : DataFrame
        """
        _, N, K = self.shape

        if filter_observations:
            mask = com.notnull(self.values).all(axis=0)
            # size = mask.sum()
            selector = mask.ravel()
        else:
            # size = N * K
            selector = slice(None, None)

        data = {}
        for item in self.items:
            data[item] = self[item].values.ravel()[selector]

        major_labels = np.arange(N).repeat(K)[selector]

        # Anyone think of a better way to do this? np.repeat does not
        # do what I want
        minor_labels = np.arange(K).reshape(1, K)[np.zeros(N, dtype=int)]
        minor_labels = minor_labels.ravel()[selector]

        index = MultiIndex(levels=[self.major_axis, self.minor_axis],
                           labels=[major_labels, minor_labels],
                           names=['major', 'minor'])

        return DataFrame(data, index=index, columns=self.items)

    to_long = deprecate('to_long', to_frame)
    toLong = deprecate('toLong', to_frame)

    def filter(self, items):
        """
        Restrict items in panel to input list

        Parameters
        ----------
        items : sequence

        Returns
        -------
        y : Panel
        """
        intersection = self.items.intersection(items)
        return self.reindex(items=intersection)

    def apply(self, func, axis='major'):
        """
        Apply ``func`` along the given axis of the values

        Parameters
        ----------
        func : numpy function
            Signature should match numpy.{sum, mean, var, std} etc.
        axis : {'major', 'minor', 'items'}

        Returns
        -------
        result : DataFrame or Panel
        """
        i = self._get_axis_number(axis)
        result = np.apply_along_axis(func, i, self.values)
        return self._wrap_result(result, axis=axis)

    def _reduce(self, op, axis=0, skipna=True):
        axis_name = self._get_axis_name(axis)
        axis_number = self._get_axis_number(axis_name)
        f = lambda x: op(x, axis=axis_number, skipna=skipna)

        result = f(self.values)

        index, columns = self._get_plane_axes(axis_name)
        if axis_name != 'items':
            result = result.T

        return DataFrame(result, index=index, columns=columns)

    def _wrap_result(self, result, axis):
        axis = self._get_axis_name(axis)
        index, columns = self._get_plane_axes(axis)

        if axis != 'items':
            result = result.T

        return DataFrame(result, index=index, columns=columns)

    def count(self, axis='major'):
        """
        Return number of observations over requested axis.

        Parameters
        ----------
        axis : {'items', 'major', 'minor'} or {0, 1, 2}

        Returns
        -------
        count : DataFrame
        """
        i = self._get_axis_number(axis)

        values = self.values
        mask = np.isfinite(values)
        result = mask.sum(axis=i)

        return self._wrap_result(result, axis)

    @Substitution(desc='sum', outname='sum')
    @Appender(_agg_doc)
    def sum(self, axis='major', skipna=True):
        return self._reduce(nanops.nansum, axis=axis, skipna=skipna)

    @Substitution(desc='mean', outname='mean')
    @Appender(_agg_doc)
    def mean(self, axis='major', skipna=True):
        return self._reduce(nanops.nanmean, axis=axis, skipna=skipna)

    @Substitution(desc='unbiased variance', outname='variance')
    @Appender(_agg_doc)
    def var(self, axis='major', skipna=True):
        return self._reduce(nanops.nanvar, axis=axis, skipna=skipna)

    @Substitution(desc='unbiased standard deviation', outname='stdev')
    @Appender(_agg_doc)
    def std(self, axis='major', skipna=True):
        return self.var(axis=axis, skipna=skipna).apply(np.sqrt)

    @Substitution(desc='unbiased skewness', outname='skew')
    @Appender(_agg_doc)
    def skew(self, axis='major', skipna=True):
        return self._reduce(nanops.nanskew, axis=axis, skipna=skipna)

    @Substitution(desc='product', outname='prod')
    @Appender(_agg_doc)
    def prod(self, axis='major', skipna=True):
        return self._reduce(nanops.nanprod, axis=axis, skipna=skipna)

    @Substitution(desc='compounded percentage', outname='compounded')
    @Appender(_agg_doc)
    def compound(self, axis='major', skipna=True):
        return (1 + self).prod(axis=axis, skipna=skipna) - 1

    @Substitution(desc='median', outname='median')
    @Appender(_agg_doc)
    def median(self, axis='major', skipna=True):
        return self._reduce(nanops.nanmedian, axis=axis, skipna=skipna)

    @Substitution(desc='maximum', outname='maximum')
    @Appender(_agg_doc)
    def max(self, axis='major', skipna=True):
        return self._reduce(nanops.nanmax, axis=axis, skipna=skipna)

    @Substitution(desc='minimum', outname='minimum')
    @Appender(_agg_doc)
    def min(self, axis='major', skipna=True):
        return self._reduce(nanops.nanmin, axis=axis, skipna=skipna)

    def shift(self, lags, axis='major'):
        """
        Shift major or minor axis by specified number of lags. Drops periods

        Parameters
        ----------
        lags : int
            Positive lags drop the last ``lags`` rows of values and the
            first ``lags`` labels; negative lags do the reverse; 0 keeps
            everything
        axis : {'major', 'minor'}

        Returns
        -------
        shifted : Panel
        """
        values = self.values
        items = self.items
        major_axis = self.major_axis
        minor_axis = self.minor_axis

        # ``-lags`` slicing alone breaks for lags == 0 (empty values) and for
        # negative lags (values and labels end up with different lengths)
        if lags > 0:
            vslicer = slice(None, -lags)
            islicer = slice(lags, None)
        elif lags == 0:
            vslicer = islicer = slice(None)
        else:
            vslicer = slice(-lags, None)
            islicer = slice(None, lags)

        if axis == 'major':
            values = values[:, vslicer, :]
            major_axis = major_axis[islicer]
        elif axis == 'minor':
            values = values[:, :, vslicer]
            minor_axis = minor_axis[islicer]
        else:
            raise ValueError('Invalid axis')

        return self._constructor(values, items=items, major_axis=major_axis,
                                 minor_axis=minor_axis)

    def truncate(self, before=None, after=None, axis='major'):
        """Function truncates a sorted Panel before and/or after some
        particular values on the requested axis

        Parameters
        ----------
        before : date
            Left boundary
        after : date
            Right boundary
        axis : {'major', 'minor', 'items'}

        Returns
        -------
        Panel
        """
        axis = self._get_axis_name(axis)
        index = self._get_axis(axis)

        beg_slice, end_slice = index.slice_locs(before, after)
        new_index = index[beg_slice:end_slice]

        return self.reindex(**{axis: new_index})

    def join(self, other, how='left', lsuffix='', rsuffix=''):
        """
        Join items with other Panel either on major and minor axes column

        Parameters
        ----------
        other : Panel or list of Panels
            Index should be similar to one of the columns in this one
        how : {'left', 'right', 'outer', 'inner'}
            How to handle indexes of the two objects. Default: 'left'
            for joining on index, None otherwise
            * left: use calling frame's index
            * right: use input frame's index
            * outer: form union of indexes
            * inner: use intersection of indexes
        lsuffix : string
            Suffix to use from left frame's overlapping columns
        rsuffix : string
            Suffix to use from right frame's overlapping columns

        Returns
        -------
        joined : Panel
        """
        from pandas.tools.merge import concat

        if isinstance(other, Panel):
            join_major, join_minor = self._get_join_index(other, how)
            this = self.reindex(major=join_major, minor=join_minor)
            other = other.reindex(major=join_major, minor=join_minor)
            merged_data = this._data.merge(other._data, lsuffix, rsuffix)
            return self._constructor(merged_data)
        else:
            if lsuffix or rsuffix:
                raise ValueError('Suffixes not supported when passing '
                                 'multiple panels')

            if how == 'left':
                how = 'outer'
                join_axes = [self.major_axis, self.minor_axis]
            elif how == 'right':
                raise ValueError('Right join not supported with multiple '
                                 'panels')
            else:
                join_axes = None

            return concat([self] + list(other), axis=0, join=how,
                          join_axes=join_axes, verify_integrity=True)

    def _get_join_index(self, other, how):
        """Return the (major, minor) axes to join on for the given ``how``."""
        if how == 'left':
            join_major, join_minor = self.major_axis, self.minor_axis
        elif how == 'right':
            join_major, join_minor = other.major_axis, other.minor_axis
        elif how == 'inner':
            join_major = self.major_axis.intersection(other.major_axis)
            join_minor = self.minor_axis.intersection(other.minor_axis)
        elif how == 'outer':
            join_major = self.major_axis.union(other.major_axis)
            join_minor = self.minor_axis.union(other.minor_axis)
        else:
            # previously fell through to an UnboundLocalError
            raise ValueError('Unrecognized join type: %s' % how)
        return join_major, join_minor
class Panel4D(Panel):
    """
    Represents a 4 dimensional structure with axes
    labels x items x major_axis x minor_axis
    """

    _AXIS_ORDERS = ['labels', 'items', 'major_axis', 'minor_axis']
    # generator expressions (rather than list comprehensions) so that the
    # loop variables do not end up as stray class attributes on Python 2
    _AXIS_NUMBERS = dict((a, i) for i, a in enumerate(_AXIS_ORDERS))
    _AXIS_ALIASES = {'major': 'major_axis', 'minor': 'minor_axis'}
    _AXIS_NAMES = dict((i, a) for i, a in enumerate(_AXIS_ORDERS))
    _AXIS_SLICEMAP = {
        'items': 'items',
        'major_axis': 'major_axis',
        'minor_axis': 'minor_axis'
    }
    _AXIS_LEN = len(_AXIS_ORDERS)

    # major
    _default_stat_axis = 2

    # info axis
    _het_axis = 0
    _info_axis = _AXIS_ORDERS[_het_axis]

    labels = lib.AxisProperty(0)
    items = lib.AxisProperty(1)
    major_axis = lib.AxisProperty(2)
    minor_axis = lib.AxisProperty(3)

    _constructor_sliced = Panel

    def __init__(self, data=None, labels=None, items=None, major_axis=None,
                 minor_axis=None, copy=False, dtype=None):
        """
        Represents a 4 dimensional structure

        Parameters
        ----------
        data : ndarray (labels x items x major x minor), or dict of Panels

        labels : Index or array-like : axis=0
        items : Index or array-like : axis=1
        major_axis : Index or array-like: axis=2
        minor_axis : Index or array-like: axis=3

        dtype : dtype, default None
            Data type to force, otherwise infer
        copy : boolean, default False
            Copy data from inputs. Only affects DataFrame / 2d ndarray input
        """
        self._init_data(data=data, labels=labels, items=items,
                        major_axis=major_axis, minor_axis=minor_axis,
                        copy=copy, dtype=dtype)

    def _get_plane_axes(self, axis):
        """
        Return the three axes that remain once `axis` is removed

        Parameters
        ----------
        axis : axis name, alias or number accepted by _get_axis_name

        Returns
        -------
        (items, major, minor) : tuple of the remaining axes, in the
            order they appear in _AXIS_ORDERS

        Raises
        ------
        ValueError
            If the resolved axis name is not one of this class's axes
        """
        axis = self._get_axis_name(axis)

        if axis not in self._AXIS_ORDERS:
            # previously an unmatched name fell through the if/elif chain
            # and failed on the return with an UnboundLocalError
            raise ValueError('Invalid axis: %s' % axis)

        # every axis except the requested one, keeping the canonical order
        return tuple(getattr(self, name) for name in self._AXIS_ORDERS
                     if name != axis)

    def _combine(self, other, func, axis=0):
        """
        Combine with `other` using `func`; Panel4D operands are aligned
        on all four axes, anything else is delegated to the parent class
        """
        if isinstance(other, Panel4D):
            return self._combine_panel4d(other, func)
        return super(Panel4D, self)._combine(other, func, axis=axis)

    def _combine_panel4d(self, other, func):
        """
        Align self and `other` on the combined labels of every axis and
        apply `func` to the two aligned value arrays

        Returns
        -------
        Panel4D built from func's result and the combined axes
        """
        labels = self.labels + other.labels
        items = self.items + other.items
        major = self.major_axis + other.major_axis
        minor = self.minor_axis + other.minor_axis

        # could check that everything's the same size, but forget it
        new_axes = dict(labels=labels, items=items, major=major, minor=minor)
        this = self.reindex(**new_axes)
        other = other.reindex(**new_axes)

        result_values = func(this.values, other.values)

        return self._constructor(result_values, labels, items, major, minor)

    def join(self, other, how='left', lsuffix='', rsuffix=''):
        """
        Join with another Panel4D on the major and minor axes; any other
        input is handed to the parent class's join

        Parameters
        ----------
        other : Panel4D, or whatever the parent class's join accepts
        how : {'left', 'right', 'outer', 'inner'}
        lsuffix : string
        rsuffix : string

        Returns
        -------
        joined : Panel4D
        """
        if isinstance(other, Panel4D):
            join_major, join_minor = self._get_join_index(other, how)
            this = self.reindex(major=join_major, minor=join_minor)
            other = other.reindex(major=join_major, minor=join_minor)
            merged_data = this._data.merge(other._data, lsuffix, rsuffix)
            return self._constructor(merged_data)
        return super(Panel4D, self).join(other=other, how=how,
                                         lsuffix=lsuffix, rsuffix=rsuffix)

    ### remove operations ####
    def to_frame(self, *args, **kwargs):
        """Not supported for Panel4D"""
        raise NotImplementedError

    def to_excel(self, *args, **kwargs):
        """Not supported for Panel4D"""
        raise NotImplementedError