class SparsePanel(Panel):
    """
    Sparse version of Panel

    Internally the panel is stored as a dict mapping each item label to a
    SparseDataFrame (``self._frames``) plus the three axis indexes.

    Parameters
    ----------
    frames : dict of DataFrame objects, or ndarray
        When an ndarray is passed, it is iterated along its first axis and
        each slice is paired with the corresponding label in `items` to
        build a SparseDataFrame.
    items : array-like
        Item labels. Defaults to the sorted keys of `frames`. Required when
        `frames` is an ndarray.
    major_axis : array-like
    minor_axis : array-like
    default_kind : {'block', 'integer'}, default 'block'
        Default sparse kind for converting Series to SparseSeries. Will not
        override SparseSeries passed into constructor
    default_fill_value : float
        Default fill_value for converting Series to SparseSeries. Will not
        override SparseSeries passed in
    copy : boolean, default False
        Accepted for signature compatibility; not used by the constructor.
    """
    ndim = 3
    _typ = 'panel'
    _subtyp = 'sparse_panel'

    def __init__(self, frames, items=None, major_axis=None, minor_axis=None,
                 default_fill_value=np.nan, default_kind='block',
                 copy=False):
        # ndarray input: turn each slice along the first axis into a
        # SparseDataFrame keyed by the matching item label
        if isinstance(frames, np.ndarray):
            new_frames = {}
            for item, vals in zip(items, frames):
                new_frames[item] = \
                    SparseDataFrame(vals, index=major_axis,
                                    columns=minor_axis,
                                    default_fill_value=default_fill_value,
                                    default_kind=default_kind)
            frames = new_frames

        if not isinstance(frames, dict):
            # exception type kept for backward compatibility; a message is
            # added so the failure is diagnosable
            raise AssertionError('frames must be a dict or ndarray, got %s'
                                 % type(frames).__name__)

        self.default_fill_value = fill_value = default_fill_value
        self.default_kind = kind = default_kind

        # pre-filter, if necessary
        if items is None:
            items = Index(sorted(frames.keys()))
        items = _ensure_index(items)

        (clean_frames,
         major_axis,
         minor_axis) = _convert_frames(frames, major_axis,
                                       minor_axis, kind=kind,
                                       fill_value=fill_value)

        self._frames = clean_frames

        # do we want to fill missing ones?
        for item in items:
            if item not in clean_frames:
                # ValueError is still caught by ``except Exception``
                raise ValueError('column %s not found in data' % item)

        self._items = items
        self.major_axis = major_axis
        self.minor_axis = minor_axis

    def _consolidate_inplace(self):  # pragma: no cover
        # do nothing when DataFrame calls this method
        pass

    def __array_wrap__(self, result):
        """
        Wrap an ndarray result in a SparsePanel that reuses this panel's
        axes, default kind and default fill value.
        """
        return SparsePanel(result, items=self.items,
                           major_axis=self.major_axis,
                           minor_axis=self.minor_axis,
                           default_kind=self.default_kind,
                           default_fill_value=self.default_fill_value)

    @classmethod
    def from_dict(cls, data):
        """
        Analogous to Panel.from_dict

        Parameters
        ----------
        data : dict of DataFrame objects

        Returns
        -------
        panel : instance of `cls`
        """
        # construct through ``cls`` so subclasses get their own type back
        return cls(data)

    def to_dense(self):
        """
        Convert SparsePanel to (dense) Panel

        Returns
        -------
        dense : Panel
        """
        return Panel(self.values, self.items, self.major_axis,
                     self.minor_axis)

    def as_matrix(self):
        """
        Return the dense values of the panel (same as ``self.values``).
        """
        return self.values

    @property
    def values(self):
        # return dense values: one 2-D block per item, in item order
        return np.array([self._frames[item].values
                         for item in self.items])

    # need a special property for items to make the field assignable

    _items = None

    def _get_items(self):
        return self._items

    def _set_items(self, new_items):
        """
        Relabel the items, re-keying the stored frames positionally.

        Raises
        ------
        NotImplementedError
            If `new_items` is a MultiIndex.
        ValueError
            If `new_items` does not have as many labels as the current
            items.
        """
        new_items = _ensure_index(new_items)
        if isinstance(new_items, MultiIndex):
            raise NotImplementedError

        # need to create new frames dict

        old_frame_dict = self._frames
        old_items = self._items

        # frames are re-keyed by position; zip would silently drop frames or
        # leave labels without a frame when the lengths differ
        if len(new_items) != len(old_items):
            raise ValueError('Length mismatch: expected %d items, got %d'
                             % (len(old_items), len(new_items)))

        self._frames = dict((new_k, old_frame_dict[old_k])
                            for new_k, old_k in zip(new_items, old_items))
        self._items = new_items
    items = property(fget=_get_items, fset=_set_items)

    # DataFrame's index
    major_axis = SparsePanelAxis('_major_axis', 'index')

    # DataFrame's columns / "items"
    minor_axis = SparsePanelAxis('_minor_axis', 'columns')

    def _get_item_cache(self, key):
        # items are looked up directly in the frames dict
        return self._frames[key]

    def __setitem__(self, key, value):
        """
        Set the frame stored under `key`.

        A DataFrame is reindexed to this panel's major/minor axes and, if it
        is not already a SparseDataFrame, converted with the panel's default
        fill value and kind. A previously unseen `key` is appended to the
        items.

        Raises
        ------
        ValueError
            If `value` is not a DataFrame.
        """
        if isinstance(value, DataFrame):
            value = value.reindex(index=self.major_axis,
                                  columns=self.minor_axis)
            if not isinstance(value, SparseDataFrame):
                value = value.to_sparse(fill_value=self.default_fill_value,
                                        kind=self.default_kind)
        else:
            raise ValueError('only DataFrame objects can be set currently')

        self._frames[key] = value

        if key not in self.items:
            # assign ``_items`` directly: the ``items`` setter would re-key
            # the frames dict
            self._items = Index(list(self.items) + [key])

    def set_value(self, item, major, minor, value):
        """
        Quickly set single value at (item, major, minor) location

        Parameters
        ----------
        item : item label (panel item)
        major : major axis label (panel item row)
        minor : minor axis label (panel item column)
        value : scalar

        Notes
        -----
        This method *always* returns a new object. It is not particularly
        efficient but is provided for API compatibility with Panel

        Returns
        -------
        panel : SparsePanel
        """
        # round-trip through a dense Panel
        dense = self.to_dense().set_value(item, major, minor, value)
        return dense.to_sparse(kind=self.default_kind,
                               fill_value=self.default_fill_value)

    def __delitem__(self, key):
        """
        Remove the frame stored under `key` and drop `key` from the items.
        """
        loc = self.items.get_loc(key)
        # every position except ``loc``
        indices = lrange(loc) + lrange(loc + 1, len(self.items))
        del self._frames[key]
        self._items = self._items.take(indices)

    def __getstate__(self):
        # pickling
        return (self._frames, com._pickle_array(self.items),
                com._pickle_array(self.major_axis),
                com._pickle_array(self.minor_axis),
                self.default_fill_value, self.default_kind)

    def __setstate__(self, state):
        # mirror of __getstate__: same six fields, same order
        frames, items, major, minor, fv, kind = state

        self.default_fill_value = fv
        self.default_kind = kind
        self._items = _ensure_index(com._unpickle_array(items))
        self._major_axis = _ensure_index(com._unpickle_array(major))
        self._minor_axis = _ensure_index(com._unpickle_array(minor))
        self._frames = frames

    def copy(self):
        """
        Make a (shallow) copy of the sparse panel

        Returns
        -------
        copy : SparsePanel
        """
        return SparsePanel(self._frames.copy(), items=self.items,
                           major_axis=self.major_axis,
                           minor_axis=self.minor_axis,
                           default_fill_value=self.default_fill_value,
                           default_kind=self.default_kind)

    def to_frame(self, filter_observations=True):
        """
        Convert SparsePanel to (dense) DataFrame

        Parameters
        ----------
        filter_observations : boolean, default True
            Must be True; only (major, minor) pairs observed in every item
            are kept.

        Returns
        -------
        frame : DataFrame

        Raises
        ------
        TypeError
            If `filter_observations` is False.
        """
        if not filter_observations:
            raise TypeError('filter_observations=False not supported for '
                            'SparsePanel.to_long')

        I, N, K = self.shape
        # number of items having an observation at each flattened
        # (minor, major) position
        counts = np.zeros(N * K, dtype=int)

        d_values = {}
        d_indexer = {}

        for item in self.items:
            frame = self[item]

            values, major, minor = _stack_sparse_info(frame)

            # values are stacked column-major
            indexer = minor * N + major
            counts.put(indexer, counts.take(indexer) + 1)  # cuteness

            d_values[item] = values
            d_indexer[item] = indexer

        # have full set of observations for each item
        mask = counts == I

        # for each item, take mask values at index locations for those sparse
        # values, and use that to select values
        values = np.column_stack([d_values[item][mask.take(d_indexer[item])]
                                  for item in self.items])

        inds, = mask.nonzero()

        # still column major
        major_labels = inds % N
        minor_labels = inds // N

        index = MultiIndex(levels=[self.major_axis, self.minor_axis],
                           labels=[major_labels, minor_labels])

        df = DataFrame(values, index=index, columns=self.items)
        return df.sortlevel(level=0)

    to_long = deprecate('to_long', to_frame)
    toLong = deprecate('toLong', to_frame)

    def reindex(self, major=None, items=None, minor=None, major_axis=None,
                minor_axis=None, copy=False):
        """
        Conform / reshape panel axis labels to new input labels

        Parameters
        ----------
        major : array-like, default None
        items : array-like, default None
        minor : array-like, default None
        major_axis : array-like, default None
            Alternative spelling of `major`; combined with it through
            ``com._mut_exclusive``.
        minor_axis : array-like, default None
            Alternative spelling of `minor`; combined with it through
            ``com._mut_exclusive``.
        copy : boolean, default False
            Copy underlying SparseDataFrame objects

        Returns
        -------
        reindexed : SparsePanel

        Raises
        ------
        ValueError
            If none of the axes is specified.
        NotImplementedError
            If `items` contains a label not present in this panel.
        """
        major = com._mut_exclusive(major, major_axis)
        minor = com._mut_exclusive(minor, minor_axis)

        if com._all_none(items, major, minor):
            raise ValueError('Must specify at least one axis')

        major = self.major_axis if major is None else major
        minor = self.minor_axis if minor is None else minor

        if items is not None:
            new_frames = {}
            for item in items:
                if item in self._frames:
                    new_frames[item] = self._frames[item]
                else:
                    raise NotImplementedError('Reindexing with new items '
                                              'not yet supported')
        else:
            new_frames = self._frames

        if copy:
            new_frames = dict((k, v.copy())
                              for k, v in compat.iteritems(new_frames))

        return SparsePanel(new_frames, items=items,
                           major_axis=major,
                           minor_axis=minor,
                           default_fill_value=self.default_fill_value,
                           default_kind=self.default_kind)

    def _combine(self, other, func, axis=0):
        """
        Apply the binary function `func` to this panel and `other`.

        `other` may be a DataFrame, a Panel or a scalar.

        Raises
        ------
        NotImplementedError
            For any other type of `other`.
        """
        if isinstance(other, DataFrame):
            return self._combineFrame(other, func, axis=axis)
        elif isinstance(other, Panel):
            return self._combinePanel(other, func)
        elif np.isscalar(other):
            new_frames = dict((k, func(v, other))
                              for k, v in compat.iteritems(self))
            return self._new_like(new_frames)
        else:
            # previously fell through and silently returned None
            raise NotImplementedError(
                '%s is not supported in combine operation with %s'
                % (str(type(other)), str(type(self))))

    def _combineFrame(self, other, func, axis=0):
        """
        Combine with a DataFrame along `axis` using dense values, then
        rebuild one SparseDataFrame per item with that item's own default
        fill value and kind.
        """
        index, columns = self._get_plane_axes(axis)
        axis = self._get_axis_number(axis)

        other = other.reindex(index=index, columns=columns)

        if axis == 0:
            new_values = func(self.values, other.values)
        elif axis == 1:
            new_values = func(self.values.swapaxes(0, 1), other.values.T)
            new_values = new_values.swapaxes(0, 1)
        elif axis == 2:
            new_values = func(self.values.swapaxes(0, 2), other.values)
            new_values = new_values.swapaxes(0, 2)

        # TODO: make faster!
        new_frames = {}
        for item, item_slice in zip(self.items, new_values):
            old_frame = self[item]
            ofv = old_frame.default_fill_value
            ok = old_frame.default_kind
            new_frames[item] = SparseDataFrame(item_slice,
                                               index=self.major_axis,
                                               columns=self.minor_axis,
                                               default_fill_value=ofv,
                                               default_kind=ok)

        return self._new_like(new_frames)

    def _new_like(self, new_frames):
        """
        Build a SparsePanel from `new_frames` with this panel's axes,
        default fill value and default kind.
        """
        return SparsePanel(new_frames, self.items, self.major_axis,
                           self.minor_axis,
                           default_fill_value=self.default_fill_value,
                           default_kind=self.default_kind)

    def _combinePanel(self, other, func):
        """
        Combine with another panel item by item, after reindexing both
        operands to the combined (``+``) items / major / minor axes.
        """
        items = self.items + other.items
        major = self.major_axis + other.major_axis
        minor = self.minor_axis + other.minor_axis

        # could check that everything's the same size, but forget it

        this = self.reindex(items=items, major=major, minor=minor)
        other = other.reindex(items=items, major=major, minor=minor)

        new_frames = {}
        for item in items:
            new_frames[item] = func(this[item], other[item])

        if not isinstance(other, SparsePanel):
            new_default_fill = self.default_fill_value
        else:
            # maybe unnecessary
            new_default_fill = func(self.default_fill_value,
                                    other.default_fill_value)

        return SparsePanel(new_frames, items, major, minor,
                           default_fill_value=new_default_fill,
                           default_kind=self.default_kind)

    def major_xs(self, key):
        """
        Return slice of panel along major axis

        Parameters
        ----------
        key : object
            Major axis label

        Returns
        -------
        y : DataFrame
            index -> minor axis, columns -> items
        """
        slices = dict((k, v.xs(key)) for k, v in compat.iteritems(self))
        return DataFrame(slices, index=self.minor_axis, columns=self.items)

    def minor_xs(self, key):
        """
        Return slice of panel along minor axis

        Parameters
        ----------
        key : object
            Minor axis label

        Returns
        -------
        y : SparseDataFrame
            index -> major axis, columns -> items
        """
        slices = dict((k, v[key]) for k, v in compat.iteritems(self))
        return SparseDataFrame(slices, index=self.major_axis,
                               columns=self.items,
                               default_fill_value=self.default_fill_value,
                               default_kind=self.default_kind)


class Panel(NDFrame): """ Represents wide format panel data, stored as 3-dimensional array Parameters ---------- data : ndarray (items x major x minor), or dict of DataFrames items : Index or array-like axis=0 major_axis : Index or array-like axis=1 minor_axis : Index or array-like axis=2 dtype : dtype, default None Data type to force, otherwise infer copy : boolean, default False Copy data from inputs. Only affects DataFrame / 2d ndarray input """ @property def _constructor(self): return type(self) _constructor_sliced = DataFrame def __init__(self, data=None, items=None, major_axis=None, minor_axis=None, copy=False, dtype=None): self._init_data(data=data, items=items, major_axis=major_axis, minor_axis=minor_axis, copy=copy, dtype=dtype) def _init_data(self, data, copy, dtype, **kwargs): """ Generate ND initialization; axes are passed as required objects to __init__ """ if data is None: data = {} if dtype is not None: dtype = self._validate_dtype(dtype) passed_axes = [kwargs.get(a) for a in self._AXIS_ORDERS] axes = None if isinstance(data, BlockManager): if any(x is not None for x in passed_axes): axes = [ x if x is not None else y for x, y in zip(passed_axes, data.axes) ] mgr = data elif isinstance(data, dict): mgr = self._init_dict(data, passed_axes, dtype=dtype) copy = False dtype = None elif isinstance(data, (np.ndarray, list)): mgr = self._init_matrix(data, passed_axes, dtype=dtype, copy=copy) copy = False dtype = None else: # pragma: no cover raise PandasError('Panel constructor not properly called!') NDFrame.__init__(self, mgr, axes=axes, copy=copy, dtype=dtype) def _init_dict(self, data, axes, dtype=None): haxis = axes.pop(self._info_axis_number) # prefilter if haxis passed if haxis is not None: haxis = _ensure_index(haxis) data = OrderedDict( (k, v) for k, v in compat.iteritems(data) if k in haxis) else: ks = list(data.keys()) if not isinstance(data, OrderedDict): ks = _try_sort(ks) haxis = Index(ks) for k, v in compat.iteritems(data): if isinstance(v, 
dict): data[k] = self._constructor_sliced(v) # extract axis for remaining axes & create the slicemap raxes = [ self._extract_axis(self, data, axis=i) if a is None else a for i, a in enumerate(axes) ] raxes_sm = self._extract_axes_for_slice(self, raxes) # shallow copy arrays = [] haxis_shape = [len(a) for a in raxes] for h in haxis: v = values = data.get(h) if v is None: values = np.empty(haxis_shape, dtype=dtype) values.fill(np.nan) elif isinstance(v, self._constructor_sliced): d = raxes_sm.copy() d['copy'] = False v = v.reindex(**d) if dtype is not None: v = v.astype(dtype) values = v.values arrays.append(values) return self._init_arrays(arrays, haxis, [haxis] + raxes) def _init_arrays(self, arrays, arr_names, axes): return create_block_manager_from_arrays(arrays, arr_names, axes) @classmethod def from_dict(cls, data, intersect=False, orient='items', dtype=None): """ Construct Panel from dict of DataFrame objects Parameters ---------- data : dict {field : DataFrame} intersect : boolean Intersect indexes of input DataFrames orient : {'items', 'minor'}, default 'items' The "orientation" of the data. If the keys of the passed dict should be the items of the result panel, pass 'items' (default). 
Otherwise if the columns of the values of the passed DataFrame objects should be the items (which in the case of mixed-dtype data you should do), instead pass 'minor' Returns ------- Panel """ orient = orient.lower() if orient == 'minor': new_data = OrderedDefaultdict(dict) for col, df in compat.iteritems(data): for item, s in compat.iteritems(df): new_data[item][col] = s data = new_data elif orient != 'items': # pragma: no cover raise ValueError('Orientation must be one of {items, minor}.') d = cls._homogenize_dict(cls, data, intersect=intersect, dtype=dtype) ks = list(d['data'].keys()) if not isinstance(d['data'], OrderedDict): ks = list(sorted(ks)) d[cls._info_axis_name] = Index(ks) return cls(**d) def __getitem__(self, key): if isinstance(self._info_axis, MultiIndex): return self._getitem_multilevel(key) return super(Panel, self).__getitem__(key) def _getitem_multilevel(self, key): info = self._info_axis loc = info.get_loc(key) if isinstance(loc, (slice, np.ndarray)): new_index = info[loc] result_index = _maybe_droplevels(new_index, key) slices = [loc] + [slice(None) for x in range(self._AXIS_LEN - 1)] new_values = self.values[slices] d = self._construct_axes_dict(self._AXIS_ORDERS[1:]) d[self._info_axis_name] = result_index result = self._constructor(new_values, **d) return result else: return self._get_item_cache(key) def _init_matrix(self, data, axes, dtype=None, copy=False): values = self._prep_ndarray(self, data, copy=copy) if dtype is not None: try: values = values.astype(dtype) except Exception: raise ValueError('failed to cast to %s' % dtype) shape = values.shape fixed_axes = [] for i, ax in enumerate(axes): if ax is None: ax = _default_index(shape[i]) else: ax = _ensure_index(ax) fixed_axes.append(ax) return create_block_manager_from_blocks([values], fixed_axes) #---------------------------------------------------------------------- # Comparison methods def _compare_constructor(self, other, func): if not self._indexed_same(other): raise Exception('Can 
only compare identically-labeled ' 'same type objects') new_data = {} for col in self._info_axis: new_data[col] = func(self[col], other[col]) d = self._construct_axes_dict(copy=False) return self._constructor(data=new_data, **d) #---------------------------------------------------------------------- # Magic methods def __unicode__(self): """ Return a string representation for a particular Panel Invoked by unicode(df) in py2 only. Yields a Unicode String in both py2/py3. """ class_name = str(self.__class__) shape = self.shape dims = u('Dimensions: %s') % ' x '.join( ["%d (%s)" % (s, a) for a, s in zip(self._AXIS_ORDERS, shape)]) def axis_pretty(a): v = getattr(self, a) if len(v) > 0: return u('%s axis: %s to %s') % (a.capitalize(), com.pprint_thing(v[0]), com.pprint_thing(v[-1])) else: return u('%s axis: None') % a.capitalize() output = '\n'.join([class_name, dims] + [axis_pretty(a) for a in self._AXIS_ORDERS]) return output def _get_plane_axes(self, axis): """ Get my plane axes: these are already (as compared with higher level planes), as we are returning a DataFrame axes """ axis = self._get_axis_name(axis) if axis == 'major_axis': index = self.minor_axis columns = self.items if axis == 'minor_axis': index = self.major_axis columns = self.items elif axis == 'items': index = self.major_axis columns = self.minor_axis return index, columns fromDict = from_dict def to_sparse(self, fill_value=None, kind='block'): """ Convert to SparsePanel Parameters ---------- fill_value : float, default NaN kind : {'block', 'integer'} Returns ------- y : SparseDataFrame """ from pandas.core.sparse import SparsePanel frames = dict(compat.iteritems(self)) return SparsePanel(frames, items=self.items, major_axis=self.major_axis, minor_axis=self.minor_axis, default_kind=kind, default_fill_value=fill_value) def to_excel(self, path, na_rep='', engine=None, **kwargs): """ Write each DataFrame in Panel to a separate excel sheet Parameters ---------- path : string or ExcelWriter object File 
path or existing ExcelWriter na_rep : string, default '' Missing data representation engine : string, default None write engine to use - you can also set this via the options ``io.excel.xlsx.writer``, ``io.excel.xls.writer``, and ``io.excel.xlsm.writer``. Other Parameters ---------------- float_format : string, default None Format string for floating point numbers cols : sequence, optional Columns to write header : boolean or list of string, default True Write out column names. If a list of string is given it is assumed to be aliases for the column names index : boolean, default True Write row names (index) index_label : string or sequence, default None Column label for index column(s) if desired. If None is given, and `header` and `index` are True, then the index names are used. A sequence should be given if the DataFrame uses MultiIndex. startow : upper left cell row to dump data frame startcol : upper left cell column to dump data frame Notes ----- Keyword arguments (and na_rep) are passed to the ``to_excel`` method for each DataFrame written. 
""" from pandas.io.excel import ExcelWriter if isinstance(path, compat.string_types): writer = ExcelWriter(path, engine=engine) else: writer = path kwargs['na_rep'] = na_rep for item, df in compat.iteritems(self): name = str(item) df.to_excel(writer, name, **kwargs) writer.save() def as_matrix(self): self._consolidate_inplace() return self._data.as_matrix() #---------------------------------------------------------------------- # Getting and setting elements def get_value(self, *args): """ Quickly retrieve single value at (item, major, minor) location Parameters ---------- item : item label (panel item) major : major axis label (panel item row) minor : minor axis label (panel item column) Returns ------- value : scalar value """ nargs = len(args) nreq = self._AXIS_LEN # require an arg for each axis if nargs != nreq: raise TypeError('There must be an argument for each axis, you gave' ' {0} args, but {1} are required'.format( nargs, nreq)) # hm, two layers to the onion frame = self._get_item_cache(args[0]) return frame.get_value(*args[1:]) def set_value(self, *args): """ Quickly set single value at (item, major, minor) location Parameters ---------- item : item label (panel item) major : major axis label (panel item row) minor : minor axis label (panel item column) value : scalar Returns ------- panel : Panel If label combo is contained, will be reference to calling Panel, otherwise a new object """ # require an arg for each axis and the value nargs = len(args) nreq = self._AXIS_LEN + 1 if nargs != nreq: raise TypeError('There must be an argument for each axis plus the ' 'value provided, you gave {0} args, but {1} are ' 'required'.format(nargs, nreq)) try: frame = self._get_item_cache(args[0]) frame.set_value(*args[1:]) return self except KeyError: axes = self._expand_axes(args) d = self._construct_axes_dict_from(self, axes, copy=False) result = self.reindex(**d) args = list(args) likely_dtype, args[-1] = _infer_dtype_from_scalar(args[-1]) made_bigger = not 
np.array_equal(axes[0], self._info_axis) # how to make this logic simpler? if made_bigger: com._possibly_cast_item(result, args[0], likely_dtype) return result.set_value(*args) def _box_item_values(self, key, values): if self.ndim == values.ndim: result = self._constructor(values) # a dup selection will yield a full ndim if result._get_axis(0).is_unique: result = result[key] return result d = self._construct_axes_dict_for_slice(self._AXIS_ORDERS[1:]) return self._constructor_sliced(values, **d) def _slice(self, slobj, axis=0, raise_on_error=False, typ=None): new_data = self._data.get_slice(slobj, axis=axis, raise_on_error=raise_on_error) return self._constructor(new_data) def __setitem__(self, key, value): shape = tuple(self.shape) if isinstance(value, self._constructor_sliced): value = value.reindex( **self._construct_axes_dict_for_slice(self._AXIS_ORDERS[1:])) mat = value.values elif isinstance(value, np.ndarray): if value.shape != shape[1:]: raise ValueError('shape of value must be {0}, shape of given ' 'object was {1}'.format( shape[1:], tuple(map(int, value.shape)))) mat = np.asarray(value) elif np.isscalar(value): dtype, value = _infer_dtype_from_scalar(value) mat = np.empty(shape[1:], dtype=dtype) mat.fill(value) else: raise TypeError('Cannot set item of type: %s' % str(type(value))) mat = mat.reshape(tuple([1]) + shape[1:]) NDFrame._set_item(self, key, mat) def _unpickle_panel_compat(self, state): # pragma: no cover "Unpickle the panel" _unpickle = com._unpickle_array vals, items, major, minor = state items = _unpickle(items) major = _unpickle(major) minor = _unpickle(minor) values = _unpickle(vals) wp = Panel(values, items, major, minor) self._data = wp._data def conform(self, frame, axis='items'): """ Conform input DataFrame to align with chosen axis pair. Parameters ---------- frame : DataFrame axis : {'items', 'major', 'minor'} Axis the input corresponds to. 
E.g., if axis='major', then the frame's columns would be items, and the index would be values of the minor axis Returns ------- DataFrame """ axes = self._get_plane_axes(axis) return frame.reindex(**self._extract_axes_for_slice(self, axes)) def head(self, n=5): raise NotImplementedError def tail(self, n=5): raise NotImplementedError def _needs_reindex_multi(self, axes, method, level): # only allowing multi-index on Panel (and not > dims) return method is None and not self._is_mixed_type and self._AXIS_LEN <= 3 and com._count_not_none( *axes.values()) == 3 def _reindex_multi(self, axes, copy, fill_value): """ we are guaranteed non-Nones in the axes! """ items = axes['items'] major = axes['major_axis'] minor = axes['minor_axis'] a0, a1, a2 = len(items), len(major), len(minor) values = self.values new_values = np.empty((a0, a1, a2), dtype=values.dtype) new_items, indexer0 = self.items.reindex(items) new_major, indexer1 = self.major_axis.reindex(major) new_minor, indexer2 = self.minor_axis.reindex(minor) if indexer0 is None: indexer0 = lrange(len(new_items)) if indexer1 is None: indexer1 = lrange(len(new_major)) if indexer2 is None: indexer2 = lrange(len(new_minor)) for i, ind in enumerate(indexer0): com.take_2d_multi(values[ind], (indexer1, indexer2), out=new_values[i]) return Panel(new_values, items=new_items, major_axis=new_major, minor_axis=new_minor) def dropna(self, axis=0, how='any', inplace=False, **kwargs): """ Drop 2D from panel, holding passed axis constant Parameters ---------- axis : int, default 0 Axis to hold constant. E.g. axis=1 will drop major_axis entries having a certain amount of NA data how : {'all', 'any'}, default 'any' 'any': one or more values are NA in the DataFrame along the axis. For 'all' they all must be. inplace : bool, default False If True, do operation inplace and return None. 
Returns ------- dropped : Panel """ axis = self._get_axis_number(axis) values = self.values mask = com.notnull(values) for ax in reversed(sorted(set(range(self._AXIS_LEN)) - set([axis]))): mask = mask.sum(ax) per_slice = np.prod(values.shape[:axis] + values.shape[axis + 1:]) if how == 'all': cond = mask > 0 else: cond = mask == per_slice new_ax = self._get_axis(axis)[cond] result = self.reindex_axis(new_ax, axis=axis) if inplace: self._update_inplace(result) else: return result def _combine(self, other, func, axis=0): if isinstance(other, Panel): return self._combine_panel(other, func) elif isinstance(other, DataFrame): return self._combine_frame(other, func, axis=axis) elif np.isscalar(other): return self._combine_const(other, func) def _combine_const(self, other, func): new_values = func(self.values, other) d = self._construct_axes_dict() return self._constructor(new_values, **d) def _combine_frame(self, other, func, axis=0): index, columns = self._get_plane_axes(axis) axis = self._get_axis_number(axis) other = other.reindex(index=index, columns=columns) if axis == 0: new_values = func(self.values, other.values) elif axis == 1: new_values = func(self.values.swapaxes(0, 1), other.values.T) new_values = new_values.swapaxes(0, 1) elif axis == 2: new_values = func(self.values.swapaxes(0, 2), other.values) new_values = new_values.swapaxes(0, 2) return self._constructor(new_values, self.items, self.major_axis, self.minor_axis) def _combine_panel(self, other, func): items = self.items + other.items major = self.major_axis + other.major_axis minor = self.minor_axis + other.minor_axis # could check that everything's the same size, but forget it this = self.reindex(items=items, major=major, minor=minor) other = other.reindex(items=items, major=major, minor=minor) result_values = func(this.values, other.values) return self._constructor(result_values, items, major, minor) def major_xs(self, key, copy=True): """ Return slice of panel along major axis Parameters ---------- key 
: object Major axis label copy : boolean, default False Copy data Returns ------- y : DataFrame index -> minor axis, columns -> items """ return self.xs(key, axis=self._AXIS_LEN - 2, copy=copy) def minor_xs(self, key, copy=True): """ Return slice of panel along minor axis Parameters ---------- key : object Minor axis label copy : boolean, default False Copy data Returns ------- y : DataFrame index -> major axis, columns -> items """ return self.xs(key, axis=self._AXIS_LEN - 1, copy=copy) def xs(self, key, axis=1, copy=True): """ Return slice of panel along selected axis Parameters ---------- key : object Label axis : {'items', 'major', 'minor}, default 1/'major' Returns ------- y : ndim(self)-1 """ axis = self._get_axis_number(axis) if axis == 0: data = self[key] if copy: data = data.copy() return data self._consolidate_inplace() axis_number = self._get_axis_number(axis) new_data = self._data.xs(key, axis=axis_number, copy=copy) return self._construct_return_type(new_data) _xs = xs def _ixs(self, i, axis=0): """ i : int, slice, or sequence of integers axis : int """ key = self._get_axis(axis)[i] # xs cannot handle a non-scalar key, so just reindex here if _is_list_like(key): indexer = {self._get_axis_name(axis): key} return self.reindex(**indexer) # a reduction if axis == 0: values = self._data.iget(i) return self._box_item_values(key, values) # xs by position self._consolidate_inplace() new_data = self._data.xs(i, axis=axis, copy=True, takeable=True) return self._construct_return_type(new_data) def groupby(self, function, axis='major'): """ Group data on given axis, returning GroupBy object Parameters ---------- function : callable Mapping function for chosen access axis : {'major', 'minor', 'items'}, default 'major' Returns ------- grouped : PanelGroupBy """ from pandas.core.groupby import PanelGroupBy axis = self._get_axis_number(axis) return PanelGroupBy(self, function, axis=axis) def to_frame(self, filter_observations=True): """ Transform wide format into long 
(stacked) format as DataFrame Parameters ---------- filter_observations : boolean, default True Drop (major, minor) pairs without a complete set of observations across all the items Returns ------- y : DataFrame """ _, N, K = self.shape if filter_observations: mask = com.notnull(self.values).all(axis=0) # size = mask.sum() selector = mask.ravel() else: # size = N * K selector = slice(None, None) data = {} for item in self.items: data[item] = self[item].values.ravel()[selector] major_labels = np.arange(N).repeat(K)[selector] # Anyone think of a better way to do this? np.repeat does not # do what I want minor_labels = np.arange(K).reshape(1, K)[np.zeros(N, dtype=int)] minor_labels = minor_labels.ravel()[selector] maj_name = self.major_axis.name or 'major' min_name = self.minor_axis.name or 'minor' index = MultiIndex(levels=[self.major_axis, self.minor_axis], labels=[major_labels, minor_labels], names=[maj_name, min_name], verify_integrity=False) return DataFrame(data, index=index, columns=self.items) to_long = deprecate('to_long', to_frame) toLong = deprecate('toLong', to_frame) def apply(self, func, axis='major'): """ Apply Parameters ---------- func : numpy function Signature should match numpy.{sum, mean, var, std} etc. 
axis : {'major', 'minor', 'items'} fill_value : boolean, default True Replace NaN values with specified first Returns ------- result : DataFrame or Panel """ i = self._get_axis_number(axis) result = np.apply_along_axis(func, i, self.values) return self._wrap_result(result, axis=axis) def _reduce(self, op, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds): axis_name = self._get_axis_name(axis) axis_number = self._get_axis_number(axis_name) f = lambda x: op(x, axis=axis_number, skipna=skipna, **kwds) result = f(self.values) axes = self._get_plane_axes(axis_name) if result.ndim == 2 and axis_name != self._info_axis_name: result = result.T return self._construct_return_type(result, axes) def _construct_return_type(self, result, axes=None, **kwargs): """ return the type for the ndim of the result """ ndim = result.ndim if self.ndim == ndim: """ return the construction dictionary for these axes """ if axes is None: return self._constructor(result) return self._constructor(result, **self._construct_axes_dict()) elif self.ndim == ndim + 1: if axes is None: return self._constructor_sliced(result) return self._constructor_sliced( result, **self._extract_axes_for_slice(self, axes)) raise PandasError( "invalid _construct_return_type [self->%s] [result->%s]" % (self.ndim, result.ndim)) def _wrap_result(self, result, axis): axis = self._get_axis_name(axis) axes = self._get_plane_axes(axis) if result.ndim == 2 and axis != self._info_axis_name: result = result.T return self._construct_return_type(result, axes) @Appender(_shared_docs['reindex'] % _shared_doc_kwargs) def reindex(self, items=None, major_axis=None, minor_axis=None, **kwargs): major_axis = major_axis if major_axis is not None else kwargs.pop( 'major', None) minor_axis = minor_axis if minor_axis is not None else kwargs.pop( 'minor', None) return super(Panel, self).reindex(items=items, major_axis=major_axis, minor_axis=minor_axis, **kwargs) @Appender(_shared_docs['rename'] % _shared_doc_kwargs) def 
rename(self, items=None, major_axis=None, minor_axis=None, **kwargs): major_axis = major_axis if major_axis is not None else kwargs.pop( 'major', None) minor_axis = minor_axis if minor_axis is not None else kwargs.pop( 'minor', None) return super(Panel, self).rename(items=items, major_axis=major_axis, minor_axis=minor_axis, **kwargs) @Appender(_shared_docs['reindex_axis'] % _shared_doc_kwargs) def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True, limit=None, fill_value=np.nan): return super(Panel, self).reindex_axis(labels=labels, axis=axis, method=method, level=level, copy=copy, limit=limit, fill_value=fill_value) @Appender(_shared_docs['transpose'] % _shared_doc_kwargs) def transpose(self, *args, **kwargs): return super(Panel, self).transpose(*args, **kwargs) def count(self, axis='major'): """ Return number of observations over requested axis. Parameters ---------- axis : {'items', 'major', 'minor'} or {0, 1, 2} Returns ------- count : DataFrame """ i = self._get_axis_number(axis) values = self.values mask = np.isfinite(values) result = mask.sum(axis=i) return self._wrap_result(result, axis) def shift(self, lags, freq=None, axis='major'): """ Shift major or minor axis by specified number of leads/lags. 
Drops periods right now compared with DataFrame.shift Parameters ---------- lags : int axis : {'major', 'minor'} Returns ------- shifted : Panel """ values = self.values items = self.items major_axis = self.major_axis minor_axis = self.minor_axis if freq: return self.tshift(lags, freq, axis=axis) if lags > 0: vslicer = slice(None, -lags) islicer = slice(lags, None) elif lags == 0: vslicer = islicer = slice(None) else: vslicer = slice(-lags, None) islicer = slice(None, lags) axis = self._get_axis_name(axis) if axis == 'major_axis': values = values[:, vslicer, :] major_axis = major_axis[islicer] elif axis == 'minor_axis': values = values[:, :, vslicer] minor_axis = minor_axis[islicer] else: raise ValueError('Invalid axis') return self._constructor(values, items=items, major_axis=major_axis, minor_axis=minor_axis) def tshift(self, periods=1, freq=None, axis='major', **kwds): return super(Panel, self).tshift(periods, freq, axis, **kwds) def join(self, other, how='left', lsuffix='', rsuffix=''): """ Join items with other Panel either on major and minor axes column Parameters ---------- other : Panel or list of Panels Index should be similar to one of the columns in this one how : {'left', 'right', 'outer', 'inner'} How to handle indexes of the two objects. 
Default: 'left' for joining on index, None otherwise * left: use calling frame's index * right: use input frame's index * outer: form union of indexes * inner: use intersection of indexes lsuffix : string Suffix to use from left frame's overlapping columns rsuffix : string Suffix to use from right frame's overlapping columns Returns ------- joined : Panel """ from pandas.tools.merge import concat if isinstance(other, Panel): join_major, join_minor = self._get_join_index(other, how) this = self.reindex(major=join_major, minor=join_minor) other = other.reindex(major=join_major, minor=join_minor) merged_data = this._data.merge(other._data, lsuffix, rsuffix) return self._constructor(merged_data) else: if lsuffix or rsuffix: raise ValueError('Suffixes not supported when passing ' 'multiple panels') if how == 'left': how = 'outer' join_axes = [self.major_axis, self.minor_axis] elif how == 'right': raise ValueError('Right join not supported with multiple ' 'panels') else: join_axes = None return concat([self] + list(other), axis=0, join=how, join_axes=join_axes, verify_integrity=True) def update(self, other, join='left', overwrite=True, filter_func=None, raise_conflict=False): """ Modify Panel in place using non-NA values from passed Panel, or object coercible to Panel. Aligns on items Parameters ---------- other : Panel, or object coercible to Panel join : How to join individual DataFrames {'left', 'right', 'outer', 'inner'}, default 'left' overwrite : boolean, default True If True then overwrite values for common keys in the calling panel filter_func : callable(1d-array) -> 1d-array<boolean>, default None Can choose to replace values other than NA. Return True for values that should be updated raise_conflict : bool If True, will raise an error if a DataFrame and other both contain data in the same place. 
""" if not isinstance(other, self._constructor): other = self._constructor(other) axis_name = self._info_axis_name axis_values = self._info_axis other = other.reindex(**{axis_name: axis_values}) for frame in axis_values: self[frame].update(other[frame], join, overwrite, filter_func, raise_conflict) def _get_join_index(self, other, how): if how == 'left': join_major, join_minor = self.major_axis, self.minor_axis elif how == 'right': join_major, join_minor = other.major_axis, other.minor_axis elif how == 'inner': join_major = self.major_axis.intersection(other.major_axis) join_minor = self.minor_axis.intersection(other.minor_axis) elif how == 'outer': join_major = self.major_axis.union(other.major_axis) join_minor = self.minor_axis.union(other.minor_axis) return join_major, join_minor # miscellaneous data creation @staticmethod def _extract_axes(self, data, axes, **kwargs): """ return a list of the axis indicies """ return [ self._extract_axis(self, data, axis=i, **kwargs) for i, a in enumerate(axes) ] @staticmethod def _extract_axes_for_slice(self, axes): """ return the slice dictionary for these axes """ return dict([ (self._AXIS_SLICEMAP[i], a) for i, a in zip(self._AXIS_ORDERS[self._AXIS_LEN - len(axes):], axes) ]) @staticmethod def _prep_ndarray(self, values, copy=True): if not isinstance(values, np.ndarray): values = np.asarray(values) # NumPy strings are a pain, convert to object if issubclass(values.dtype.type, compat.string_types): values = np.array(values, dtype=object, copy=True) else: if copy: values = values.copy() if values.ndim != self._AXIS_LEN: raise ValueError("The number of dimensions required is {0}, " "but the number of dimensions of the " "ndarray given was {1}".format( self._AXIS_LEN, values.ndim)) return values @staticmethod def _homogenize_dict(self, frames, intersect=True, dtype=None): """ Conform set of _constructor_sliced-like objects to either an intersection of indices / columns or a union. 
Parameters ---------- frames : dict intersect : boolean, default True Returns ------- dict of aligned results & indicies """ result = dict() # caller differs dict/ODict, presered type if isinstance(frames, OrderedDict): result = OrderedDict() adj_frames = OrderedDict() for k, v in compat.iteritems(frames): if isinstance(v, dict): adj_frames[k] = self._constructor_sliced(v) else: adj_frames[k] = v axes = self._AXIS_ORDERS[1:] axes_dict = dict([(a, ax) for a, ax in zip( axes, self._extract_axes(self, adj_frames, axes, intersect=intersect))]) reindex_dict = dict([(self._AXIS_SLICEMAP[a], axes_dict[a]) for a in axes]) reindex_dict['copy'] = False for key, frame in compat.iteritems(adj_frames): if frame is not None: result[key] = frame.reindex(**reindex_dict) else: result[key] = None axes_dict['data'] = result return axes_dict @staticmethod def _extract_axis(self, data, axis=0, intersect=False): index = None if len(data) == 0: index = Index([]) elif len(data) > 0: raw_lengths = [] indexes = [] have_raw_arrays = False have_frames = False for v in data.values(): if isinstance(v, self._constructor_sliced): have_frames = True indexes.append(v._get_axis(axis)) elif v is not None: have_raw_arrays = True raw_lengths.append(v.shape[axis]) if have_frames: index = _get_combined_index(indexes, intersect=intersect) if have_raw_arrays: lengths = list(set(raw_lengths)) if len(lengths) > 1: raise ValueError('ndarrays must match shape on axis %d' % axis) if have_frames: if lengths[0] != len(index): raise AssertionError('Length of data and index must match') else: index = Index(np.arange(lengths[0])) if index is None: index = Index([]) return _ensure_index(index) @classmethod def _add_aggregate_operations(cls, use_numexpr=True): """ add the operations to the cls; evaluate the doc strings again """ # doc strings substitors _agg_doc = """ Wrapper method for %s Parameters ---------- other : """ + "%s or %s" % (cls._constructor_sliced.__name__, cls.__name__) + """ axis : {""" + ', 
'.join(cls._AXIS_ORDERS) + "}" + """ Axis to broadcast over Returns ------- """ + cls.__name__ + "\n" def _panel_arith_method(op, name, str_rep=None, default_axis=None, fill_zeros=None, **eval_kwargs): def na_op(x, y): try: result = expressions.evaluate(op, str_rep, x, y, raise_on_error=True, **eval_kwargs) except TypeError: result = op(x, y) # handles discrepancy between numpy and numexpr on division/mod by 0 # though, given that these are generally (always?) non-scalars, I'm # not sure whether it's worth it at the moment result = com._fill_zeros(result, y, fill_zeros) return result @Substitution(name) @Appender(_agg_doc) def f(self, other, axis=0): return self._combine(other, na_op, axis=axis) f.__name__ = name return f # add `div`, `mul`, `pow`, etc.. ops.add_flex_arithmetic_methods( cls, _panel_arith_method, use_numexpr=use_numexpr, flex_comp_method=ops._comp_method_PANEL)
class Panel(NDFrame):
    """
    Wide format panel data: a 3-dimensional labeled container whose axes
    are ``items`` (0), ``major_axis`` (1) and ``minor_axis`` (2).
    """
    _AXIS_NUMBERS = {'items': 0, 'major_axis': 1, 'minor_axis': 2}
    _AXIS_ALIASES = {'major': 'major_axis', 'minor': 'minor_axis'}
    _AXIS_NAMES = {0: 'items', 1: 'major_axis', 2: 'minor_axis'}

    # major
    _default_stat_axis = 1
    _het_axis = 0

    items = lib.AxisProperty(0)
    major_axis = lib.AxisProperty(1)
    minor_axis = lib.AxisProperty(2)

    __add__ = _arith_method(operator.add, '__add__')
    __sub__ = _arith_method(operator.sub, '__sub__')
    __truediv__ = _arith_method(operator.truediv, '__truediv__')
    __floordiv__ = _arith_method(operator.floordiv, '__floordiv__')
    __mul__ = _arith_method(operator.mul, '__mul__')
    __pow__ = _arith_method(operator.pow, '__pow__')

    __radd__ = _arith_method(operator.add, '__radd__')
    __rmul__ = _arith_method(operator.mul, '__rmul__')
    __rsub__ = _arith_method(lambda x, y: y - x, '__rsub__')
    __rtruediv__ = _arith_method(lambda x, y: y / x, '__rtruediv__')
    __rfloordiv__ = _arith_method(lambda x, y: y // x, '__rfloordiv__')
    __rpow__ = _arith_method(lambda x, y: y**x, '__rpow__')

    # operator.div only exists on Python 2
    if not py3compat.PY3:
        __div__ = _arith_method(operator.div, '__div__')
        __rdiv__ = _arith_method(lambda x, y: y / x, '__rdiv__')

    def __init__(self, data=None, items=None, major_axis=None,
                 minor_axis=None, copy=False, dtype=None):
        """
        Represents wide format panel data, stored as 3-dimensional array

        Parameters
        ----------
        data : ndarray (items x major x minor), or dict of DataFrames
        items : Index or array-like
            axis=0
        major_axis : Index or array-like
            axis=1
        minor_axis : Index or array-like
            axis=2
        dtype : dtype, default None
            Data type to force, otherwise infer
        copy : boolean, default False
            Copy data from inputs. Only affects DataFrame / 2d ndarray input

        Raises
        ------
        PandasError
            If ``data`` is not a BlockManager, dict, ndarray or list.
        """
        if data is None:
            data = {}

        passed_axes = [items, major_axis, minor_axis]
        axes = None
        if isinstance(data, BlockManager):
            # explicitly passed axes win; the manager's own axes fill the gaps
            if any(x is not None for x in passed_axes):
                axes = [x if x is not None else y
                        for x, y in zip(passed_axes, data.axes)]
            mgr = data
        elif isinstance(data, dict):
            mgr = self._init_dict(data, passed_axes, dtype=dtype)
            # dtype / copy were already handled while building the manager
            copy = False
            dtype = None
        elif isinstance(data, (np.ndarray, list)):
            mgr = self._init_matrix(data, passed_axes, dtype=dtype, copy=copy)
            copy = False
            dtype = None
        else:  # pragma: no cover
            raise PandasError('Panel constructor not properly called!')

        NDFrame.__init__(self, mgr, axes=axes, copy=copy, dtype=dtype)

    @classmethod
    def _from_axes(cls, data, axes):
        """
        Build an instance from ``data`` and a 3-sequence of axes; a
        BlockManager is passed straight through and ``axes`` is ignored.
        """
        # for construction from BlockManager
        if isinstance(data, BlockManager):
            return cls(data)
        else:
            items, major, minor = axes
            return cls(data, items=items, major_axis=major,
                       minor_axis=minor, copy=False)

    def _init_dict(self, data, axes, dtype=None):
        """
        Build the block manager from a dict mapping item -> DataFrame (or
        nested dict, which is converted to a DataFrame).

        Parameters
        ----------
        data : dict
            Not modified; a filtered or shallow copy is worked on instead.
        axes : sequence of (items, major, minor)
            Any entry may be None, in which case it is derived from ``data``.
        dtype : dtype, default None
            If given, each frame is cast to it.
        """
        items, major, minor = axes

        # prefilter if items passed
        if items is not None:
            items = _ensure_index(items)
            data = dict((k, v) for k, v in data.iteritems() if k in items)
        else:
            items = Index(_try_sort(data.keys()))
            # shallow copy: the nested-dict conversion below rebinds values
            # and must not leak into the dict owned by the caller
            data = data.copy()

        for k, v in data.iteritems():
            if isinstance(v, dict):
                data[k] = DataFrame(v)

        if major is None:
            major = _extract_axis(data, axis=0)

        if minor is None:
            minor = _extract_axis(data, axis=1)

        axes = [items, major, minor]
        arrays = []

        item_shape = len(major), len(minor)
        for item in items:
            v = values = data.get(item)
            if v is None:
                # requested item has no data: all-NaN plane
                values = np.empty(item_shape, dtype=dtype)
                values.fill(np.nan)
            elif isinstance(v, DataFrame):
                v = v.reindex(index=major, columns=minor, copy=False)
                if dtype is not None:
                    v = v.astype(dtype)
                values = v.values
            arrays.append(values)

        return self._init_arrays(arrays, items, axes)

    def _init_arrays(self, arrays, arr_names, axes):
        """Form blocks from ``arrays`` and return a consolidated manager."""
        # segregates dtypes and forms blocks matching to columns
        blocks = form_blocks(arrays, arr_names, axes)
        mgr = BlockManager(blocks, axes).consolidate()
        return mgr

    @property
    def shape(self):
        """Tuple of (number of items, major axis length, minor axis length)."""
        return len(self.items), len(self.major_axis), len(self.minor_axis)

    @classmethod
    def from_dict(cls, data, intersect=False, orient='items', dtype=None):
        """
        Construct Panel from dict of DataFrame objects

        Parameters
        ----------
        data : dict
            {field : DataFrame}
        intersect : boolean
            Intersect indexes of input DataFrames
        orient : {'items', 'minor'}, default 'items'
            The "orientation" of the data. If the keys of the passed dict
            should be the items of the result panel, pass 'items'
            (default). Otherwise if the columns of the values of the passed
            DataFrame objects should be the items (which in the case of
            mixed-dtype data you should do), instead pass 'minor'
        dtype : dtype, default None
            Forwarded to the homogenization step

        Returns
        -------
        Panel

        Raises
        ------
        ValueError
            If ``orient`` is neither 'items' nor 'minor' (case-insensitive).
        """
        from collections import defaultdict

        orient = orient.lower()
        if orient == 'minor':
            # pivot {col: DataFrame} into {item: {col: Series}}
            new_data = defaultdict(dict)
            for col, df in data.iteritems():
                for item, s in df.iteritems():
                    new_data[item][col] = s
            data = new_data
        elif orient != 'items':  # pragma: no cover
            raise ValueError('only recognize items or minor for orientation')

        data, index, columns = _homogenize_dict(data, intersect=intersect,
                                                dtype=dtype)
        items = Index(sorted(data.keys()))
        return cls(data, items, index, columns)

    def __getitem__(self, key):
        """Select an item; hierarchical items are handled separately."""
        if isinstance(self.items, MultiIndex):
            return self._getitem_multilevel(key)
        return super(Panel, self).__getitem__(key)

    def _getitem_multilevel(self, key):
        """
        Item lookup when ``items`` is a MultiIndex. A partial key (location
        is a slice or array) yields a sub-Panel with the matched level(s)
        dropped; an exact key yields the cached item.
        """
        loc = self.items.get_loc(key)
        if isinstance(loc, (slice, np.ndarray)):
            new_index = self.items[loc]
            result_index = _maybe_droplevels(new_index, key)
            new_values = self.values[loc, :, :]
            result = Panel(new_values,
                           items=result_index,
                           major_axis=self.major_axis,
                           minor_axis=self.minor_axis)
            return result
        else:
            return self._get_item_cache(key)

    def _init_matrix(self, data, axes, dtype=None, copy=False):
        """
        Build the block manager from an ndarray-like. Axes passed as None
        get a default index sized from the matching dimension.

        Raises
        ------
        ValueError
            If the values cannot be cast to ``dtype``.
        """
        values = _prep_ndarray(data, copy=copy)

        if dtype is not None:
            try:
                values = values.astype(dtype)
            except Exception:
                raise ValueError('failed to cast to %s' % dtype)

        shape = values.shape
        fixed_axes = []
        for i, ax in enumerate(axes):
            if ax is None:
                ax = _default_index(shape[i])
            else:
                ax = _ensure_index(ax)
            fixed_axes.append(ax)

        items = fixed_axes[0]
        block = make_block(values, items, items)
        return BlockManager([block], fixed_axes)

    #----------------------------------------------------------------------
    # Array interface

    def __array__(self, dtype=None):
        # NOTE(review): ``dtype`` is accepted but not applied here
        return self.values

    def __array_wrap__(self, result):
        """Re-wrap an array result using this panel's own axes."""
        return self._constructor(result, items=self.items,
                                 major_axis=self.major_axis,
                                 minor_axis=self.minor_axis, copy=False)

    #----------------------------------------------------------------------
    # Magic methods

    def __str__(self):
        """
        Return a string representation for a particular Panel

        Invoked by str(df) in both py2/py3.
        Yields Bytestring in Py2, Unicode String in py3.
        """
        if py3compat.PY3:
            return self.__unicode__()
        return self.__bytes__()

    def __bytes__(self):
        """
        Return a string representation for a particular Panel

        Invoked by bytes(df) in py3 only.
        Yields a bytestring in both py2/py3.
        """
        return com.console_encode(self.__unicode__())

    def __unicode__(self):
        """
        Return a string representation for a particular Panel

        Invoked by unicode(df) in py2 only.
        Yields a Unicode String in both py2/py3.
        """
        class_name = str(self.__class__)

        I, N, K = len(self.items), len(self.major_axis), len(self.minor_axis)

        dims = u'Dimensions: %d (items) x %d (major) x %d (minor)' % (I, N, K)

        # every axis label goes through pprint_thing so that all three
        # summary lines are rendered the same way
        if N > 0:
            major = u'Major axis: %s to %s' % (
                com.pprint_thing(self.major_axis[0]),
                com.pprint_thing(self.major_axis[-1]))
        else:
            major = u'Major axis: None'

        if K > 0:
            minor = u'Minor axis: %s to %s' % (
                com.pprint_thing(self.minor_axis[0]),
                com.pprint_thing(self.minor_axis[-1]))
        else:
            minor = u'Minor axis: None'

        if I > 0:
            items = u'Items: %s to %s' % (
                com.pprint_thing(self.items[0]),
                com.pprint_thing(self.items[-1]))
        else:
            items = u'Items: None'

        output = u'%s\n%s\n%s\n%s\n%s' % (class_name, dims, items, major,
                                          minor)

        return output

    def __repr__(self):
        """
        Return a string representation for a particular Panel

        Yields Bytestring in Py2, Unicode String in py3.
        """
        return str(self)

    def __iter__(self):
        """Iterate over the item labels."""
        return iter(self.items)

    def iteritems(self):
        """Yield (item label, item slice) pairs in item order."""
        for item in self.items:
            yield item, self[item]

    # Name that won't get automatically converted to items by 2to3. items is
    # already in use for the first axis.
iterkv = iteritems

def _get_plane_axes(self, axis):
    """
    Return the two axes spanning the plane left after fixing ``axis``.

    Parameters
    ----------
    axis : axis name, alias or number
        Normalized through ``_get_axis_name``.

    Returns
    -------
    index, columns : tuple
        'major_axis' -> (minor_axis, items)
        'minor_axis' -> (major_axis, items)
        'items'      -> (major_axis, minor_axis)
    """
    axis = self._get_axis_name(axis)

    if axis == 'major_axis':
        index = self.minor_axis
        columns = self.items
    elif axis == 'minor_axis':
        index = self.major_axis
        columns = self.items
    elif axis == 'items':
        index = self.major_axis
        columns = self.minor_axis

    return index, columns

@property
def _constructor(self):
    """Class used to build new objects of the same kind as ``self``."""
    return type(self)

# Fancy indexing
_ix = None

@property
def ix(self):
    """Label indexer, created on first access and then reused."""
    if self._ix is None:
        self._ix = _NDFrameIndexer(self)

    return self._ix

def _wrap_array(self, arr, axes, copy=False):
    """Wrap ``arr`` in a new object labeled by (items, major, minor)."""
    items, major, minor = axes
    return self._constructor(arr, items=items, major_axis=major,
                             minor_axis=minor, copy=copy)

fromDict = from_dict

def to_sparse(self, fill_value=None, kind='block'):
    """
    Convert to SparsePanel

    Parameters
    ----------
    fill_value : float, default None
        Forwarded as the SparsePanel's ``default_fill_value``
    kind : {'block', 'integer'}
        Forwarded as the SparsePanel's ``default_kind``

    Returns
    -------
    y : SparsePanel
    """
    # NOTE(review): the default None is forwarded unchanged -- confirm that
    # SparsePanel treats it the same as NaN
    from pandas.core.sparse import SparsePanel
    frames = dict(self.iterkv())
    return SparsePanel(frames, items=self.items,
                       major_axis=self.major_axis,
                       minor_axis=self.minor_axis,
                       default_kind=kind,
                       default_fill_value=fill_value)

def to_excel(self, path, na_rep=''):
    """
    Write each DataFrame in Panel to a separate excel sheet

    Parameters
    ----------
    path : string
        Passed to ``ExcelWriter`` to create the writer
    na_rep : string, default ''
        Missing data representation
    """
    from pandas.io.parsers import ExcelWriter
    writer = ExcelWriter(path)
    for item, df in self.iteritems():
        # sheet name is the string form of the item label
        name = str(item)
        df.to_excel(writer, name, na_rep=na_rep)
    writer.save()

# TODO: needed?
def keys(self): return list(self.items) def _get_values(self): self._consolidate_inplace() return self._data.as_matrix() values = property(fget=_get_values) #---------------------------------------------------------------------- # Getting and setting elements def get_value(self, item, major, minor): """ Quickly retrieve single value at (item, major, minor) location Parameters ---------- item : item label (panel item) major : major axis label (panel item row) minor : minor axis label (panel item column) Returns ------- value : scalar value """ # hm, two layers to the onion frame = self._get_item_cache(item) return frame.get_value(major, minor) def set_value(self, item, major, minor, value): """ Quickly set single value at (item, major, minor) location Parameters ---------- item : item label (panel item) major : major axis label (panel item row) minor : minor axis label (panel item column) value : scalar Returns ------- panel : Panel If label combo is contained, will be reference to calling Panel, otherwise a new object """ try: frame = self._get_item_cache(item) frame.set_value(major, minor, value) return self except KeyError: ax1, ax2, ax3 = self._expand_axes((item, major, minor)) result = self.reindex(items=ax1, major=ax2, minor=ax3, copy=False) likely_dtype = com._infer_dtype(value) made_bigger = not np.array_equal(ax1, self.items) # how to make this logic simpler? if made_bigger: com._possibly_cast_item(result, item, likely_dtype) return result.set_value(item, major, minor, value) def _box_item_values(self, key, values): return DataFrame(values, index=self.major_axis, columns=self.minor_axis) def __getattr__(self, name): """After regular attribute access, try looking up the name of an item. 
This allows simpler access to items for interactive use.""" if name in self.items: return self[name] raise AttributeError("'%s' object has no attribute '%s'" % (type(self).__name__, name)) def _slice(self, slobj, axis=0): new_data = self._data.get_slice(slobj, axis=axis) return self._constructor(new_data) def __setitem__(self, key, value): _, N, K = self.shape if isinstance(value, DataFrame): value = value.reindex(index=self.major_axis, columns=self.minor_axis) mat = value.values elif isinstance(value, np.ndarray): if value.shape != (N, K): raise AssertionError( ('Shape of values must be (%d, %d), ' 'not (%d, %d)') % ((N, K) + values.shape)) mat = np.asarray(value) elif np.isscalar(value): dtype = _infer_dtype(value) mat = np.empty((N, K), dtype=dtype) mat.fill(value) else: raise TypeError('Cannot set item of type: %s' % str(type(value))) mat = mat.reshape((1, N, K)) NDFrame._set_item(self, key, mat) def pop(self, item): """ Return item slice from panel and delete from panel Parameters ---------- key : object Must be contained in panel's items Returns ------- y : DataFrame """ return NDFrame.pop(self, item) def __getstate__(self): "Returned pickled representation of the panel" return self._data def __setstate__(self, state): # old Panel pickle if isinstance(state, BlockManager): self._data = state elif len(state) == 4: # pragma: no cover self._unpickle_panel_compat(state) else: # pragma: no cover raise ValueError('unrecognized pickle') self._item_cache = {} def _unpickle_panel_compat(self, state): # pragma: no cover "Unpickle the panel" _unpickle = com._unpickle_array vals, items, major, minor = state items = _unpickle(items) major = _unpickle(major) minor = _unpickle(minor) values = _unpickle(vals) wp = Panel(values, items, major, minor) self._data = wp._data def conform(self, frame, axis='items'): """ Conform input DataFrame to align with chosen axis pair. Parameters ---------- frame : DataFrame axis : {'items', 'major', 'minor'} Axis the input corresponds to. 
E.g., if axis='major', then the frame's columns would be items, and the index would be values of the minor axis Returns ------- DataFrame """ index, columns = self._get_plane_axes(axis) return frame.reindex(index=index, columns=columns) def reindex(self, major=None, items=None, minor=None, method=None, major_axis=None, minor_axis=None, copy=True): """ Conform panel to new axis or axes Parameters ---------- major : Index or sequence, default None Can also use 'major_axis' keyword items : Index or sequence, default None minor : Index or sequence, default None Can also use 'minor_axis' keyword method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None Method to use for filling holes in reindexed Series pad / ffill: propagate last valid observation forward to next valid backfill / bfill: use NEXT valid observation to fill gap copy : boolean, default True Return a new object, even if the passed indexes are the same Returns ------- Panel (new object) """ result = self major = _mut_exclusive(major, major_axis) minor = _mut_exclusive(minor, minor_axis) if (method is None and not self._is_mixed_type and com._count_not_none(items, major, minor) == 3): return self._reindex_multi(items, major, minor) if major is not None: result = result._reindex_axis(major, method, 1, copy) if minor is not None: result = result._reindex_axis(minor, method, 2, copy) if items is not None: result = result._reindex_axis(items, method, 0, copy) if result is self and copy: raise ValueError('Must specify at least one axis') return result def _reindex_multi(self, items, major, minor): a0, a1, a2 = len(items), len(major), len(minor) values = self.values new_values = np.empty((a0, a1, a2), dtype=values.dtype) new_items, indexer0 = self.items.reindex(items) new_major, indexer1 = self.major_axis.reindex(major) new_minor, indexer2 = self.minor_axis.reindex(minor) if indexer0 is None: indexer0 = range(len(new_items)) if indexer1 is None: indexer1 = range(len(new_major)) if indexer2 is None: 
indexer2 = range(len(new_minor)) for i, ind in enumerate(indexer0): com.take_2d_multi(values[ind], indexer1, indexer2, out=new_values[i]) return Panel(new_values, items=new_items, major_axis=new_major, minor_axis=new_minor) def reindex_axis(self, labels, axis=0, method=None, level=None, copy=True): """Conform Panel to new index with optional filling logic, placing NA/NaN in locations having no value in the previous index. A new object is produced unless the new index is equivalent to the current one and copy=False Parameters ---------- index : array-like, optional New labels / index to conform to. Preferably an Index object to avoid duplicating data axis : {0, 1} 0 -> index (rows) 1 -> columns method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None Method to use for filling holes in reindexed DataFrame pad / ffill: propagate last valid observation forward to next valid backfill / bfill: use NEXT valid observation to fill gap copy : boolean, default True Return a new object, even if the passed indexes are the same level : int or name Broadcast across a level, matching Index values on the passed MultiIndex level Returns ------- reindexed : Panel """ self._consolidate_inplace() return self._reindex_axis(labels, method, axis, copy) def reindex_like(self, other, method=None): """ Reindex Panel to match indices of another Panel Parameters ---------- other : Panel method : string or None Returns ------- reindexed : Panel """ # todo: object columns return self.reindex(major=other.major_axis, items=other.items, minor=other.minor_axis, method=method) def dropna(self, axis=0, how='any'): """ Drop 2D from panel, holding passed axis constant Parameters ---------- axis : int, default 0 Axis to hold constant. E.g. axis=1 will drop major_axis entries having a certain amount of NA data how : {'all', 'any'}, default 'any' 'any': one or more values are NA in the DataFrame along the axis. For 'all' they all must be. 
Returns ------- dropped : Panel """ axis = self._get_axis_number(axis) values = self.values mask = com.notnull(values) for ax in reversed(sorted(set(range(3)) - set([axis]))): mask = mask.sum(ax) per_slice = np.prod(values.shape[:axis] + values.shape[axis + 1:]) if how == 'all': cond = mask > 0 else: cond = mask == per_slice new_ax = self._get_axis(axis)[cond] return self.reindex_axis(new_ax, axis=axis) def _combine(self, other, func, axis=0): if isinstance(other, Panel): return self._combine_panel(other, func) elif isinstance(other, DataFrame): return self._combine_frame(other, func, axis=axis) elif np.isscalar(other): new_values = func(self.values, other) return self._constructor(new_values, self.items, self.major_axis, self.minor_axis) def __neg__(self): return -1 * self def _combine_frame(self, other, func, axis=0): index, columns = self._get_plane_axes(axis) axis = self._get_axis_number(axis) other = other.reindex(index=index, columns=columns) if axis == 0: new_values = func(self.values, other.values) elif axis == 1: new_values = func(self.values.swapaxes(0, 1), other.values.T) new_values = new_values.swapaxes(0, 1) elif axis == 2: new_values = func(self.values.swapaxes(0, 2), other.values) new_values = new_values.swapaxes(0, 2) return self._constructor(new_values, self.items, self.major_axis, self.minor_axis) def _combine_panel(self, other, func): items = self.items + other.items major = self.major_axis + other.major_axis minor = self.minor_axis + other.minor_axis # could check that everything's the same size, but forget it this = self.reindex(items=items, major=major, minor=minor) other = other.reindex(items=items, major=major, minor=minor) result_values = func(this.values, other.values) return self._constructor(result_values, items, major, minor) def fillna(self, value=None, method='pad'): """ Fill NaN values using the specified method. Member Series / TimeSeries are filled separately. 
Parameters ---------- value : any kind (should be same type as array) Value to use to fill holes (e.g. 0) method : {'backfill', 'bfill', 'pad', 'ffill', None}, default 'pad' Method to use for filling holes in reindexed Series pad / ffill: propagate last valid observation forward to next valid backfill / bfill: use NEXT valid observation to fill gap Returns ------- y : DataFrame See also -------- DataFrame.reindex, DataFrame.asfreq """ if value is None: result = {} for col, s in self.iterkv(): result[col] = s.fillna(method=method, value=value) return self._constructor.from_dict(result) else: new_data = self._data.fillna(value) return self._constructor(new_data) add = _panel_arith_method(operator.add, 'add') subtract = sub = _panel_arith_method(operator.sub, 'subtract') multiply = mul = _panel_arith_method(operator.mul, 'multiply') try: divide = div = _panel_arith_method(operator.div, 'divide') except AttributeError: # pragma: no cover # Python 3 divide = div = _panel_arith_method(operator.truediv, 'divide') def major_xs(self, key, copy=True): """ Return slice of panel along major axis Parameters ---------- key : object Major axis label copy : boolean, default False Copy data Returns ------- y : DataFrame index -> minor axis, columns -> items """ return self.xs(key, axis=1, copy=copy) def minor_xs(self, key, copy=True): """ Return slice of panel along minor axis Parameters ---------- key : object Minor axis label copy : boolean, default False Copy data Returns ------- y : DataFrame index -> major axis, columns -> items """ return self.xs(key, axis=2, copy=copy) def xs(self, key, axis=1, copy=True): """ Return slice of panel along selected axis Parameters ---------- key : object Label axis : {'items', 'major', 'minor}, default 1/'major' Returns ------- y : DataFrame """ if axis == 0: data = self[key] if copy: data = data.copy() return data self._consolidate_inplace() axis_number = self._get_axis_number(axis) new_data = self._data.xs(key, axis=axis_number, copy=copy) 
return DataFrame(new_data) def _ixs(self, i, axis=0): # for compatibility with .ix indexing # Won't work with hierarchical indexing yet key = self._get_axis(axis)[i] return self.xs(key, axis=axis) def groupby(self, function, axis='major'): """ Group data on given axis, returning GroupBy object Parameters ---------- function : callable Mapping function for chosen access axis : {'major', 'minor', 'items'}, default 'major' Returns ------- grouped : PanelGroupBy """ from pandas.core.groupby import PanelGroupBy axis = self._get_axis_number(axis) return PanelGroupBy(self, function, axis=axis) def swapaxes(self, axis1='major', axis2='minor', copy=True): """ Interchange axes and swap values axes appropriately Returns ------- y : Panel (new object) """ i = self._get_axis_number(axis1) j = self._get_axis_number(axis2) if i == j: raise ValueError('Cannot specify the same axis') mapping = {i: j, j: i} new_axes = (self._get_axis(mapping.get(k, k)) for k in range(3)) new_values = self.values.swapaxes(i, j) if copy: new_values = new_values.copy() return self._constructor(new_values, *new_axes) def transpose(self, items='items', major='major', minor='minor', copy=False): """ Permute the dimensions of the Panel Parameters ---------- items : int or one of {'items', 'major', 'minor'} major : int or one of {'items', 'major', 'minor'} minor : int or one of {'items', 'major', 'minor'} copy : boolean, default False Make a copy of the underlying data. 
Mixed-dtype data will always result in a copy Examples -------- >>> p.transpose(2, 0, 1) >>> p.transpose(2, 0, 1, copy=True) Returns ------- y : Panel (new object) """ i, j, k = [self._get_axis_number(x) for x in [items, major, minor]] if i == j or i == k or j == k: raise ValueError('Must specify 3 unique axes') new_axes = [self._get_axis(x) for x in [i, j, k]] new_values = self.values.transpose((i, j, k)) if copy: new_values = new_values.copy() return self._constructor(new_values, *new_axes) def to_frame(self, filter_observations=True): """ Transform wide format into long (stacked) format as DataFrame Parameters ---------- filter_observations : boolean, default True Drop (major, minor) pairs without a complete set of observations across all the items Returns ------- y : DataFrame """ _, N, K = self.shape if filter_observations: mask = com.notnull(self.values).all(axis=0) # size = mask.sum() selector = mask.ravel() else: # size = N * K selector = slice(None, None) data = {} for item in self.items: data[item] = self[item].values.ravel()[selector] major_labels = np.arange(N).repeat(K)[selector] # Anyone think of a better way to do this? 
np.repeat does not # do what I want minor_labels = np.arange(K).reshape(1, K)[np.zeros(N, dtype=int)] minor_labels = minor_labels.ravel()[selector] maj_name = self.major_axis.name or 'major' min_name = self.minor_axis.name or 'minor' index = MultiIndex(levels=[self.major_axis, self.minor_axis], labels=[major_labels, minor_labels], names=[maj_name, min_name]) return DataFrame(data, index=index, columns=self.items) to_long = deprecate('to_long', to_frame) toLong = deprecate('toLong', to_frame) def filter(self, items): """ Restrict items in panel to input list Parameters ---------- items : sequence Returns ------- y : Panel """ intersection = self.items.intersection(items) return self.reindex(items=intersection) def apply(self, func, axis='major'): """ Apply Parameters ---------- func : numpy function Signature should match numpy.{sum, mean, var, std} etc. axis : {'major', 'minor', 'items'} fill_value : boolean, default True Replace NaN values with specified first Returns ------- result : DataFrame or Panel """ i = self._get_axis_number(axis) result = np.apply_along_axis(func, i, self.values) return self._wrap_result(result, axis=axis) def _reduce(self, op, axis=0, skipna=True): axis_name = self._get_axis_name(axis) axis_number = self._get_axis_number(axis_name) f = lambda x: op(x, axis=axis_number, skipna=skipna) result = f(self.values) index, columns = self._get_plane_axes(axis_name) if axis_name != 'items': result = result.T return DataFrame(result, index=index, columns=columns) def _wrap_result(self, result, axis): axis = self._get_axis_name(axis) index, columns = self._get_plane_axes(axis) if axis != 'items': result = result.T return DataFrame(result, index=index, columns=columns) def count(self, axis='major'): """ Return number of observations over requested axis. 
Parameters ---------- axis : {'items', 'major', 'minor'} or {0, 1, 2} Returns ------- count : DataFrame """ i = self._get_axis_number(axis) values = self.values mask = np.isfinite(values) result = mask.sum(axis=i) return self._wrap_result(result, axis) @Substitution(desc='sum', outname='sum') @Appender(_agg_doc) def sum(self, axis='major', skipna=True): return self._reduce(nanops.nansum, axis=axis, skipna=skipna) @Substitution(desc='mean', outname='mean') @Appender(_agg_doc) def mean(self, axis='major', skipna=True): return self._reduce(nanops.nanmean, axis=axis, skipna=skipna) @Substitution(desc='unbiased variance', outname='variance') @Appender(_agg_doc) def var(self, axis='major', skipna=True): return self._reduce(nanops.nanvar, axis=axis, skipna=skipna) @Substitution(desc='unbiased standard deviation', outname='stdev') @Appender(_agg_doc) def std(self, axis='major', skipna=True): return self.var(axis=axis, skipna=skipna).apply(np.sqrt) @Substitution(desc='unbiased skewness', outname='skew') @Appender(_agg_doc) def skew(self, axis='major', skipna=True): return self._reduce(nanops.nanskew, axis=axis, skipna=skipna) @Substitution(desc='product', outname='prod') @Appender(_agg_doc) def prod(self, axis='major', skipna=True): return self._reduce(nanops.nanprod, axis=axis, skipna=skipna) @Substitution(desc='compounded percentage', outname='compounded') @Appender(_agg_doc) def compound(self, axis='major', skipna=True): return (1 + self).prod(axis=axis, skipna=skipna) - 1 @Substitution(desc='median', outname='median') @Appender(_agg_doc) def median(self, axis='major', skipna=True): return self._reduce(nanops.nanmedian, axis=axis, skipna=skipna) @Substitution(desc='maximum', outname='maximum') @Appender(_agg_doc) def max(self, axis='major', skipna=True): return self._reduce(nanops.nanmax, axis=axis, skipna=skipna) @Substitution(desc='minimum', outname='minimum') @Appender(_agg_doc) def min(self, axis='major', skipna=True): return self._reduce(nanops.nanmin, axis=axis, 
skipna=skipna) def shift(self, lags, axis='major'): """ Shift major or minor axis by specified number of leads/lags. Drops periods right now compared with DataFrame.shift Parameters ---------- lags : int axis : {'major', 'minor'} Returns ------- shifted : Panel """ values = self.values items = self.items major_axis = self.major_axis minor_axis = self.minor_axis if lags > 0: vslicer = slice(None, -lags) islicer = slice(lags, None) elif lags == 0: vslicer = islicer = slice(None) else: vslicer = slice(-lags, None) islicer = slice(None, lags) if axis == 'major': values = values[:, vslicer, :] major_axis = major_axis[islicer] elif axis == 'minor': values = values[:, :, vslicer] minor_axis = minor_axis[islicer] else: raise ValueError('Invalid axis') return self._constructor(values, items=items, major_axis=major_axis, minor_axis=minor_axis) def truncate(self, before=None, after=None, axis='major'): """Function truncates a sorted Panel before and/or after some particular values on the requested axis Parameters ---------- before : date Left boundary after : date Right boundary axis : {'major', 'minor', 'items'} Returns ------- Panel """ axis = self._get_axis_name(axis) index = self._get_axis(axis) beg_slice, end_slice = index.slice_locs(before, after) new_index = index[beg_slice:end_slice] return self.reindex(**{axis: new_index}) def join(self, other, how='left', lsuffix='', rsuffix=''): """ Join items with other Panel either on major and minor axes column Parameters ---------- other : Panel or list of Panels Index should be similar to one of the columns in this one how : {'left', 'right', 'outer', 'inner'} How to handle indexes of the two objects. 
Default: 'left' for joining on index, None otherwise * left: use calling frame's index * right: use input frame's index * outer: form union of indexes * inner: use intersection of indexes lsuffix : string Suffix to use from left frame's overlapping columns rsuffix : string Suffix to use from right frame's overlapping columns Returns ------- joined : Panel """ from pandas.tools.merge import concat if isinstance(other, Panel): join_major, join_minor = self._get_join_index(other, how) this = self.reindex(major=join_major, minor=join_minor) other = other.reindex(major=join_major, minor=join_minor) merged_data = this._data.merge(other._data, lsuffix, rsuffix) return self._constructor(merged_data) else: if lsuffix or rsuffix: raise ValueError('Suffixes not supported when passing ' 'multiple panels') if how == 'left': how = 'outer' join_axes = [self.major_axis, self.minor_axis] elif how == 'right': raise ValueError('Right join not supported with multiple ' 'panels') else: join_axes = None return concat([self] + list(other), axis=0, join=how, join_axes=join_axes, verify_integrity=True) def update(self, other, join='left', overwrite=True, filter_func=None, raise_conflict=False): """ Modify Panel in place using non-NA values from passed Panel, or object coercible to Panel. Aligns on items Parameters ---------- other : Panel, or object coercible to Panel join : How to join individual DataFrames {'left', 'right', 'outer', 'inner'}, default 'left' overwrite : boolean, default True If True then overwrite values for common keys in the calling panel filter_func : callable(1d-array) -> 1d-array<boolean>, default None Can choose to replace values other than NA. Return True for values that should be updated raise_conflict : bool If True, will raise an error if a DataFrame and other both contain data in the same place. 
""" if not isinstance(other, Panel): other = Panel(other) other = other.reindex(items=self.items) for frame in self.items: self[frame].update(other[frame], join, overwrite, filter_func, raise_conflict) def _get_join_index(self, other, how): if how == 'left': join_major, join_minor = self.major_axis, self.minor_axis elif how == 'right': join_major, join_minor = other.major_axis, other.minor_axis elif how == 'inner': join_major = self.major_axis.intersection(other.major_axis) join_minor = self.minor_axis.intersection(other.minor_axis) elif how == 'outer': join_major = self.major_axis.union(other.major_axis) join_minor = self.minor_axis.union(other.minor_axis) return join_major, join_minor
class Panel(NDFrame):
    # mapping between axis names, their aliases and positions
    _AXIS_NUMBERS = {
        'items': 0,
        'major_axis': 1,
        'minor_axis': 2
    }

    _AXIS_ALIASES = {
        'major': 'major_axis',
        'minor': 'minor_axis'
    }

    _AXIS_NAMES = {
        0: 'items',
        1: 'major_axis',
        2: 'minor_axis'
    }

    # major
    _default_stat_axis = 1
    _het_axis = 0

    items = lib.AxisProperty(0)
    major_axis = lib.AxisProperty(1)
    minor_axis = lib.AxisProperty(2)

    __add__ = _arith_method(operator.add, '__add__')
    __sub__ = _arith_method(operator.sub, '__sub__')
    __truediv__ = _arith_method(operator.truediv, '__truediv__')
    __floordiv__ = _arith_method(operator.floordiv, '__floordiv__')
    __mul__ = _arith_method(operator.mul, '__mul__')
    __pow__ = _arith_method(operator.pow, '__pow__')

    __radd__ = _arith_method(operator.add, '__radd__')
    __rmul__ = _arith_method(operator.mul, '__rmul__')
    __rsub__ = _arith_method(lambda x, y: y - x, '__rsub__')
    __rtruediv__ = _arith_method(lambda x, y: y / x, '__rtruediv__')
    __rfloordiv__ = _arith_method(lambda x, y: y // x, '__rfloordiv__')
    __rpow__ = _arith_method(lambda x, y: y ** x, '__rpow__')

    if not py3compat.PY3:
        __div__ = _arith_method(operator.div, '__div__')
        __rdiv__ = _arith_method(lambda x, y: y / x, '__rdiv__')

    def __init__(self, data=None, items=None, major_axis=None, minor_axis=None,
                 copy=False, dtype=None):
        """
        Represents wide format panel data, stored as 3-dimensional array

        Parameters
        ----------
        data : ndarray (items x major x minor), or dict of DataFrames
        items : Index or array-like
            axis=0
        major_axis : Index or array-like
            axis=1
        minor_axis : Index or array-like
            axis=2
        dtype : dtype, default None
            Data type to force, otherwise infer
        copy : boolean, default False
            Copy data from inputs. Only affects DataFrame / 2d ndarray input
        """
        if data is None:
            data = {}

        passed_axes = [items, major_axis, minor_axis]
        axes = None
        if isinstance(data, BlockManager):
            # only override the manager's own axes when some were passed
            if any(x is not None for x in passed_axes):
                axes = [x if x is not None else y
                        for x, y in zip(passed_axes, data.axes)]
            mgr = data
        elif isinstance(data, dict):
            mgr = self._init_dict(data, passed_axes, dtype=dtype)
            copy = False
            dtype = None
        elif isinstance(data, (np.ndarray, list)):
            mgr = self._init_matrix(data, passed_axes, dtype=dtype, copy=copy)
            copy = False
            dtype = None
        else:  # pragma: no cover
            raise PandasError('Panel constructor not properly called!')

        NDFrame.__init__(self, mgr, axes=axes, copy=copy, dtype=dtype)

    @classmethod
    def _from_axes(cls, data, axes):
        # for construction from BlockManager
        if isinstance(data, BlockManager):
            return cls(data)
        else:
            items, major, minor = axes
            return cls(data, items=items, major_axis=major,
                       minor_axis=minor, copy=False)

    def _init_dict(self, data, axes, dtype=None):
        """
        Build a BlockManager from a dict of DataFrame-like values.

        `axes` is the [items, major, minor] list passed to the constructor;
        missing major / minor axes are the union of the frames' indexes /
        columns, missing items are the sorted dict keys.
        """
        items, major, minor = axes

        # prefilter if items passed
        if items is not None:
            items = _ensure_index(items)
            data = dict((k, v) for k, v in data.iteritems() if k in items)
        else:
            items = Index(_try_sort(data.keys()))
            # shallow copy: the DataFrame coercion below must not modify
            # the dict owned by the caller
            data = dict(data)

        for k, v in data.iteritems():
            if not isinstance(v, DataFrame):
                data[k] = DataFrame(v)

        if major is None:
            indexes = [v.index for v in data.values()]
            major = _union_indexes(indexes)

        if minor is None:
            indexes = [v.columns for v in data.values()]
            minor = _union_indexes(indexes)

        axes = [items, major, minor]

        reshaped_data = data.copy()  # shallow

        # homogenize
        item_shape = (1, len(major), len(minor))
        for k in items:
            if k not in data:
                # requested item with no data: all-NaN plane
                values = np.empty(item_shape, dtype=dtype)
                values.fill(np.nan)
                reshaped_data[k] = values
            else:
                v = data[k]
                v = v.reindex(index=major, columns=minor, copy=False)
                if dtype is not None:
                    v = v.astype(dtype)
                values = v.values
                shape = values.shape
                reshaped_data[k] = values.reshape((1,) + shape)

        # segregates dtypes and forms blocks matching to columns
        blocks = form_blocks(reshaped_data, axes)
        mgr = BlockManager(blocks, axes).consolidate()
        return mgr

    @property
    def shape(self):
        return len(self.items), len(self.major_axis), len(self.minor_axis)

    @classmethod
    def from_dict(cls, data, intersect=False, orient='items', dtype=None):
        """
        Construct Panel from dict of DataFrame objects

        Parameters
        ----------
        data : dict
            {field : DataFrame}
        intersect : boolean
            Intersect indexes of input DataFrames
        orient : {'items', 'minor'}, default 'items'
            The "orientation" of the data. If the keys of the passed dict
            should be the items of the result panel, pass 'items'
            (default). Otherwise if the columns of the values of the passed
            DataFrame objects should be the items (which in the case of
            mixed-dtype data you should do), instead pass 'minor'

        Returns
        -------
        Panel
        """
        from collections import defaultdict

        orient = orient.lower()
        if orient == 'minor':
            # regroup {col: DataFrame} into {item: {col: Series}}
            new_data = defaultdict(dict)
            for col, df in data.iteritems():
                for item, s in df.iteritems():
                    new_data[item][col] = s
            data = new_data
        elif orient != 'items':  # pragma: no cover
            raise ValueError('only recognize items or minor for orientation')

        data, index, columns = _homogenize_dict(data, intersect=intersect,
                                                dtype=dtype)
        items = Index(sorted(data.keys()))
        return Panel(data, items, index, columns)

    def _init_matrix(self, data, axes, dtype=None, copy=False):
        """
        Build a single-block BlockManager from ndarray / list input,
        generating default axes for those not passed.
        """
        values = _prep_ndarray(data, copy=copy)

        if dtype is not None:
            try:
                values = values.astype(dtype)
            except Exception:
                raise ValueError('failed to cast to %s' % dtype)

        shape = values.shape
        fixed_axes = []
        for i, ax in enumerate(axes):
            if ax is None:
                ax = _default_index(shape[i])
            else:
                ax = _ensure_index(ax)
            fixed_axes.append(ax)

        items = fixed_axes[0]
        block = make_block(values, items, items)
        return BlockManager([block], fixed_axes)

    def __repr__(self):
        class_name = str(self.__class__)

        I, N, K = len(self.items), len(self.major_axis), len(self.minor_axis)

        dims = 'Dimensions: %d (items) x %d (major) x %d (minor)' % (I, N, K)

        if len(self.major_axis) > 0:
            major = 'Major axis: %s to %s' % (self.major_axis[0],
                                              self.major_axis[-1])
        else:
            major = 'Major axis: None'

        if len(self.minor_axis) > 0:
            minor = 'Minor axis: %s to %s' % (self.minor_axis[0],
                                              self.minor_axis[-1])
        else:
            minor = 'Minor axis: None'

        if len(self.items) > 0:
            items = 'Items: %s to %s' % (self.items[0], self.items[-1])
        else:
            items = 'Items: None'

        output = '%s\n%s\n%s\n%s\n%s' % (class_name, dims, items, major, minor)

        return output

    def __iter__(self):
        return iter(self.items)

    def iteritems(self):
        for item in self.items:
            yield item, self[item]

    # Name that won't get automatically converted to items by 2to3. items is
    # already in use for the first axis.
    iterkv = iteritems

    def _get_plane_axes(self, axis):
        """
        Return the (index, columns) pair labelling the 2D plane that is
        left once `axis` is removed.
        """
        axis = self._get_axis_name(axis)

        if axis == 'major_axis':
            index = self.minor_axis
            columns = self.items
        elif axis == 'minor_axis':
            index = self.major_axis
            columns = self.items
        elif axis == 'items':
            index = self.major_axis
            columns = self.minor_axis

        return index, columns

    @property
    def _constructor(self):
        return Panel

    # Fancy indexing
    _ix = None

    @property
    def ix(self):
        # the indexer is created lazily and cached on the instance
        if self._ix is None:
            self._ix = _NDFrameIndexer(self)

        return self._ix

    def _wrap_array(self, arr, axes, copy=False):
        items, major, minor = axes
        return self._constructor(arr, items=items, major_axis=major,
                                 minor_axis=minor, copy=copy)

    fromDict = from_dict

    def to_sparse(self, fill_value=None, kind='block'):
        """
        Convert to SparsePanel

        Parameters
        ----------
        fill_value : float, default NaN
        kind : {'block', 'integer'}

        Returns
        -------
        y : SparsePanel
        """
        from pandas.core.sparse import SparsePanel
        frames = dict(self.iterkv())
        return SparsePanel(frames, items=self.items,
                           major_axis=self.major_axis,
                           minor_axis=self.minor_axis,
                           default_kind=kind,
                           default_fill_value=fill_value)

    # TODO: needed?
    def keys(self):
        return list(self.items)

    def _get_values(self):
        self._consolidate_inplace()
        return self._data.as_matrix()

    values = property(fget=_get_values)

    #----------------------------------------------------------------------
    # Getting and setting elements

    def get_value(self, item, major, minor):
        """
        Quickly retrieve single value at (item, major, minor) location

        Parameters
        ----------
        item : item label (panel item)
        major : major axis label (panel item row)
        minor : minor axis label (panel item column)

        Returns
        -------
        value : scalar value
        """
        # hm, two layers to the onion
        frame = self._get_item_cache(item)
        return frame.get_value(major, minor)

    def set_value(self, item, major, minor, value):
        """
        Quickly set single value at (item, major, minor) location

        Parameters
        ----------
        item : item label (panel item)
        major : major axis label (panel item row)
        minor : minor axis label (panel item column)
        value : scalar

        Returns
        -------
        panel : Panel
            If label combo is contained, will be reference to calling Panel,
            otherwise a new object
        """
        try:
            frame = self._get_item_cache(item)
            frame.set_value(major, minor, value)
            return self
        except KeyError:
            # at least one label is new: enlarge the axes and retry on the
            # reindexed copy
            ax1, ax2, ax3 = self._expand_axes((item, major, minor))
            result = self.reindex(items=ax1, major=ax2, minor=ax3, copy=False)

            likely_dtype = com._infer_dtype(value)
            made_bigger = not np.array_equal(ax1, self.items)
            # how to make this logic simpler?
            if made_bigger:
                com._possibly_cast_item(result, item, likely_dtype)

            return result.set_value(item, major, minor, value)

    def _box_item_values(self, key, values):
        return DataFrame(values, index=self.major_axis, columns=self.minor_axis)

    def __getattr__(self, name):
        """After regular attribute access, try looking up the name of an item.
        This allows simpler access to items for interactive use."""
        if name in self.items:
            return self[name]
        raise AttributeError("'%s' object has no attribute '%s'" %
                             (type(self).__name__, name))

    def _slice(self, slobj, axis=0):
        new_data = self._data.get_slice(slobj, axis=axis)
        return self._constructor(new_data)

    def __setitem__(self, key, value):
        """
        Set item `key` from a DataFrame (reindexed to the panel's major and
        minor axes), a 2D ndarray of shape (major, minor) or a scalar that
        is broadcast over the whole plane.

        Raises
        ------
        AssertionError
            If an ndarray of the wrong shape is passed
        TypeError
            If `value` is not a DataFrame, ndarray or scalar
        """
        _, N, K = self.shape
        if isinstance(value, DataFrame):
            value = value.reindex(index=self.major_axis,
                                  columns=self.minor_axis)
            mat = value.values
        elif isinstance(value, np.ndarray):
            # explicit raise instead of a bare assert so that the check
            # survives python -O; the exception type is unchanged
            if value.shape != (N, K):
                raise AssertionError('shape of value must be %s, got %s'
                                     % ((N, K), value.shape))
            mat = np.asarray(value)
        elif np.isscalar(value):
            dtype = _infer_dtype(value)
            mat = np.empty((N, K), dtype=dtype)
            mat.fill(value)
        else:
            # previously fell through and failed with an UnboundLocalError
            raise TypeError('Cannot set item of type %s; expected DataFrame, '
                            'ndarray or scalar' % type(value).__name__)

        mat = mat.reshape((1, N, K))
        NDFrame._set_item(self, key, mat)

    def pop(self, item):
        """
        Return item slice from panel and delete from panel

        Parameters
        ----------
        item : object
            Must be contained in panel's items

        Returns
        -------
        y : DataFrame
        """
        return NDFrame.pop(self, item)

    def __getstate__(self):
        "Returned pickled representation of the panel"
        return self._data

    def __setstate__(self, state):
        # old Panel pickle
        if isinstance(state, BlockManager):
            self._data = state
        elif len(state) == 4:  # pragma: no cover
            self._unpickle_panel_compat(state)
        else:  # pragma: no cover
            raise ValueError('unrecognized pickle')
        self._item_cache = {}

    def _unpickle_panel_compat(self, state):  # pragma: no cover
        "Unpickle the panel"
        _unpickle = com._unpickle_array
        vals, items, major, minor = state

        items = _unpickle(items)
        major = _unpickle(major)
        minor = _unpickle(minor)
        values = _unpickle(vals)
        wp = Panel(values, items, major, minor)
        self._data = wp._data

    def conform(self, frame, axis='items'):
        """
        Conform input DataFrame to align with chosen axis pair.

        Parameters
        ----------
        frame : DataFrame
        axis : {'items', 'major', 'minor'}

            Axis the input corresponds to. E.g., if axis='major', then
            the frame's columns would be items, and the index would be
            values of the minor axis

        Returns
        -------
        DataFrame
        """
        index, columns = self._get_plane_axes(axis)
        return frame.reindex(index=index, columns=columns)

    def reindex(self, major=None, items=None, minor=None, method=None,
                major_axis=None, minor_axis=None, copy=True):
        """
        Conform panel to new axis or axes

        Parameters
        ----------
        major : Index or sequence, default None
            Can also use 'major_axis' keyword
        items : Index or sequence, default None
        minor : Index or sequence, default None
            Can also use 'minor_axis' keyword
        method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
            Method to use for filling holes in reindexed Series

            pad / ffill: propagate last valid observation forward to next valid
            backfill / bfill: use NEXT valid observation to fill gap
        copy : boolean, default True
            Return a new object, even if the passed indexes are the same

        Returns
        -------
        Panel (new object)
        """
        result = self

        major = _mut_exclusive(major, major_axis)
        minor = _mut_exclusive(minor, minor_axis)

        if major is not None:
            result = result._reindex_axis(major, method, 1, copy)

        if minor is not None:
            result = result._reindex_axis(minor, method, 2, copy)

        if items is not None:
            result = result._reindex_axis(items, method, 0, copy)

        if result is self and copy:
            raise ValueError('Must specify at least one axis')

        return result

    def reindex_like(self, other, method=None):
        """
        Reindex Panel to match indices of another Panel

        Parameters
        ----------
        other : Panel
        method : string or None

        Returns
        -------
        reindexed : Panel
        """
        # todo: object columns
        return self.reindex(major=other.major_axis, items=other.items,
                            minor=other.minor_axis, method=method)

    def _combine(self, other, func, axis=0):
        # NOTE(review): any other operand type falls through and
        # returns None
        if isinstance(other, Panel):
            return self._combine_panel(other, func)
        elif isinstance(other, DataFrame):
            return self._combine_frame(other, func, axis=axis)
        elif np.isscalar(other):
            new_values = func(self.values, other)
            return Panel(new_values, self.items, self.major_axis,
                         self.minor_axis)

    def __neg__(self):
        return -1 * self

    def _combine_frame(self, other, func, axis=0):
        index, columns = self._get_plane_axes(axis)
        axis = self._get_axis_number(axis)

        other = other.reindex(index=index, columns=columns)

        # move the broadcast axis to the front, apply func, then restore
        # the original layout
        if axis == 0:
            new_values = func(self.values, other.values)
        elif axis == 1:
            new_values = func(self.values.swapaxes(0, 1), other.values.T)
            new_values = new_values.swapaxes(0, 1)
        elif axis == 2:
            new_values = func(self.values.swapaxes(0, 2), other.values)
            new_values = new_values.swapaxes(0, 2)

        return Panel(new_values, self.items, self.major_axis,
                     self.minor_axis)

    def _combine_panel(self, other, func):
        items = self.items + other.items
        major = self.major_axis + other.major_axis
        minor = self.minor_axis + other.minor_axis

        # could check that everything's the same size, but forget it
        this = self.reindex(items=items, major=major, minor=minor)
        other = other.reindex(items=items, major=major, minor=minor)

        result_values = func(this.values, other.values)

        return Panel(result_values, items, major, minor)

    def fillna(self, value=None, method='pad'):
        """
        Fill NaN values using the specified method.

        Member Series / TimeSeries are filled separately.

        Parameters
        ----------
        value : any kind (should be same type as array)
            Value to use to fill holes (e.g. 0)

        method : {'backfill', 'bfill', 'pad', 'ffill', None}, default 'pad'
            Method to use for filling holes in reindexed Series

            pad / ffill: propagate last valid observation forward to next valid
            backfill / bfill: use NEXT valid observation to fill gap

        Returns
        -------
        y : Panel

        See also
        --------
        DataFrame.reindex, DataFrame.asfreq
        """
        if value is None:
            # no fill value: fill each item's DataFrame with `method`
            result = {}
            for col, s in self.iterkv():
                result[col] = s.fillna(method=method, value=value)

            return Panel.from_dict(result)
        else:
            new_data = self._data.fillna(value)
            return Panel(new_data)

    add = _panel_arith_method(operator.add, 'add')
    subtract = sub = _panel_arith_method(operator.sub, 'subtract')
    multiply = mul = _panel_arith_method(operator.mul, 'multiply')

    try:
        divide = div = _panel_arith_method(operator.div, 'divide')
    except AttributeError:  # pragma: no cover
        # Python 3
        divide = div = _panel_arith_method(operator.truediv, 'divide')

    def major_xs(self, key, copy=True):
        """
        Return slice of panel along major axis

        Parameters
        ----------
        key : object
            Major axis label
        copy : boolean, default True
            Copy data

        Returns
        -------
        y : DataFrame
            index -> minor axis, columns -> items
        """
        return self.xs(key, axis=1, copy=copy)

    def minor_xs(self, key, copy=True):
        """
        Return slice of panel along minor axis

        Parameters
        ----------
        key : object
            Minor axis label
        copy : boolean, default True
            Copy data

        Returns
        -------
        y : DataFrame
            index -> major axis, columns -> items
        """
        return self.xs(key, axis=2, copy=copy)

    def xs(self, key, axis=1, copy=True):
        """
        Return slice of panel along selected axis

        Parameters
        ----------
        key : object
            Label
        axis : {'items', 'major', 'minor'}, default 1/'major'
        copy : boolean, default True
            Copy data

        Returns
        -------
        y : DataFrame
        """
        # normalise first so that axis='items' takes the item path just
        # like axis=0
        axis_number = self._get_axis_number(axis)
        if axis_number == 0:
            data = self[key]
            if copy:
                data = data.copy()
            return data

        self._consolidate_inplace()
        new_data = self._data.xs(key, axis=axis_number, copy=copy)
        return DataFrame(new_data)

    def groupby(self, function, axis='major'):
        """
        Group data on given axis, returning GroupBy object

        Parameters
        ----------
        function : callable
            Mapping function for chosen access
        axis : {'major', 'minor', 'items'}, default 'major'

        Returns
        -------
        grouped : PanelGroupBy
        """
        from pandas.core.groupby import PanelGroupBy
        axis = self._get_axis_number(axis)
        return PanelGroupBy(self, function, axis=axis)

    def swapaxes(self, axis1='major', axis2='minor'):
        """
        Interchange axes and swap values axes appropriately

        Returns
        -------
        y : Panel (new object)
        """
        i = self._get_axis_number(axis1)
        j = self._get_axis_number(axis2)

        if i == j:
            raise ValueError('Cannot specify the same axis')

        mapping = {i: j, j: i}

        new_axes = (self._get_axis(mapping.get(k, k))
                    for k in range(3))
        new_values = self.values.swapaxes(i, j).copy()

        return Panel(new_values, *new_axes)

    def to_frame(self, filter_observations=True):
        """
        Transform wide format into long (stacked) format as DataFrame

        Parameters
        ----------
        filter_observations : boolean, default True
            Drop (major, minor) pairs without a complete set of observations
            across all the items

        Returns
        -------
        y : DataFrame
        """
        _, N, K = self.shape

        if filter_observations:
            # keep only the (major, minor) cells that are non-null in
            # every item
            mask = com.notnull(self.values).all(axis=0)
            selector = mask.ravel()
        else:
            selector = slice(None, None)

        data = {}
        for item in self.items:
            data[item] = self[item].values.ravel()[selector]

        # Row-major flattening of an (N, K) plane: each major position is
        # repeated K times while the minor positions cycle 0..K-1 N times.
        major_labels = np.arange(N).repeat(K)[selector]
        minor_labels = np.tile(np.arange(K), N)[selector]

        index = MultiIndex(levels=[self.major_axis, self.minor_axis],
                           labels=[major_labels, minor_labels],
                           names=['major', 'minor'])

        return DataFrame(data, index=index, columns=self.items)

    to_long = deprecate('to_long', to_frame)
    toLong = deprecate('toLong', to_frame)

    def filter(self, items):
        """
        Restrict items in panel to input list

        Parameters
        ----------
        items : sequence

        Returns
        -------
        y : Panel
        """
        intersection = self.items.intersection(items)
        return self.reindex(items=intersection)

    def apply(self, func, axis='major'):
        """
        Apply function along the requested axis

        Parameters
        ----------
        func : numpy function
            Signature should match numpy.{sum, mean, var, std} etc.
        axis : {'major', 'minor', 'items'}

        Returns
        -------
        result : DataFrame
        """
        i = self._get_axis_number(axis)
        result = np.apply_along_axis(func, i, self.values)
        return self._wrap_result(result, axis=axis)

    def _reduce(self, op, axis=0, skipna=True):
        """
        Reduce the panel's values along `axis` with `op` and wrap the
        resulting 2D array in a DataFrame labelled by the remaining axes.
        """
        axis_name = self._get_axis_name(axis)
        axis_number = self._get_axis_number(axis_name)
        result = op(self.values, axis=axis_number, skipna=skipna, copy=True)

        index, columns = self._get_plane_axes(axis_name)
        if axis_name != 'items':
            result = result.T

        return DataFrame(result, index=index, columns=columns)

    def _wrap_result(self, result, axis):
        """
        Wrap a 2D array obtained by collapsing `axis` in a DataFrame
        labelled by the two remaining axes.
        """
        axis = self._get_axis_name(axis)
        index, columns = self._get_plane_axes(axis)

        if axis != 'items':
            result = result.T

        return DataFrame(result, index=index, columns=columns)

    def count(self, axis='major'):
        """
        Return number of observations over requested axis.

        Parameters
        ----------
        axis : {'items', 'major', 'minor'} or {0, 1, 2}

        Returns
        -------
        count : DataFrame
        """
        i = self._get_axis_number(axis)

        values = self.values
        mask = np.isfinite(values)
        result = mask.sum(axis=i)

        return self._wrap_result(result, axis)

    def sum(self, axis='major', skipna=True):
        return self._reduce(nanops.nansum, axis=axis, skipna=skipna)
    _add_docs(sum, 'sum', 'sum')

    def mean(self, axis='major', skipna=True):
        return self._reduce(nanops.nanmean, axis=axis, skipna=skipna)
    _add_docs(mean, 'mean', 'mean')

    def var(self, axis='major', skipna=True):
        return self._reduce(nanops.nanvar, axis=axis, skipna=skipna)
    _add_docs(var, 'unbiased variance', 'variance')

    def std(self, axis='major', skipna=True):
        # standard deviation is the element-wise square root of the variance
        return self.var(axis=axis, skipna=skipna).apply(np.sqrt)
    _add_docs(std, 'unbiased standard deviation', 'stdev')

    def skew(self, axis='major', skipna=True):
        return self._reduce(nanops.nanskew, axis=axis, skipna=skipna)
    # document skew itself (this call used to target std by mistake)
    _add_docs(skew, 'unbiased skewness', 'skew')

    def prod(self, axis='major', skipna=True):
        return self._reduce(nanops.nanprod, axis=axis, skipna=skipna)
    _add_docs(prod, 'product', 'prod')

    def compound(self, axis='major', skipna=True):
        return (1 + self).prod(axis=axis, skipna=skipna) - 1
    _add_docs(compound, 'compounded percentage', 'compounded')

    def median(self, axis='major', skipna=True):
        return self._reduce(nanops.nanmedian, axis=axis, skipna=skipna)
    _add_docs(median, 'median', 'median')

    def max(self, axis='major', skipna=True):
        return self._reduce(nanops.nanmax, axis=axis, skipna=skipna)
    _add_docs(max, 'maximum', 'maximum')

    def min(self, axis='major', skipna=True):
        return self._reduce(nanops.nanmin, axis=axis, skipna=skipna)
    _add_docs(min, 'minimum', 'minimum')

    def shift(self, lags, axis='major'):
        """
        Shift major or minor axis by specified number of leads/lags. Drops
        periods

        Parameters
        ----------
        lags : int
            Positive values lag the data, negative values lead it and zero
            leaves values and labels untouched
        axis : {'major', 'minor'}

        Returns
        -------
        shifted : Panel

        Raises
        ------
        ValueError
            If `axis` is neither 'major' nor 'minor'
        """
        values = self.values
        items = self.items
        major_axis = self.major_axis
        minor_axis = self.minor_axis

        # vslicer selects the values that survive the shift, islicer the
        # labels they are re-attached to
        if lags > 0:
            vslicer = slice(None, -lags)
            islicer = slice(lags, None)
        elif lags == 0:
            # slice(None, -0) would be empty, so handle zero explicitly
            vslicer = islicer = slice(None)
        else:
            vslicer = slice(-lags, None)
            islicer = slice(None, lags)

        if axis == 'major':
            values = values[:, vslicer, :]
            major_axis = major_axis[islicer]
        elif axis == 'minor':
            values = values[:, :, vslicer]
            minor_axis = minor_axis[islicer]
        else:
            raise ValueError('Invalid axis')

        return Panel(values, items=items, major_axis=major_axis,
                     minor_axis=minor_axis)

    def truncate(self, before=None, after=None, axis='major'):
        """Function truncates a sorted Panel before and/or after some
        particular values on the requested axis

        Parameters
        ----------
        before : date
            Left boundary
        after : date
            Right boundary
        axis : {'major', 'minor', 'items'}

        Returns
        -------
        Panel
        """
        axis = self._get_axis_name(axis)
        index = self._get_axis(axis)

        beg_slice, end_slice = index.slice_locs(before, after)
        new_index = index[beg_slice:end_slice]

        return self.reindex(**{axis: new_index})

    def join(self, other, how='left', lsuffix='', rsuffix=''):
        """
        Join items with other Panel either on major and minor axes column

        Parameters
        ----------
        other : Panel or list of Panels
            Index should be similar to one of the columns in this one
        how : {'left', 'right', 'outer', 'inner'}
            How to handle indexes of the two objects. Default: 'left'
            for joining on index, None otherwise
            * left: use calling frame's index
            * right: use input frame's index
            * outer: form union of indexes
            * inner: use intersection of indexes
        lsuffix : string
            Suffix to use from left frame's overlapping columns
        rsuffix : string
            Suffix to use from right frame's overlapping columns

        Returns
        -------
        joined : Panel

        Raises
        ------
        ValueError
            When a list of panels is passed together with suffixes or
            with how='right'
        """
        from pandas.tools.merge import concat

        if isinstance(other, Panel):
            join_major, join_minor = self._get_join_index(other, how)
            this = self.reindex(major=join_major, minor=join_minor)
            other = other.reindex(major=join_major, minor=join_minor)
            merged_data = this._data.merge(other._data, lsuffix, rsuffix)
            return self._constructor(merged_data)
        else:
            if lsuffix or rsuffix:
                raise ValueError('Suffixes not supported when passing multiple '
                                 'panels')

            if how == 'left':
                # a left join is an outer concat restricted to this
                # panel's own major / minor axes
                how = 'outer'
                join_axes = [self.major_axis, self.minor_axis]
            elif how == 'right':
                raise ValueError('Right join not supported with multiple '
                                 'panels')
            else:
                join_axes = None

            return concat([self] + list(other), axis=0, join=how,
                          join_axes=join_axes, verify_integrity=True)

    def _get_join_index(self, other, how):
        """
        Return the (major, minor) axes to use when joining with `other`.

        Raises
        ------
        ValueError
            If `how` is not one of 'left', 'right', 'inner', 'outer'
        """
        if how == 'left':
            join_major, join_minor = self.major_axis, self.minor_axis
        elif how == 'right':
            join_major, join_minor = other.major_axis, other.minor_axis
        elif how == 'inner':
            join_major = self.major_axis.intersection(other.major_axis)
            join_minor = self.minor_axis.intersection(other.minor_axis)
        elif how == 'outer':
            join_major = self.major_axis.union(other.major_axis)
            join_minor = self.minor_axis.union(other.minor_axis)
        else:
            # previously fell through and failed with an UnboundLocalError
            raise ValueError("how must be one of 'left', 'right', 'inner', "
                             "'outer'; got %r" % (how,))
        return join_major, join_minor
class Panel(NDFrame):
    """
    Represents wide format panel data, stored as 3-dimensional array

    Parameters
    ----------
    data : ndarray (items x major x minor), or dict of DataFrames
    items : Index or array-like
        axis=0
    major_axis : Index or array-like
        axis=1
    minor_axis : Index or array-like
        axis=2
    dtype : dtype, default None
        Data type to force, otherwise infer
    copy : boolean, default False
        Copy data from inputs. Only affects DataFrame / 2d ndarray input
    """

    @property
    def _constructor(self):
        # Use the runtime type so subclasses build instances of themselves.
        return type(self)

    # Type used for an object with one dimension fewer than the panel.
    _constructor_sliced = DataFrame

    def __init__(self, data=None, items=None, major_axis=None,
                 minor_axis=None, copy=False, dtype=None):
        self._init_data(data=data, items=items, major_axis=major_axis,
                        minor_axis=minor_axis, copy=copy, dtype=dtype)

    def _init_data(self, data, copy, dtype, **kwargs):
        """
        Generate ND initialization; axes are passed
        as required objects to __init__

        ``data`` may be None (treated as an empty dict), a BlockManager, a
        dict, an ndarray or a list; anything else raises PandasError.
        """
        if data is None:
            data = {}

        # Axes explicitly supplied by the caller, in _AXIS_ORDERS order
        # (None where an axis was not passed).
        passed_axes = [kwargs.get(a) for a in self._AXIS_ORDERS]
        axes = None
        if isinstance(data, BlockManager):
            if any(x is not None for x in passed_axes):
                # Passed axes win; the manager's own axes fill the gaps.
                axes = [x if x is not None else y
                        for x, y in zip(passed_axes, data.axes)]
            mgr = data
        elif isinstance(data, dict):
            mgr = self._init_dict(data, passed_axes, dtype=dtype)
            # copy / dtype were consumed by the helper; do not re-apply them.
            copy = False
            dtype = None
        elif isinstance(data, (np.ndarray, list)):
            mgr = self._init_matrix(data, passed_axes, dtype=dtype, copy=copy)
            copy = False
            dtype = None
        else:  # pragma: no cover
            raise PandasError('Panel constructor not properly called!')

        NDFrame.__init__(self, mgr, axes=axes, copy=copy, dtype=dtype)

    def _init_dict(self, data, axes, dtype=None):
        """
        Build the block manager from a dict keyed by info-axis label.

        ``axes`` is the list of passed axes; the info-axis entry is popped
        from it. The caller's ``data`` mapping is never modified.
        """
        haxis = axes.pop(self._info_axis_number)

        # prefilter if haxis passed
        if haxis is not None:
            haxis = _ensure_index(haxis)
            data = OrderedDict((k, v) for k, v
                               in compat.iteritems(data) if k in haxis)
        else:
            ks = list(data.keys())
            if not isinstance(data, OrderedDict):
                ks = _try_sort(ks)
            haxis = Index(ks)
            # Work on a shallow copy: the conversion below assigns into the
            # mapping and must not leak into the caller's dict.
            data = data.copy()

        # Nested dicts become sliced objects; values themselves are only
        # replaced, keys never change, so a snapshot of the items suffices.
        for k, v in list(compat.iteritems(data)):
            if isinstance(v, dict):
                data[k] = self._constructor_sliced(v)

        # extract axis for remaining axes & create the slicemap
        raxes = [self._extract_axis(self, data, axis=i)
                 if a is None else a for i, a in enumerate(axes)]
        raxes_sm = self._extract_axes_for_slice(self, raxes)

        # shallow copy
        arrays = []
        haxis_shape = [len(a) for a in raxes]
        for h in haxis:
            v = values = data.get(h)
            if v is None:
                # Missing entry: an all-NaN array of the slice shape.
                values = np.empty(haxis_shape, dtype=dtype)
                values.fill(np.nan)
            elif isinstance(v, self._constructor_sliced):
                d = raxes_sm.copy()
                d['copy'] = False
                v = v.reindex(**d)
                if dtype is not None:
                    v = v.astype(dtype)
                values = v.values
            arrays.append(values)

        return self._init_arrays(arrays, haxis, [haxis] + raxes)

    def _init_arrays(self, arrays, arr_names, axes):
        """Create a block manager from a list of arrays and their axes."""
        return create_block_manager_from_arrays(arrays, arr_names, axes)

    @classmethod
    def from_dict(cls, data, intersect=False, orient='items', dtype=None):
        """
        Construct Panel from dict of DataFrame objects

        Parameters
        ----------
        data : dict
            {field : DataFrame}
        intersect : boolean
            Intersect indexes of input DataFrames
        orient : {'items', 'minor'}, default 'items'
            The "orientation" of the data. If the keys of the passed dict
            should be the items of the result panel, pass 'items'
            (default). Otherwise if the columns of the values of the passed
            DataFrame objects should be the items (which in the case of
            mixed-dtype data you should do), instead pass 'minor'
        dtype : dtype, default None
            Forwarded to the homogenizing helper.

        Returns
        -------
        Panel

        Raises
        ------
        ValueError
            If ``orient`` is neither 'items' nor 'minor'.
        """
        orient = orient.lower()
        if orient == 'minor':
            # Pivot {col: frame} into {item: {col: series}}.
            new_data = OrderedDefaultdict(dict)
            for col, df in compat.iteritems(data):
                for item, s in compat.iteritems(df):
                    new_data[item][col] = s
            data = new_data
        elif orient != 'items':  # pragma: no cover
            raise ValueError('Orientation must be one of {items, minor}.')

        d = cls._homogenize_dict(cls, data, intersect=intersect, dtype=dtype)
        ks = list(d['data'].keys())
        # Only ordered input keeps its key order; otherwise sort the keys.
        if not isinstance(d['data'], OrderedDict):
            ks = list(sorted(ks))
        d[cls._info_axis_name] = Index(ks)
        return cls(**d)

    # Arithmetic methods

    __add__ = _arith_method(operator.add, '__add__')
    __sub__ = _arith_method(operator.sub, '__sub__')
    __truediv__ = _arith_method(operator.truediv, '__truediv__')
    __floordiv__ = _arith_method(operator.floordiv, '__floordiv__')
    __mul__ = _arith_method(operator.mul, '__mul__')
    __pow__ = _arith_method(operator.pow, '__pow__')

    __radd__ = _arith_method(operator.add, '__radd__')
    __rmul__ = _arith_method(operator.mul, '__rmul__')
    __rsub__ = _arith_method(lambda x, y: y - x, '__rsub__')
    __rtruediv__ = _arith_method(lambda x, y: y / x, '__rtruediv__')
    __rfloordiv__ = _arith_method(lambda x, y: y // x, '__rfloordiv__')
    __rpow__ = _arith_method(lambda x, y: y**x, '__rpow__')

    if not compat.PY3:
        __div__ = _arith_method(operator.div, '__div__')
        __rdiv__ = _arith_method(lambda x, y: y / x, '__rdiv__')

    def __getitem__(self, key):
        # A MultiIndex info axis needs partial-key handling.
        if isinstance(self._info_axis, MultiIndex):
            return self._getitem_multilevel(key)
        return super(Panel, self).__getitem__(key)

    def _getitem_multilevel(self, key):
        """
        Select by ``key`` on a MultiIndex info axis.

        When the key locates several entries (slice or array) a new object
        of the same type is built from those entries; otherwise the single
        cached item is returned.
        """
        info = self._info_axis
        loc = info.get_loc(key)
        if isinstance(loc, (slice, np.ndarray)):
            new_index = info[loc]
            result_index = _maybe_droplevels(new_index, key)
            # A multidimensional index must be a tuple: NumPy no longer
            # interprets a list of slices/arrays as one index per axis.
            slices = tuple([loc] + [slice(None)] * (self._AXIS_LEN - 1))
            new_values = self.values[slices]

            d = self._construct_axes_dict(self._AXIS_ORDERS[1:])
            d[self._info_axis_name] = result_index
            result = self._constructor(new_values, **d)
            return result
        else:
            return self._get_item_cache(key)

    def _init_matrix(self, data, axes, dtype=None, copy=False):
        """
        Build the block manager from ndarray / list input.

        Raises ValueError when the values cannot be cast to ``dtype``.
        """
        values = self._prep_ndarray(self, data, copy=copy)

        if dtype is not None:
            try:
                values = values.astype(dtype)
            except Exception:
                raise ValueError('failed to cast to %s' % dtype)

        shape = values.shape
        fixed_axes = []
        for i, ax in enumerate(axes):
            # Missing axes get a default index sized from the data.
            if ax is None:
                ax = _default_index(shape[i])
            else:
                ax = _ensure_index(ax)
            fixed_axes.append(ax)

        return create_block_manager_from_blocks([values], fixed_axes)

    #----------------------------------------------------------------------
    # Comparison methods

    def _compare_constructor(self, other, func):
        """
        Apply ``func`` item by item to two identically-labeled objects and
        wrap the per-item results in a new object of the same type.
        """
        if not self._indexed_same(other):
            raise Exception('Can only compare identically-labeled '
                            'same type objects')

        new_data = {}
        for col in self._info_axis:
            new_data[col] = func(self[col], other[col])

        d = self._construct_axes_dict(copy=False)
        return self._constructor(data=new_data, **d)

    # boolean operators
    __and__ = _arith_method(operator.and_, '__and__')
    __or__ = _arith_method(operator.or_, '__or__')
    __xor__ = _arith_method(operator.xor, '__xor__')

    # Comparison methods
    __eq__ = _comp_method(operator.eq, '__eq__')
    __ne__ = _comp_method(operator.ne, '__ne__')
    __lt__ = _comp_method(operator.lt, '__lt__')
    __gt__ = _comp_method(operator.gt, '__gt__')
    __le__ = _comp_method(operator.le, '__le__')
    __ge__ = _comp_method(operator.ge, '__ge__')

    eq = _comp_method(operator.eq, 'eq')
    ne = _comp_method(operator.ne, 'ne')
    gt = _comp_method(operator.gt, 'gt')
    lt = _comp_method(operator.lt, 'lt')
    ge = _comp_method(operator.ge, 'ge')
    le = _comp_method(operator.le, 'le')

    #----------------------------------------------------------------------
    # Magic methods

    def __unicode__(self):
        """
        Return a string representation for a particular Panel

        Invoked by unicode(df) in py2 only.
        Yields a Unicode String in both py2/py3.
        """

        class_name = str(self.__class__)

        shape = self.shape
        dims = u('Dimensions: %s') % ' x '.join(
            ["%d (%s)" % (s, a) for a, s in zip(self._AXIS_ORDERS, shape)])

        def axis_pretty(a):
            # One summary line per axis: first and last label, or None.
            v = getattr(self, a)
            if len(v) > 0:
                return u('%s axis: %s to %s') % (a.capitalize(),
                                                  com.pprint_thing(v[0]),
                                                  com.pprint_thing(v[-1]))
            else:
                return u('%s axis: None') % a.capitalize()

        output = '\n'.join(
            [class_name, dims] + [axis_pretty(a) for a in self._AXIS_ORDERS])
        return output

    def _get_plane_axes(self, axis):
        """
        Return the (index, columns) pair of the DataFrame obtained when
        ``axis`` is held fixed: the two remaining panel axes.
        """
        axis = self._get_axis_name(axis)

        if axis == 'major_axis':
            index = self.minor_axis
            columns = self.items
        elif axis == 'minor_axis':
            index = self.major_axis
            columns = self.items
        elif axis == 'items':
            index = self.major_axis
            columns = self.minor_axis

        return index, columns

    fromDict = from_dict

    def to_sparse(self, fill_value=None, kind='block'):
        """
        Convert to SparsePanel

        Parameters
        ----------
        fill_value : float, default NaN
        kind : {'block', 'integer'}

        Returns
        -------
        y : SparsePanel
        """
        from pandas.core.sparse import SparsePanel
        frames = dict(compat.iteritems(self))
        return SparsePanel(frames, items=self.items,
                           major_axis=self.major_axis,
                           minor_axis=self.minor_axis,
                           default_kind=kind,
                           default_fill_value=fill_value)

    def to_excel(self, path, na_rep=''):
        """
        Write each DataFrame in Panel to a separate excel sheet

        Parameters
        ----------
        path : string
            Passed to ExcelWriter to create the writer
        na_rep : string, default ''
            Missing data representation
        """
        from pandas.io.excel import ExcelWriter
        writer = ExcelWriter(path)
        for item, df in compat.iteritems(self):
            # Sheet name is the string form of the item label.
            name = str(item)
            df.to_excel(writer, name, na_rep=na_rep)
        writer.save()

    def as_matrix(self):
        """Consolidate the internal data and return it via ``as_matrix``."""
        self._consolidate_inplace()
        return self._data.as_matrix()

    #----------------------------------------------------------------------
    # Getting and setting elements

    def get_value(self, *args):
        """
        Quickly retrieve single value at (item, major, minor) location

        Parameters
        ----------
        item : item label (panel item)
        major : major axis label (panel item row)
        minor : minor axis label (panel item column)

        Returns
        -------
        value : scalar value

        Raises
        ------
        AssertionError
            If the number of labels differs from the number of axes.
        """
        # require an arg for each axis
        if not ((len(args) == self._AXIS_LEN)):
            raise AssertionError()

        # hm, two layers to the onion
        frame = self._get_item_cache(args[0])
        return frame.get_value(*args[1:])

    def set_value(self, *args):
        """
        Quickly set single value at (item, major, minor) location

        Parameters
        ----------
        item : item label (panel item)
        major : major axis label (panel item row)
        minor : minor axis label (panel item column)
        value : scalar

        Returns
        -------
        panel : Panel
            If label combo is contained, will be reference to calling Panel,
            otherwise a new object

        Raises
        ------
        AssertionError
            If the arguments are not one label per axis plus the value.
        """
        # require an arg for each axis and the value
        if not ((len(args) == self._AXIS_LEN + 1)):
            raise AssertionError()

        try:
            frame = self._get_item_cache(args[0])
            frame.set_value(*args[1:])
            return self
        except KeyError:
            # Label missing: enlarge via reindex, then set on the new object.
            axes = self._expand_axes(args)
            d = self._construct_axes_dict_from(self, axes, copy=False)
            result = self.reindex(**d)
            args = list(args)
            likely_dtype, args[-1] = _infer_dtype_from_scalar(args[-1])
            made_bigger = not np.array_equal(
                axes[0], self._info_axis)
            # how to make this logic simpler?
            if made_bigger:
                com._possibly_cast_item(result, args[0], likely_dtype)

            return result.set_value(*args)

    def _box_item_values(self, key, values):
        """Wrap raw item values in the sliced type with the non-info axes."""
        d = self._construct_axes_dict_for_slice(self._AXIS_ORDERS[1:])
        return self._constructor_sliced(values, **d)

    def _slice(self, slobj, axis=0, raise_on_error=False):
        """Return a new object built from a slice of the internal data."""
        new_data = self._data.get_slice(slobj,
                                        axis=axis,
                                        raise_on_error=raise_on_error)
        return self._constructor(new_data)

    def __setitem__(self, key, value):
        """
        Set item ``key`` from a sliced object (reindexed to this panel's
        remaining axes), an ndarray of the exact slice shape, or a scalar
        (broadcast to the slice shape).

        Raises AssertionError for a mis-shaped ndarray and TypeError for
        any other value type.
        """
        shape = tuple(self.shape)
        if isinstance(value, self._constructor_sliced):
            value = value.reindex(
                **self._construct_axes_dict_for_slice(self._AXIS_ORDERS[1:]))
            mat = value.values
        elif isinstance(value, np.ndarray):
            if not ((value.shape == shape[1:])):
                raise AssertionError()
            mat = np.asarray(value)
        elif np.isscalar(value):
            dtype, value = _infer_dtype_from_scalar(value)
            mat = np.empty(shape[1:], dtype=dtype)
            mat.fill(value)
        else:
            raise TypeError('Cannot set item of type: %s' % str(type(value)))

        # Add a leading length-1 axis so the matrix lines up with the panel.
        mat = mat.reshape((1,) + shape[1:])
        NDFrame._set_item(self, key, mat)

    def _unpickle_panel_compat(self, state):  # pragma: no cover
        "Unpickle the panel"
        _unpickle = com._unpickle_array
        vals, items, major, minor = state

        items = _unpickle(items)
        major = _unpickle(major)
        minor = _unpickle(minor)
        values = _unpickle(vals)
        wp = Panel(values, items, major, minor)
        self._data = wp._data

    def conform(self, frame, axis='items'):
        """
        Conform input DataFrame to align with chosen axis pair.

        Parameters
        ----------
        frame : DataFrame
        axis : {'items', 'major', 'minor'}

            Axis the input corresponds to. E.g., if axis='major', then
            the frame's columns would be items, and the index would be
            values of the minor axis

        Returns
        -------
        DataFrame
        """
        axes = self._get_plane_axes(axis)
        return frame.reindex(**self._extract_axes_for_slice(self, axes))

    def _needs_reindex_multi(self, axes, method, level):
        # only allowing multi-index on Panel (and not > dims)
        return (method is None and
                not self._is_mixed_type and
                self._AXIS_LEN <= 3 and
                com._count_not_none(*axes.values()) == 3)

    def _reindex_multi(self, axes, copy, fill_value):
        """ we are guaranteed non-Nones in the axes! """
        items = axes['items']
        major = axes['major_axis']
        minor = axes['minor_axis']
        a0, a1, a2 = len(items), len(major), len(minor)

        values = self.values
        new_values = np.empty((a0, a1, a2), dtype=values.dtype)

        new_items, indexer0 = self.items.reindex(items)
        new_major, indexer1 = self.major_axis.reindex(major)
        new_minor, indexer2 = self.minor_axis.reindex(minor)

        # A None indexer is replaced by the identity mapping for that axis.
        if indexer0 is None:
            indexer0 = lrange(len(new_items))

        if indexer1 is None:
            indexer1 = lrange(len(new_major))

        if indexer2 is None:
            indexer2 = lrange(len(new_minor))

        for i, ind in enumerate(indexer0):
            com.take_2d_multi(values[ind], (indexer1, indexer2),
                              out=new_values[i])

        return Panel(new_values, items=new_items, major_axis=new_major,
                     minor_axis=new_minor)

    def dropna(self, axis=0, how='any'):
        """
        Drop 2D from panel, holding passed axis constant

        Parameters
        ----------
        axis : int, default 0
            Axis to hold constant. E.g. axis=1 will drop major_axis entries
            having a certain amount of NA data
        how : {'all', 'any'}, default 'any'
            'any': one or more values are NA in the DataFrame along the
            axis. For 'all' they all must be.

        Returns
        -------
        dropped : Panel
        """
        axis = self._get_axis_number(axis)

        values = self.values
        mask = com.notnull(values)

        # Count non-null values per label of ``axis`` by summing over the
        # other axes, highest first so the remaining positions stay valid.
        for ax in reversed(sorted(set(range(self._AXIS_LEN)) - set([axis]))):
            mask = mask.sum(ax)

        # Number of cells in one slice along ``axis``.
        per_slice = np.prod(values.shape[:axis] + values.shape[axis + 1:])

        if how == 'all':
            cond = mask > 0
        else:
            cond = mask == per_slice

        new_ax = self._get_axis(axis)[cond]
        return self.reindex_axis(new_ax, axis=axis)

    def _combine(self, other, func, axis=0):
        """
        Dispatch a binary operation on the type of ``other``.

        Raises
        ------
        NotImplementedError
            If ``other`` is not a Panel, a DataFrame or a scalar.
        """
        if isinstance(other, Panel):
            return self._combine_panel(other, func)
        elif isinstance(other, DataFrame):
            return self._combine_frame(other, func, axis=axis)
        elif np.isscalar(other):
            return self._combine_const(other, func)
        # Fail loudly rather than silently handing back None.
        raise NotImplementedError(
            '%s is not supported in combine operation with %s'
            % (str(type(other)), str(type(self))))

    def _combine_const(self, other, func):
        """Apply ``func`` between the values and a scalar."""
        new_values = func(self.values, other)
        d = self._construct_axes_dict()
        return self._constructor(new_values, **d)

    def _combine_frame(self, other, func, axis=0):
        """
        Apply ``func`` between the values and a DataFrame aligned to the
        plane orthogonal to ``axis``.
        """
        index, columns = self._get_plane_axes(axis)
        axis = self._get_axis_number(axis)

        other = other.reindex(index=index, columns=columns)

        if axis == 0:
            new_values = func(self.values, other.values)
        elif axis == 1:
            # Move the target axis to the front, operate, then move it back.
            new_values = func(self.values.swapaxes(0, 1), other.values.T)
            new_values = new_values.swapaxes(0, 1)
        elif axis == 2:
            new_values = func(self.values.swapaxes(0, 2), other.values)
            new_values = new_values.swapaxes(0, 2)

        return self._constructor(new_values, self.items, self.major_axis,
                                 self.minor_axis)

    def _combine_panel(self, other, func):
        """Apply ``func`` to two panels after reindexing both to the
        combined axes."""
        items = self.items + other.items
        major = self.major_axis + other.major_axis
        minor = self.minor_axis + other.minor_axis

        # could check that everything's the same size, but forget it
        this = self.reindex(items=items, major=major, minor=minor)
        other = other.reindex(items=items, major=major, minor=minor)

        result_values = func(this.values, other.values)

        return self._constructor(result_values, items, major, minor)

    def fillna(self, value=None, method=None):
        """
        Fill NaN values using the specified method.

        Member Series / TimeSeries are filled separately.

        Parameters
        ----------
        value : any kind (should be same type as array)
            Value to use to fill holes (e.g. 0)

        method : {'backfill', 'bfill', 'pad', 'ffill', None}, default 'pad'
            Method to use for filling holes in reindexed Series

            pad / ffill: propagate last valid observation forward to next valid
            backfill / bfill: use NEXT valid observation to fill gap

        Returns
        -------
        y : Panel (same type as the caller)

        Raises
        ------
        TypeError
            If ``value`` is a list or tuple.
        ValueError
            If neither or both of ``value`` and ``method`` are given.

        See also
        --------
        DataFrame.reindex, DataFrame.asfreq
        """
        if isinstance(value, (list, tuple)):
            raise TypeError('"value" parameter must be a scalar or dict, but '
                            'you passed a "{0}"'.format(type(value).__name__))
        if value is None:
            if method is None:
                raise ValueError('must specify a fill method or value')
            # Method-based fill is delegated item by item.
            result = {}
            for col, s in compat.iteritems(self):
                result[col] = s.fillna(method=method, value=value)

            return self._constructor.from_dict(result)
        else:
            if method is not None:
                raise ValueError('cannot specify both a fill method and value')
            new_data = self._data.fillna(value)
            return self._constructor(new_data)

    def ffill(self):
        """Shorthand for ``fillna(method='ffill')``."""
        return self.fillna(method='ffill')

    def bfill(self):
        """Shorthand for ``fillna(method='bfill')``."""
        return self.fillna(method='bfill')

    def major_xs(self, key, copy=True):
        """
        Return slice of panel along major axis

        Parameters
        ----------
        key : object
            Major axis label
        copy : boolean, default True
            Copy data

        Returns
        -------
        y : DataFrame
            index -> minor axis, columns -> items
        """
        return self.xs(key, axis=self._AXIS_LEN - 2, copy=copy)

    def minor_xs(self, key, copy=True):
        """
        Return slice of panel along minor axis

        Parameters
        ----------
        key : object
            Minor axis label
        copy : boolean, default True
            Copy data

        Returns
        -------
        y : DataFrame
            index -> major axis, columns -> items
        """
        return self.xs(key, axis=self._AXIS_LEN - 1, copy=copy)

    def xs(self, key, axis=1, copy=True):
        """
        Return slice of panel along selected axis

        Parameters
        ----------
        key : object
            Label
        axis : {'items', 'major', 'minor}, default 1/'major'
        copy : boolean, default True
            Copy data

        Returns
        -------
        y : ndim(self)-1
        """
        axis = self._get_axis_number(axis)
        if axis == 0:
            # Selecting along the first axis is plain item access.
            data = self[key]
            if copy:
                data = data.copy()
            return data

        self._consolidate_inplace()
        new_data = self._data.xs(key, axis=axis, copy=copy)
        return self._construct_return_type(new_data)

    _xs = xs

    def _ixs(self, i, axis=0):
        # for compatibility with .ix indexing
        # Won't work with hierarchical indexing yet
        key = self._get_axis(axis)[i]

        # xs cannot handle a non-scalar key, so just reindex here
        if _is_list_like(key):
            return self.reindex(**{self._get_axis_name(axis): key})

        return self.xs(key, axis=axis)

    def groupby(self, function, axis='major'):
        """
        Group data on given axis, returning GroupBy object

        Parameters
        ----------
        function : callable
            Mapping function for chosen access
        axis : {'major', 'minor', 'items'}, default 'major'

        Returns
        -------
        grouped : PanelGroupBy
        """
        from pandas.core.groupby import PanelGroupBy
        axis = self._get_axis_number(axis)
        return PanelGroupBy(self, function, axis=axis)

    def to_frame(self, filter_observations=True):
        """
        Transform wide format into long (stacked) format as DataFrame

        Parameters
        ----------
        filter_observations : boolean, default True
            Drop (major, minor) pairs without a complete set of observations
            across all the items

        Returns
        -------
        y : DataFrame
        """
        _, N, K = self.shape

        if filter_observations:
            # Keep only (major, minor) cells that are non-null in every item.
            mask = com.notnull(self.values).all(axis=0)
            selector = mask.ravel()
        else:
            selector = slice(None, None)

        data = {}
        for item in self.items:
            data[item] = self[item].values.ravel()[selector]

        # Positions into the two index levels, in row-major order:
        # major repeats each position K times, minor cycles 0..K-1 N times.
        major_labels = np.arange(N).repeat(K)[selector]
        minor_labels = np.tile(np.arange(K), N)[selector]

        maj_name = self.major_axis.name or 'major'
        min_name = self.minor_axis.name or 'minor'

        index = MultiIndex(levels=[self.major_axis, self.minor_axis],
                           labels=[major_labels, minor_labels],
                           names=[maj_name, min_name])

        return DataFrame(data, index=index, columns=self.items)

    to_long = deprecate('to_long', to_frame)
    toLong = deprecate('toLong', to_frame)

    def apply(self, func, axis='major'):
        """
        Apply

        Parameters
        ----------
        func : numpy function
            Signature should match numpy.{sum, mean, var, std} etc.
        axis : {'major', 'minor', 'items'}

        Returns
        -------
        result : DataFrame or Panel
        """
        i = self._get_axis_number(axis)
        result = np.apply_along_axis(func, i, self.values)
        return self._wrap_result(result, axis=axis)

    def _reduce(self, op, axis=0, skipna=True):
        """Reduce the values along ``axis`` with ``op`` and wrap the
        result."""
        axis_name = self._get_axis_name(axis)
        axis_number = self._get_axis_number(axis_name)
        result = op(self.values, axis=axis_number, skipna=skipna)

        axes = self._get_plane_axes(axis_name)
        # Transposed so the result's shape matches (index, columns) as
        # returned by _get_plane_axes.
        if result.ndim == 2 and axis_name != self._info_axis_name:
            result = result.T

        return self._construct_return_type(result, axes)

    def _construct_return_type(self, result, axes=None, **kwargs):
        """ return the type for the ndim of the result """
        ndim = result.ndim
        if self.ndim == ndim:
            # same dimensionality: rebuild with this object's own axes
            if axes is None:
                return self._constructor(result)
            return self._constructor(result, **self._construct_axes_dict())

        elif self.ndim == ndim + 1:
            # one dimension fewer: build the sliced type
            if axes is None:
                return self._constructor_sliced(result)
            return self._constructor_sliced(
                result, **self._extract_axes_for_slice(self, axes))

        raise PandasError(
            "invalid _construct_return_type [self->%s] [result->%s]" %
            (self.ndim, result.ndim))

    def _wrap_result(self, result, axis):
        """Wrap an array computed along ``axis`` in the matching pandas
        type."""
        axis = self._get_axis_name(axis)
        axes = self._get_plane_axes(axis)
        if result.ndim == 2 and axis != self._info_axis_name:
            result = result.T

        return self._construct_return_type(result, axes)

    def count(self, axis='major'):
        """
        Return number of observations over requested axis.

        Parameters
        ----------
        axis : {'items', 'major', 'minor'} or {0, 1, 2}

        Returns
        -------
        count : DataFrame
        """
        i = self._get_axis_number(axis)

        values = self.values
        # An observation is any finite value (NaN and +/-inf are excluded).
        mask = np.isfinite(values)
        result = mask.sum(axis=i)

        return self._wrap_result(result, axis)

    def shift(self, lags, axis='major'):
        """
        Shift major or minor axis by specified number of leads/lags. Drops
        periods right now compared with DataFrame.shift

        Parameters
        ----------
        lags : int
        axis : {'major', 'minor'}

        Returns
        -------
        shifted : Panel

        Raises
        ------
        ValueError
            If ``axis`` is not the major or minor axis.
        """
        values = self.values
        items = self.items
        major_axis = self.major_axis
        minor_axis = self.minor_axis

        # vslicer trims the values, islicer trims the labels; together they
        # pair each value with the label ``lags`` positions away.
        if lags > 0:
            vslicer = slice(None, -lags)
            islicer = slice(lags, None)
        elif lags == 0:
            vslicer = islicer = slice(None)
        else:
            vslicer = slice(-lags, None)
            islicer = slice(None, lags)

        axis = self._get_axis_name(axis)
        if axis == 'major_axis':
            values = values[:, vslicer, :]
            major_axis = major_axis[islicer]
        elif axis == 'minor_axis':
            values = values[:, :, vslicer]
            minor_axis = minor_axis[islicer]
        else:
            raise ValueError('Invalid axis')

        return self._constructor(values, items=items, major_axis=major_axis,
                                 minor_axis=minor_axis)

    def truncate(self, before=None, after=None, axis='major'):
        """Function truncates a sorted Panel before and/or after some
        particular values on the requested axis

        Parameters
        ----------
        before : date
            Left boundary
        after : date
            Right boundary
        axis : {'major', 'minor', 'items'}

        Returns
        -------
        Panel
        """
        axis = self._get_axis_name(axis)
        index = self._get_axis(axis)

        beg_slice, end_slice = index.slice_locs(before, after)
        new_index = index[beg_slice:end_slice]

        return self.reindex(**{axis: new_index})

    def join(self, other, how='left', lsuffix='', rsuffix=''):
        """
        Join items with other Panel either on major and minor axes column

        Parameters
        ----------
        other : Panel or list of Panels
            Index should be similar to one of the columns in this one
        how : {'left', 'right', 'outer', 'inner'}
            How to handle indexes of the two objects. Default: 'left'
            for joining on index, None otherwise
            * left: use calling frame's index
            * right: use input frame's index
            * outer: form union of indexes
            * inner: use intersection of indexes
        lsuffix : string
            Suffix to use from left frame's overlapping columns
        rsuffix : string
            Suffix to use from right frame's overlapping columns

        Returns
        -------
        joined : Panel

        Raises
        ------
        ValueError
            When ``other`` is not a single Panel and either a suffix is
            given or ``how == 'right'``.
        """
        from pandas.tools.merge import concat

        if isinstance(other, Panel):
            join_major, join_minor = self._get_join_index(other, how)
            this = self.reindex(major=join_major, minor=join_minor)
            other = other.reindex(major=join_major, minor=join_minor)
            merged_data = this._data.merge(other._data, lsuffix, rsuffix)
            return self._constructor(merged_data)
        else:
            if lsuffix or rsuffix:
                raise ValueError('Suffixes not supported when passing '
                                 'multiple panels')

            if how == 'left':
                # A left join is an outer concat pinned to this panel's axes.
                how = 'outer'
                join_axes = [self.major_axis, self.minor_axis]
            elif how == 'right':
                raise ValueError('Right join not supported with multiple '
                                 'panels')
            else:
                join_axes = None

            return concat([self] + list(other), axis=0, join=how,
                          join_axes=join_axes, verify_integrity=True)

    def update(self, other, join='left', overwrite=True, filter_func=None,
               raise_conflict=False):
        """
        Modify Panel in place using non-NA values from passed
        Panel, or object coercible to Panel. Aligns on items

        Parameters
        ----------
        other : Panel, or object coercible to Panel
        join : How to join individual DataFrames
            {'left', 'right', 'outer', 'inner'}, default 'left'
        overwrite : boolean, default True
            If True then overwrite values for common keys in the calling panel
        filter_func : callable(1d-array) -> 1d-array<boolean>, default None
            Can choose to replace values other than NA. Return True for values
            that should be updated
        raise_conflict : bool
            If True, will raise an error if a DataFrame and other both
            contain data in the same place.
        """

        if not isinstance(other, self._constructor):
            other = self._constructor(other)

        axis_name = self._info_axis_name
        axis_values = self._info_axis
        other = other.reindex(**{axis_name: axis_values})

        for frame in axis_values:
            self[frame].update(other[frame], join, overwrite, filter_func,
                               raise_conflict)

    def _get_join_index(self, other, how):
        """
        Return the (major, minor) axes both panels are aligned to for a join.

        Raises
        ------
        ValueError
            If ``how`` is not one of 'left', 'right', 'inner', 'outer'.
        """
        if how == 'left':
            join_major, join_minor = self.major_axis, self.minor_axis
        elif how == 'right':
            join_major, join_minor = other.major_axis, other.minor_axis
        elif how == 'inner':
            join_major = self.major_axis.intersection(other.major_axis)
            join_minor = self.minor_axis.intersection(other.minor_axis)
        elif how == 'outer':
            join_major = self.major_axis.union(other.major_axis)
            join_minor = self.minor_axis.union(other.minor_axis)
        else:
            # Previously fell through to an UnboundLocalError.
            raise ValueError("how must be one of 'left', 'right', 'inner' or "
                             "'outer', got %r" % (how,))
        return join_major, join_minor

    # miscellaneous data creation
    # NOTE: the static helpers below take the class or instance explicitly
    # as their first argument, named ``self``.
    @staticmethod
    def _extract_axes(self, data, axes, **kwargs):
        """ return a list of the axis indicies """
        return [self._extract_axis(self, data, axis=i, **kwargs)
                for i in range(len(axes))]

    @staticmethod
    def _extract_axes_for_slice(self, axes):
        """ return the slice dictionary for these axes """
        return dict([(self._AXIS_SLICEMAP[i], a)
                     for i, a in zip(
                         self._AXIS_ORDERS[self._AXIS_LEN - len(axes):],
                         axes)])

    @staticmethod
    def _prep_ndarray(self, values, copy=True):
        """
        Coerce input to an ndarray with one dimension per panel axis.

        Raises AssertionError when the dimensionality does not match.
        """
        if not isinstance(values, np.ndarray):
            values = np.asarray(values)
            # NumPy strings are a pain, convert to object
            if issubclass(values.dtype.type, compat.string_types):
                values = np.array(values, dtype=object, copy=True)
        else:
            if copy:
                values = values.copy()
        if not ((values.ndim == self._AXIS_LEN)):
            raise AssertionError()
        return values

    @staticmethod
    def _homogenize_dict(self, frames, intersect=True, dtype=None):
        """
        Conform set of _constructor_sliced-like objects to either
        an intersection of indices / columns or a union.

        Parameters
        ----------
        frames : dict
        intersect : boolean, default True

        Returns
        -------
        dict of aligned results & indicies
        """

        result = dict()
        # the caller may pass a dict or an OrderedDict; preserve the type
        if isinstance(frames, OrderedDict):
            result = OrderedDict()

        adj_frames = OrderedDict()
        for k, v in compat.iteritems(frames):
            if isinstance(v, dict):
                adj_frames[k] = self._constructor_sliced(v)
            else:
                adj_frames[k] = v

        axes = self._AXIS_ORDERS[1:]
        axes_dict = dict([(a, ax) for a, ax in zip(axes, self._extract_axes(
            self, adj_frames, axes, intersect=intersect))])

        reindex_dict = dict(
            [(self._AXIS_SLICEMAP[a], axes_dict[a]) for a in axes])
        reindex_dict['copy'] = False
        for key, frame in compat.iteritems(adj_frames):
            if frame is not None:
                result[key] = frame.reindex(**reindex_dict)
            else:
                result[key] = None

        axes_dict['data'] = result
        return axes_dict

    @staticmethod
    def _extract_axis(self, data, axis=0, intersect=False):
        """
        Derive the index for ``axis`` from the values of ``data``.

        Sliced objects contribute their own axis (combined by union or
        intersection); raw arrays contribute only a length.

        Raises
        ------
        ValueError
            If raw arrays disagree on their length along ``axis``.
        AssertionError
            If raw arrays and frames are mixed and the lengths differ.
        """
        index = None
        if len(data) == 0:
            index = Index([])
        elif len(data) > 0:
            raw_lengths = []
            indexes = []

            have_raw_arrays = False
            have_frames = False

            for v in data.values():
                if isinstance(v, self._constructor_sliced):
                    have_frames = True
                    indexes.append(v._get_axis(axis))
                elif v is not None:
                    have_raw_arrays = True
                    raw_lengths.append(v.shape[axis])

            if have_frames:
                index = _get_combined_index(indexes, intersect=intersect)

            if have_raw_arrays:
                lengths = list(set(raw_lengths))
                if len(lengths) > 1:
                    raise ValueError('ndarrays must match shape on axis %d'
                                     % axis)

                if have_frames:
                    if lengths[0] != len(index):
                        raise AssertionError('Length of data and index must '
                                             'match')
                else:
                    index = Index(np.arange(lengths[0]))

        if index is None:
            index = Index([])

        return _ensure_index(index)

    @classmethod
    def _add_aggregate_operations(cls):
        """ add the operations to the cls; evaluate the doc strings again """

        # doc strings substitors
        _agg_doc = """
Wrapper method for %s

Parameters
----------
other : """ + "%s or %s" % (cls._constructor_sliced.__name__, cls.__name__) + """
axis : {""" + ', '.join(cls._AXIS_ORDERS) + "}" + """
Axis to broadcast over

Returns
-------
""" + cls.__name__ + "\n"

        def _panel_arith_method(op, name):
            @Substitution(op)
            @Appender(_agg_doc)
            def f(self, other, axis=0):
                return self._combine(other, op, axis=axis)
            f.__name__ = name
            return f

        cls.add = _panel_arith_method(operator.add, 'add')
        cls.subtract = cls.sub = _panel_arith_method(operator.sub, 'subtract')
        cls.multiply = cls.mul = _panel_arith_method(operator.mul, 'multiply')

        # operator.div only exists on Python 2; use true division elsewhere.
        div_op = getattr(operator, 'div', operator.truediv)
        cls.divide = cls.div = _panel_arith_method(div_op, 'divide')

        _agg_doc = """
Return %(desc)s over requested axis

Parameters
----------
axis : {""" + ', '.join(cls._AXIS_ORDERS) + "} or {" \
            + ', '.join([str(i) for i in range(cls._AXIS_LEN)]) + """}
skipna : boolean, default True
    Exclude NA/null values. If an entire row/column is NA, the result
    will be NA

Returns
-------
%(outname)s : """ + cls._constructor_sliced.__name__ + "\n"

        @Substitution(desc='sum', outname='sum')
        @Appender(_agg_doc)
        def sum(self, axis='major', skipna=True):
            return self._reduce(nanops.nansum, axis=axis, skipna=skipna)
        cls.sum = sum

        @Substitution(desc='mean', outname='mean')
        @Appender(_agg_doc)
        def mean(self, axis='major', skipna=True):
            return self._reduce(nanops.nanmean, axis=axis, skipna=skipna)
        cls.mean = mean

        @Substitution(desc='unbiased variance', outname='variance')
        @Appender(_agg_doc)
        def var(self, axis='major', skipna=True):
            return self._reduce(nanops.nanvar, axis=axis, skipna=skipna)
        cls.var = var

        @Substitution(desc='unbiased standard deviation', outname='stdev')
        @Appender(_agg_doc)
        def std(self, axis='major', skipna=True):
            return self.var(axis=axis, skipna=skipna).apply(np.sqrt)
        cls.std = std

        @Substitution(desc='unbiased skewness', outname='skew')
        @Appender(_agg_doc)
        def skew(self, axis='major', skipna=True):
            return self._reduce(nanops.nanskew, axis=axis, skipna=skipna)
        cls.skew = skew

        @Substitution(desc='product', outname='prod')
        @Appender(_agg_doc)
        def prod(self, axis='major', skipna=True):
            return self._reduce(nanops.nanprod, axis=axis, skipna=skipna)
        cls.prod = prod

        @Substitution(desc='compounded percentage', outname='compounded')
        @Appender(_agg_doc)
        def compound(self, axis='major', skipna=True):
            return (1 + self).prod(axis=axis, skipna=skipna) - 1
        cls.compound = compound

        @Substitution(desc='median', outname='median')
        @Appender(_agg_doc)
        def median(self, axis='major', skipna=True):
            return self._reduce(nanops.nanmedian, axis=axis, skipna=skipna)
        cls.median = median

        @Substitution(desc='maximum', outname='maximum')
        @Appender(_agg_doc)
        def max(self, axis='major', skipna=True):
            return self._reduce(nanops.nanmax, axis=axis, skipna=skipna)
        cls.max = max

        @Substitution(desc='minimum', outname='minimum')
        @Appender(_agg_doc)
        def min(self, axis='major', skipna=True):
            return self._reduce(nanops.nanmin, axis=axis, skipna=skipna)
        cls.min = min