def panel_index(time, panels, names=('time', 'panel')):
    """
    Returns a multi-index suitable for a panel-like DataFrame

    Parameters
    ----------
    time : array-like
        Time index, does not have to repeat
    panels : array-like
        Panel index, does not have to repeat
    names : sequence, optional
        Names of the two index levels. Copied into a fresh list on every
        call before being handed to MultiIndex; None is passed through
        unchanged.

    Returns
    -------
    multi_index : MultiIndex
        Time index is the first level, the panels are the second level.

    Examples
    --------
    >>> years = range(1960,1963)
    >>> panels = ['A', 'B', 'C']
    >>> panel_idx = panel_index(years, panels)
    >>> panel_idx
    MultiIndex([(1960, 'A'), (1961, 'A'), (1962, 'A'), (1960, 'B'),
                (1961, 'B'), (1962, 'B'), (1960, 'C'), (1961, 'C'),
                (1962, 'C')], dtype=object)

    or

    >>> import numpy as np
    >>> years = np.repeat(range(1960,1963), 3)
    >>> panels = np.tile(['A', 'B', 'C'], 3)
    >>> panel_idx = panel_index(years, panels)
    >>> panel_idx
    MultiIndex([(1960, 'A'), (1960, 'B'), (1960, 'C'), (1961, 'A'),
                (1961, 'B'), (1961, 'C'), (1962, 'A'), (1962, 'B'),
                (1962, 'C')], dtype=object)
    """
    time, panels = _ensure_like_indices(time, panels)
    time_factor = Factor.from_array(time)
    panel_factor = Factor.from_array(panels)

    labels = [time_factor.labels, panel_factor.labels]
    levels = [time_factor.levels, panel_factor.levels]

    # The default is an immutable tuple and every call works on its own list,
    # so no state can be shared between calls through the default argument.
    if names is not None:
        names = list(names)

    return MultiIndex(levels, labels, sortorder=None, names=names)
def _bins_to_cuts(x, bins, right=True, labels=None, retbins=False,
                  precision=3, name=None):
    """
    Assign every value of ``x`` to a bin delimited by ``bins``.

    Parameters
    ----------
    x : array-like or Series
        Values to bin. When a Series is given and ``name`` is None, the
        Series name is used as the name of the result.
    bins : ndarray
        Bin edges; positions are looked up with ``bins.searchsorted``.
    right : bool, default True
        If True the intervals are rendered as ``(a, b]`` and the lookup uses
        ``side='left'``; otherwise they are rendered as ``[a, b)`` and the
        lookup uses ``side='right'``.
    labels : sequence, None or False
        None: interval strings are built from ``bins``.
        Sequence: must have the same length as ``bins``; consecutive pairs
        are formatted into the interval strings.
        False: no Factor is built, the integer bin positions are returned.
    retbins : bool, default False
        If True, return ``(result, bins)`` instead of just the result.
    precision : int, default 3
        Forwarded to ``_format_label`` when formatting interval edges.
    name : object, optional
        Name handed to the resulting Factor.

    Returns
    -------
    Factor or ndarray, optionally paired with ``bins``
        With ``labels=False`` the result is ``ids - 1`` (zero-based bin
        positions); it is converted to float64 with NaN at the missing
        positions when ``x`` contains missing values.

    Raises
    ------
    ValueError
        If ``labels`` is a sequence whose length differs from ``bins``.
    """
    if name is None and isinstance(x, Series):
        name = x.name

    x = np.asarray(x)

    side = 'left' if right else 'right'
    ids = bins.searchsorted(x, side=side)

    mask = com.isnull(x)
    has_nas = mask.any()

    if labels is not False:
        if labels is None:
            labels = bins
        elif len(labels) != len(bins):
            raise ValueError('labels must be same length as bins')

        def fmt(v):
            return _format_label(v, precision=precision)

        template = '(%s, %s]' if right else '[%s, %s)'
        levels = [template % (fmt(a), fmt(b))
                  for a, b in zip(labels, labels[1:])]
        levels = np.asarray(levels, dtype=object)

        if has_nas:
            # Missing values get id 0, i.e. label -1 after the shift below.
            np.putmask(ids, mask, 0)

        fac = Factor(ids - 1, levels, name=name)
    else:
        fac = ids - 1
        if has_nas:
            # Convert the already shifted positions. Converting ``ids``
            # here would silently drop the ``- 1`` and report every
            # non-missing value one bin too high.
            fac = fac.astype(np.float64)
            np.putmask(fac, mask, np.nan)

    if not retbins:
        return fac

    return fac, bins
def _read_panel_table(self, group, where=None):
    """
    Read the table stored under ``group`` and rebuild it as a Panel.

    Parameters
    ----------
    group : object
        Node exposing a ``table`` attribute; the table's ``_v_attrs`` supply
        ``fields``, ``index_kind`` and ``columns_kind``.
    where : optional
        Selection criteria, forwarded unchanged to ``Selection``.

    Returns
    -------
    Panel
        Built directly from a 3-d block when every (index, column) pair
        occurs once, otherwise via a de-duplicated long-format DataFrame.
        When the selection carries a column filter, the minor axis is
        restricted to the sorted intersection with that filter.
    """
    table = group.table

    # create the selection
    sel = Selection(table, where, table._v_attrs.index_kind)
    sel.select()
    fields = table._v_attrs.fields

    columns = _maybe_convert(sel.values['column'],
                             table._v_attrs.columns_kind)
    index = _maybe_convert(sel.values['index'],
                           table._v_attrs.index_kind)
    values = sel.values['values']

    major = Factor.from_array(index)
    minor = Factor.from_array(columns)

    J, K = len(major.levels), len(minor.levels)
    # One integer per row combining both label arrays; if all keys are
    # distinct, no (index, column) pair is duplicated.
    key = major.labels * K + minor.labels

    if len(unique(key)) == len(key):
        sorter, _ = lib.groupsort_indexer(com._ensure_int64(key), J * K)
        sorter = com._ensure_platform_int(sorter)

        # the data need to be sorted
        sorted_values = values.take(sorter, axis=0)
        major_labels = major.labels.take(sorter)
        minor_labels = minor.labels.take(sorter)

        block = block2d_to_block3d(sorted_values, fields, (J, K),
                                   major_labels, minor_labels)

        mgr = BlockManager([block], [block.ref_items,
                                     major.levels, minor.levels])
        wp = Panel(mgr)
    else:
        if not self._quiet:  # pragma: no cover
            print('Duplicate entries in table, taking most recently '
                  'appended')

        # reconstruct
        long_index = MultiIndex.from_arrays([index, columns])
        lp = DataFrame(values, index=long_index, columns=fields)

        # need a better algorithm
        tuple_index = long_index.get_tuple_index()

        unique_tuples = lib.fast_unique(tuple_index)
        unique_tuples = _asarray_tuplesafe(unique_tuples)

        # Keep one row per distinct (index, column) tuple.
        indexer = match(unique_tuples, tuple_index)
        indexer = com._ensure_platform_int(indexer)

        new_index = long_index.take(indexer)
        new_values = lp.values.take(indexer, axis=0)

        lp = DataFrame(new_values, index=new_index, columns=lp.columns)
        wp = lp.to_panel()

    if sel.column_filter:
        new_minor = sorted(set(wp.minor_axis) & sel.column_filter)
        wp = wp.reindex(minor=new_minor)
    return wp
def _read_panel_table(self, group, where=None):
    """
    Load the table found under ``group`` and return it as a Panel.

    Parameters
    ----------
    group : object
        Node exposing a ``table`` attribute; ``fields``, ``index_kind`` and
        ``columns_kind`` are read from the table's ``_v_attrs``.
    where : optional
        Selection criteria, passed as-is to ``Selection``.

    Returns
    -------
    Panel
        Assembled from a single 3-d block when no (index, column) pair is
        repeated; otherwise rebuilt from a long-format DataFrame reduced to
        one row per pair. If the selection has a column filter, the minor
        axis is reindexed to the sorted intersection with it.
    """
    table = group.table

    # create the selection
    sel = Selection(table, where, table._v_attrs.index_kind)
    sel.select()
    fields = table._v_attrs.fields

    columns = _maybe_convert(sel.values['column'],
                             table._v_attrs.columns_kind)
    index = _maybe_convert(sel.values['index'],
                           table._v_attrs.index_kind)
    values = sel.values['values']

    major = Factor.from_array(index)
    minor = Factor.from_array(columns)

    J, K = len(major.levels), len(minor.levels)
    # Fold the two label arrays into a single integer per row so that
    # duplicated (index, column) pairs show up as duplicated keys.
    key = major.labels * K + minor.labels

    if len(unique(key)) == len(key):
        sorter, _ = lib.groupsort_indexer(com._ensure_int64(key), J * K)
        sorter = com._ensure_platform_int(sorter)

        # the data need to be sorted
        sorted_values = values.take(sorter, axis=0)
        major_labels = major.labels.take(sorter)
        minor_labels = minor.labels.take(sorter)

        block = block2d_to_block3d(sorted_values, fields, (J, K),
                                   major_labels, minor_labels)

        mgr = BlockManager([block], [block.ref_items,
                                     major.levels, minor.levels])
        wp = Panel(mgr)
    else:
        if not self._quiet:  # pragma: no cover
            print('Duplicate entries in table, taking most recently '
                  'appended')

        # reconstruct
        long_index = MultiIndex.from_arrays([index, columns])
        lp = DataFrame(values, index=long_index, columns=fields)

        # need a better algorithm
        tuple_index = long_index.get_tuple_index()

        unique_tuples = lib.fast_unique(tuple_index)
        unique_tuples = _asarray_tuplesafe(unique_tuples)

        # Select one row for each distinct (index, column) tuple.
        indexer = match(unique_tuples, tuple_index)
        indexer = com._ensure_platform_int(indexer)

        new_index = long_index.take(indexer)
        new_values = lp.values.take(indexer, axis=0)

        lp = DataFrame(new_values, index=new_index, columns=lp.columns)
        wp = lp.to_panel()

    if sel.column_filter:
        new_minor = sorted(set(wp.minor_axis) & sel.column_filter)
        wp = wp.reindex(minor=new_minor)
    return wp
def test_constructor_unsortable(self):
    # Smoke test: an object array mixing ints with a datetime is handed to
    # Factor.from_array; the test passes as long as the call does not raise.
    mixed = np.array([1, 2, 3, datetime.now()], dtype='O')

    # it works!
    Factor.from_array(mixed)
def setUp(self):
    # Shared fixture: a Factor built from eight strings drawn from
    # 'a', 'b' and 'c', stored on the test case as ``self.factor``.
    values = ['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']
    self.factor = Factor.from_array(values)
def setUp(self):
    # Build the Factor fixture used by the tests of this case from a small
    # list of repeated string values.
    raw_values = ['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']
    self.factor = Factor.from_array(raw_values)
def _make_concat_multiindex(indexes, keys, levels=None, names=None):
    """
    Build the MultiIndex for concatenated objects, with ``keys`` as the
    outermost level(s) and the original indexes as the innermost level(s).

    Parameters
    ----------
    indexes : list
        The index of each piece being concatenated.
    keys : sequence
        One key per piece. Tuple keys (or more than one passed level) yield
        several outer levels, one per tuple position.
    levels : list of sequences, optional
        Explicit outer levels. When omitted they are derived from ``keys``.
    names : list, optional
        Level names. When omitted the outer levels are unnamed; names for
        the inner levels are appended when fewer names than levels exist.

    Returns
    -------
    MultiIndex

    Raises
    ------
    ValueError
        If a key is not present in its corresponding level.
    """
    if ((levels is None and isinstance(keys[0], tuple)) or
            (levels is not None and len(levels) > 1)):
        # Materialize the transposed keys: zip() is a one-shot iterator on
        # Python 3, but the result is measured with len() and iterated more
        # than once below.
        zipped = list(zip(*keys))
        if names is None:
            names = [None] * len(zipped)

        if levels is None:
            levels = [Factor.from_array(zp).levels for zp in zipped]
        else:
            levels = [_ensure_index(x) for x in levels]
    else:
        zipped = [keys]
        if names is None:
            names = [None]

        if levels is None:
            levels = [_ensure_index(keys)]
        else:
            levels = [_ensure_index(x) for x in levels]

    if not _all_indexes_same(indexes):
        label_list = []

        # things are potentially different sizes, so compute the exact labels
        # for each level and pass those to the MultiIndex constructor
        for hlevel, level in zip(zipped, levels):
            to_concat = []
            for key, index in zip(hlevel, indexes):
                try:
                    i = level.get_loc(key)
                except KeyError:
                    raise ValueError('Key %s not in level %s'
                                     % (str(key), str(level)))

                # Repeat the key's position once per row of its piece.
                to_concat.append(np.repeat(i, len(index)))
            label_list.append(np.concatenate(to_concat))

        concat_index = _concat_indexes(indexes)

        # these go at the end
        if isinstance(concat_index, MultiIndex):
            levels.extend(concat_index.levels)
            label_list.extend(concat_index.labels)
        else:
            factor = Factor.from_array(concat_index)
            levels.append(factor.levels)
            label_list.append(factor.labels)

        if len(names) == len(levels):
            names = list(names)
        else:
            # also copies
            names = names + _get_consensus_names(indexes)

        return MultiIndex(levels=levels, labels=label_list, names=names)

    # All pieces share the same index: labels can be produced by plain
    # repeat/tile of the first one.
    new_index = indexes[0]
    n = len(new_index)
    kpieces = len(indexes)

    # also copies
    new_names = list(names)
    new_levels = list(levels)

    # construct labels
    new_labels = []

    # do something a bit more speedy
    for hlevel, level in zip(zipped, levels):
        hlevel = _ensure_index(hlevel)
        mapped = level.get_indexer(hlevel)

        mask = mapped == -1
        if mask.any():
            raise ValueError('Values not found in passed level: %s'
                             % str(hlevel[mask]))

        new_labels.append(np.repeat(mapped, n))

    if isinstance(new_index, MultiIndex):
        new_levels.extend(new_index.levels)
        new_labels.extend([np.tile(lab, kpieces)
                           for lab in new_index.labels])
    else:
        new_levels.append(new_index)
        new_labels.append(np.tile(np.arange(n), kpieces))

    if len(new_names) < len(new_levels):
        new_names.extend(new_index.names)

    return MultiIndex(levels=new_levels, labels=new_labels,
                      names=new_names)
def _bins_to_cuts(x, bins, right=True, labels=None, retbins=False,
                  precision=3, name=None):
    """
    Assign every value of ``x`` to a bin delimited by ``bins``.

    Parameters
    ----------
    x : array-like or Series
        Values to bin. When a Series is given and ``name`` is None, the
        Series name is used as the name of the result.
    bins : ndarray
        Bin edges; positions are looked up with ``bins.searchsorted``.
    right : bool, default True
        If True the lookup uses ``side='left'`` and generated labels read
        ``(a, b]``; otherwise ``side='right'`` and ``[a, b)``.
    labels : sequence, None or False
        None: interval strings are generated from consecutive bin edges.
        Sequence: used as the levels directly; must contain exactly
        ``len(bins) - 1`` entries.
        False: no Factor is built, the integer bin positions are returned.
    retbins : bool, default False
        If True, return ``(result, bins)`` instead of just the result.
    precision : int, default 3
        Forwarded to ``_format_label`` when generating interval strings.
    name : object, optional
        Name handed to the resulting Factor.

    Returns
    -------
    Factor or ndarray, optionally paired with ``bins``
        With ``labels=False`` the result is ``ids - 1`` (zero-based bin
        positions); it is converted to float64 with NaN at the missing
        positions when ``x`` contains missing values.

    Raises
    ------
    ValueError
        If a non-missing value falls before the first or past the last bin,
        or if ``labels`` has the wrong number of entries.
    """
    if name is None and isinstance(x, Series):
        name = x.name

    x = np.asarray(x)

    side = 'left' if right else 'right'
    ids = bins.searchsorted(x, side=side)

    # Only non-missing values are range-checked.
    not_null = com.notnull(x)
    above = not_null & (ids == len(bins))
    below = not_null & (ids == 0)

    if above.any():
        raise ValueError('Values fall past last bin: %s' % str(x[above]))

    if below.any():
        raise ValueError('Values fall before first bin: %s' % str(x[below]))

    mask = com.isnull(x)
    has_nas = mask.any()

    if labels is not False:
        if labels is None:
            def fmt(v):
                return _format_label(v, precision=precision)

            template = '(%s, %s]' if right else '[%s, %s)'
            levels = [template % (fmt(a), fmt(b))
                      for a, b in zip(bins, bins[1:])]
        else:
            if len(labels) != len(bins) - 1:
                raise ValueError('Bin labels must be one fewer than '
                                 'the number of bin edges')
            levels = labels

        levels = np.asarray(levels, dtype=object)

        if has_nas:
            # Missing values get id 0, i.e. label -1 after the shift below.
            np.putmask(ids, mask, 0)

        fac = Factor(ids - 1, levels, name=name)
    else:
        fac = ids - 1
        if has_nas:
            # Convert the already shifted positions. Converting ``ids``
            # here would silently drop the ``- 1`` and report every
            # non-missing value one bin too high.
            fac = fac.astype(np.float64)
            np.putmask(fac, mask, np.nan)

    if not retbins:
        return fac

    return fac, bins