class TestFactor(unittest.TestCase):
    """Tests for Factor indexing, construction and aggregation."""

    def setUp(self):
        # Fixture holding the values "a", "b" and "c" with repeats.
        values = ["a", "b", "b", "a", "a", "c", "c", "c"]
        self.factor = Factor(values)

    def test_getitem(self):
        """Scalar, positional-list and boolean-mask indexing."""
        # scalar access returns the underlying value
        self.assertEqual(self.factor[0], "a")
        self.assertEqual(self.factor[-1], "c")

        # a list of positions yields a sub-factor
        by_position = self.factor[[0, 1, 2]]
        tm.assert_almost_equal(by_position.labels, [0, 1, 1])

        # so does a boolean mask
        by_mask = self.factor[self.factor.asarray() == "c"]
        tm.assert_almost_equal(by_mask.labels, [2, 2, 2])

    def test_constructor_unsortable(self):
        """Construct a Factor from an object array mixing ints and a datetime."""
        mixed = np.array([1, 2, 3, datetime.now()], dtype="O")

        # it works!
        factor = Factor(mixed)

    def test_factor_agg(self):
        """factor_agg must agree with applying the function per label."""
        import pandas.core.frame as frame

        data = np.arange(len(self.factor))
        func = np.sum

        result = frame.factor_agg(self.factor, data, func)
        codes = self.factor.labels
        for pos, _ in enumerate(self.factor.levels):
            self.assertEqual(func(data[codes == pos]), result[pos])
class TestFactor(unittest.TestCase):
    """Tests for Factor indexing and aggregation."""

    def setUp(self):
        # Fixture holding the values 'a', 'b' and 'c' with repeats.
        values = ['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']
        self.factor = Factor(values)

    def test_getitem(self):
        """Scalar, positional-list and boolean-mask indexing."""
        # scalar access returns the underlying value
        self.assertEqual(self.factor[0], 'a')
        self.assertEqual(self.factor[-1], 'c')

        # a list of positions yields a sub-factor
        head = self.factor[[0, 1, 2]]
        tm.assert_almost_equal(head.labels, [0, 1, 1])

        # so does a boolean mask
        only_c = self.factor[self.factor.asarray() == 'c']
        tm.assert_almost_equal(only_c.labels, [2, 2, 2])

    def test_factor_agg(self):
        """factor_agg must agree with applying the function per label."""
        import pandas.core.frame as frame

        data = np.arange(len(self.factor))
        func = np.sum

        result = frame.factor_agg(self.factor, data, func)
        codes = self.factor.labels
        for pos, _ in enumerate(self.factor.levels):
            self.assertEqual(func(data[codes == pos]), result[pos])
class TestFactor(unittest.TestCase):
    """Tests for Factor indexing, construction and aggregation."""

    def setUp(self):
        # Fixture holding the values 'a', 'b' and 'c' with repeats.
        values = ['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']
        self.factor = Factor(values)

    def test_getitem(self):
        """Scalar, positional-list and boolean-mask indexing."""
        # scalar access returns the underlying value
        self.assertEqual(self.factor[0], 'a')
        self.assertEqual(self.factor[-1], 'c')

        # a list of positions yields a sub-factor
        taken = self.factor[[0, 1, 2]]
        tm.assert_almost_equal(taken.labels, [0, 1, 1])

        # so does a boolean mask
        masked = self.factor[self.factor.asarray() == 'c']
        tm.assert_almost_equal(masked.labels, [2, 2, 2])

    def test_constructor_unsortable(self):
        """Construct a Factor from an object array mixing ints and a datetime."""
        mixed = np.array([1, 2, 3, datetime.now()], dtype='O')

        # it works!
        factor = Factor(mixed)

    def test_factor_agg(self):
        """factor_agg must agree with applying the function per label."""
        import pandas.core.frame as frame

        data = np.arange(len(self.factor))
        func = np.sum

        result = frame.factor_agg(self.factor, data, func)
        codes = self.factor.labels
        for pos, _ in enumerate(self.factor.levels):
            self.assertEqual(func(data[codes == pos]), result[pos])
def panel_index(time, panels, names=('time', 'panel')):
    """
    Returns a multi-index suitable for a panel-like DataFrame

    Parameters
    ----------
    time : array-like
        Time index, does not have to repeat
    panels : array-like
        Panel index, does not have to repeat
    names : list-like, optional
        Names of the two index levels, defaults to ``('time', 'panel')``.
        A fresh list copy is handed to the MultiIndex on every call; an
        explicit ``None`` is passed through unchanged.

    Returns
    -------
    multi_index : MultiIndex
        Time index is the first level, the panels are the second level.

    Examples
    --------
    >>> years = range(1960,1963)
    >>> panels = ['A', 'B', 'C']
    >>> panel_idx = panel_index(years, panels)
    >>> panel_idx
    MultiIndex([(1960, 'A'), (1961, 'A'), (1962, 'A'), (1960, 'B'),
                (1961, 'B'), (1962, 'B'), (1960, 'C'), (1961, 'C'),
                (1962, 'C')], dtype=object)

    or

    >>> import numpy as np
    >>> years = np.repeat(range(1960,1963), 3)
    >>> panels = np.tile(['A', 'B', 'C'], 3)
    >>> panel_idx = panel_index(years, panels)
    >>> panel_idx
    MultiIndex([(1960, 'A'), (1960, 'B'), (1960, 'C'), (1961, 'A'),
                (1961, 'B'), (1961, 'C'), (1962, 'A'), (1962, 'B'),
                (1962, 'C')], dtype=object)
    """
    time, panels = _ensure_like_indices(time, panels)
    time_factor = Factor(time)
    panel_factor = Factor(panels)

    labels = [time_factor.labels, panel_factor.labels]
    levels = [time_factor.levels, panel_factor.levels]

    # The default used to be a list literal, i.e. one object shared by every
    # call and handed straight to MultiIndex.  Copy per call so that no two
    # results (and no caller-owned list) can alias each other's names.
    if names is not None:
        names = list(names)
    return MultiIndex(levels, labels, sortorder=None, names=names)
def fromRecords(cls, data, major_field, minor_field, exclude=None):
    """
    Build a LongPanel out of tabular records.

    Parameters
    ----------
    data : DataFrame, structured or record array, or dict
        Source columns. DataFrame and dict inputs are copied before any
        column is removed; a structured array is unpacked into a new dict.
    major_field : string
        Name of the column popped and used for the first index level
    minor_field : string
        Name of the column popped and used for the second index level
    exclude : list-like, default None
        Columns to delete before the panel is built

    Returns
    -------
    LongPanel

    Raises
    ------
    ValueError
        If ``data`` is an ndarray that is not a structured array
    """
    if isinstance(data, np.ndarray):
        # Only structured / record arrays carry named fields
        is_structured = issubclass(data.dtype.type, np.void)
        if not is_structured:
            raise ValueError('Input was not a structured array!')

        data = dict((name, data[name]) for name in data.dtype.names)
    elif isinstance(data, DataFrame):
        data = data._series.copy()
    elif isinstance(data, dict):
        # work on a copy so the caller's dict keeps all of its columns
        data = data.copy()

    exclude = set() if exclude is None else set(exclude)
    for unwanted in exclude:
        del data[unwanted]

    major = Factor.fromarray(data.pop(major_field))
    minor = Factor.fromarray(data.pop(minor_field))

    panel_levels = [major.levels, minor.levels]
    panel_labels = [major.labels, minor.labels]
    index = MultiIndex(levels=panel_levels, labels=panel_labels)
    return LongPanel(data, index=index)
def setUp(self):
    """Create the Factor fixture stored on ``self.factor``."""
    values = ['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']
    self.factor = Factor(values)
def _make_concat_multiindex(indexes, keys, levels=None, names=None):
    """
    Build the MultiIndex for a keyed concatenation: the key level(s) come
    first, followed by the level(s) of the concatenated indexes.

    Parameters
    ----------
    indexes : sequence of Index
        Index of each concatenated piece, in order
    keys : sequence
        One key per entry in ``indexes``. When ``levels`` is omitted and the
        first key is a tuple, every tuple position becomes its own outer
        level.
    levels : list of sequences, optional
        Explicit values for the outer level(s). Passing more than one also
        selects the tuple-key code path. Derived from ``keys`` when omitted.
    names : list, optional
        Names of the outer level(s), defaults to None for each of them

    Returns
    -------
    MultiIndex
    """
    if ((levels is None and isinstance(keys[0], tuple)) or
            (levels is not None and len(levels) > 1)):
        # Materialize the transposed keys: they are measured with len() and
        # walked more than once below, which a one-shot zip iterator
        # (Python 3) does not support.
        zipped = list(zip(*keys))
        if names is None:
            names = [None] * len(zipped)

        if levels is None:
            levels = [Factor(zp).levels for zp in zipped]
        else:
            levels = [_ensure_index(x) for x in levels]
    else:
        zipped = [keys]
        if names is None:
            names = [None]

        if levels is None:
            levels = [_ensure_index(keys)]
        else:
            levels = [_ensure_index(x) for x in levels]

    if not _all_indexes_same(indexes):
        label_list = []

        # things are potentially different sizes, so compute the exact labels
        # for each level and pass those to MultiIndex.from_arrays
        for hlevel, level in zip(zipped, levels):
            to_concat = []
            for key, index in zip(hlevel, indexes):
                i = level.get_loc(key)
                to_concat.append(np.repeat(i, len(index)))
            label_list.append(np.concatenate(to_concat))

        concat_index = _concat_indexes(indexes)

        # these go at the end
        if isinstance(concat_index, MultiIndex):
            levels.extend(concat_index.levels)
            label_list.extend(concat_index.labels)
        else:
            factor = Factor(concat_index)
            levels.append(factor.levels)
            label_list.append(factor.labels)

        # also copies
        names = names + _get_consensus_names(indexes)

        return MultiIndex(levels=levels, labels=label_list, names=names)

    # Every piece shares the same index: repeat / tile instead of looking up
    # each key individually.
    new_index = indexes[0]
    n = len(new_index)
    kpieces = len(indexes)

    # also copies
    new_names = list(names)
    new_levels = list(levels)

    # construct labels
    new_labels = []

    # do something a bit more speedy
    for hlevel, level in zip(zipped, levels):
        mapped = level.get_indexer(hlevel)
        new_labels.append(np.repeat(mapped, n))

    if isinstance(new_index, MultiIndex):
        new_levels.extend(new_index.levels)
        new_labels.extend([np.tile(lab, kpieces) for lab in new_index.labels])
        new_names.extend(new_index.names)
    else:
        new_levels.append(new_index)
        new_names.append(new_index.name)
        new_labels.append(np.tile(np.arange(n), kpieces))

    return MultiIndex(levels=new_levels, labels=new_labels, names=new_names)
def setUp(self):
    """Create the Factor fixture stored on ``self.factor``."""
    values = ["a", "b", "b", "a", "a", "c", "c", "c"]
    self.factor = Factor(values)
def setUp(self):
    """Create the Factor fixture via ``Factor.fromarray``."""
    values = ['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']
    self.factor = Factor.fromarray(values)
def _read_panel_table(self, group, where=None):
    """
    Read the table stored on ``group`` and rebuild it as a panel.

    Rows are selected with ``where``. When every (index, column) pair is
    unique the values are sorted and reshaped directly into a block;
    otherwise the rows are de-duplicated through a long-format DataFrame
    first. A column filter on the selection, if any, is applied last.
    """
    from pandas.core.index import unique_int64, Factor
    from pandas.core.common import _asarray_tuplesafe
    from pandas.core.internals import BlockManager
    from pandas.core.reshape import block2d_to_block3d

    table = group.table

    # run the selection
    sel = Selection(table, where)
    sel.select()

    attrs = table._v_attrs
    fields = attrs.fields
    columns = _maybe_convert(sel.values['column'], attrs.columns_kind)
    index = _maybe_convert(sel.values['index'], attrs.index_kind)
    values = sel.values['values']

    major = Factor(index)
    minor = Factor(columns)

    n_major = len(major.levels)
    n_minor = len(minor.levels)

    # one integer per (major, minor) pair
    key = major.labels * n_minor + minor.labels

    has_duplicates = len(unique_int64(key)) != len(key)
    if not has_duplicates:
        sorter, _ = lib.groupsort_indexer(key, n_major * n_minor)

        # the data need to be sorted
        ordered_values = values.take(sorter, axis=0)
        ordered_major = major.labels.take(sorter)
        ordered_minor = minor.labels.take(sorter)

        block = block2d_to_block3d(ordered_values, fields,
                                   (n_major, n_minor),
                                   ordered_major, ordered_minor)

        axes = [block.items, major.levels, minor.levels]
        wp = Panel(BlockManager([block], axes))
    else:
        if not self._quiet:  # pragma: no cover
            print('Duplicate entries in table, taking most recently '
                  'appended')

        # rebuild in long format
        long_index = MultiIndex.from_arrays([index, columns])
        lp = DataFrame(values, index=long_index, columns=fields)

        # need a better algorithm
        tuple_index = long_index.get_tuple_index()
        index_map = lib.map_indices_object(tuple_index)

        unique_tuples = _asarray_tuplesafe(lib.fast_unique(tuple_index))
        indexer = lib.merge_indexer_object(unique_tuples, index_map)

        deduped_index = long_index.take(indexer)
        deduped_values = lp.values.take(indexer, axis=0)

        lp = DataFrame(deduped_values, index=deduped_index,
                       columns=lp.columns)
        wp = lp.to_panel()

    if sel.column_filter:
        kept_minor = sorted(set(wp.minor_axis) & sel.column_filter)
        wp = wp.reindex(minor=kept_minor)
    return wp
def test_constructor_unsortable(self):
    """Construct a Factor from an object array mixing ints and a datetime."""
    mixed = np.array([1, 2, 3, datetime.now()], dtype='O')

    # it works!
    factor = Factor(mixed)