def test_booleanindex(self):
    """Boolean selection of ``self.strIndex`` must keep ``indexMap`` in sync.

    The selection is tried twice, once with a boolean ndarray and once with
    the same mask as a plain Python list; in both cases the resulting
    ``indexMap`` must equal a freshly computed
    ``tseries.map_indices_object`` of the sub-index.
    """
    mask = np.repeat(True, len(self.strIndex)).astype(bool)
    # knock out every other position in [5, 30)
    mask[5:30:2] = False

    for selector in (mask, list(mask)):
        subIndex = self.strIndex[selector]
        tm.assert_dict_equal(tseries.map_indices_object(subIndex),
                             subIndex.indexMap)
def indexMap(self):
    """{label -> location}

    Built with ``lib.map_indices_object`` on first access and cached in
    ``self._indexMap``. ``self._integrity`` records whether the map has
    exactly one entry per element of the index.

    Raises
    ------
    Exception
        On the access that builds the map, if the map has fewer (or more)
        entries than the index has elements.
    """
    if self._indexMap is not None:
        # already built: hand back the cached mapping
        # NOTE(review): the integrity check is not repeated on this path,
        # so a map cached by a failed first access is returned as-is --
        # confirm callers rely on this.
        return self._indexMap

    self._indexMap = lib.map_indices_object(self)
    self._integrity = len(self._indexMap) == len(self)
    if self._integrity:
        return self._indexMap
    raise Exception("Index cannot contain duplicate values!")
def _read_panel_table(self, group, where=None):
    """Read the table stored under ``group`` and return it via ``to_wide()``.

    Rows are selected from ``group.table`` with ``Selection(table, where)``
    and assembled into a ``LongPanel`` indexed by (index, column) pairs.
    When ``lp.consistent`` is false the rows are de-duplicated through the
    tuple index before widening. If the selection carries a column filter,
    the minor axis is restricted to the filtered labels, sorted.
    """
    from pandas.core.common import _asarray_tuplesafe

    table = group.table

    # run the query against the stored table
    selection = Selection(table, where)
    selection.select()

    fields = table._v_attrs.fields
    minor_values = _maybe_convert(selection.values['column'],
                                  table._v_attrs.columns_kind)
    major_values = _maybe_convert(selection.values['index'],
                                  table._v_attrs.index_kind)

    # rebuild the long-format panel
    long_index = MultiIndex.from_arrays([major_values, minor_values])
    long_panel = LongPanel(selection.values['values'], index=long_index,
                           columns=fields)

    if not long_panel.consistent:
        if not self._quiet:  # pragma: no cover
            print('Duplicate entries in table, taking most recently '
                  'appended')

        # need a better algorithm
        keys = long_index.get_tuple_index()
        position_of = lib.map_indices_object(keys)
        distinct_keys = _asarray_tuplesafe(lib.fast_unique(keys))
        keep = lib.merge_indexer_object(distinct_keys, position_of)

        kept_index = long_index.take(keep)
        kept_values = long_panel.values.take(keep, axis=0)
        long_panel = LongPanel(kept_values, index=kept_index,
                               columns=long_panel.columns)
    else:
        long_panel = long_panel.sortlevel(level=0)

    wide = long_panel.to_wide()

    if not selection.column_filter:
        return wide

    kept_minor = sorted(set(wide.minor_axis) & selection.column_filter)
    return wide.reindex(minor=kept_minor)
def _read_panel_table(self, group, where=None):
    """Load ``group.table`` (optionally filtered by ``where``) as a wide panel.

    The selected rows become a ``LongPanel`` keyed by a two-level
    ``MultiIndex`` built from the stored index and column labels. A
    consistent long panel is sorted on level 0 and widened directly;
    otherwise duplicates are resolved through the tuple index first (a
    message is printed unless ``self._quiet`` is set). A non-empty
    ``column_filter`` on the selection narrows the minor axis.
    """
    from pandas.core.common import _asarray_tuplesafe

    tbl = getattr(group, 'table')

    # create and execute the selection
    query = Selection(tbl, where)
    query.select()

    item_names = tbl._v_attrs.fields
    col_labels = _maybe_convert(query.values['column'],
                                tbl._v_attrs.columns_kind)
    row_labels = _maybe_convert(query.values['index'],
                                tbl._v_attrs.index_kind)

    # reconstruct
    stacked_index = MultiIndex.from_arrays([row_labels, col_labels])
    stacked = LongPanel(query.values['values'], index=stacked_index,
                        columns=item_names)

    if stacked.consistent:
        wide_panel = stacked.sortlevel(level=0).to_wide()
    else:
        if not self._quiet:  # pragma: no cover
            message = ('Duplicate entries in table, taking most recently '
                       'appended')
            print(message)

        # need a better algorithm
        tuples = stacked_index.get_tuple_index()
        tuple_to_loc = lib.map_indices_object(tuples)
        uniques = lib.fast_unique(tuples)
        uniques = _asarray_tuplesafe(uniques)
        row_picker = lib.merge_indexer_object(uniques, tuple_to_loc)

        deduped_index = stacked_index.take(row_picker)
        deduped_values = stacked.values.take(row_picker, axis=0)
        stacked = LongPanel(deduped_values, index=deduped_index,
                            columns=stacked.columns)
        wide_panel = stacked.to_wide()

    if query.column_filter:
        wanted = set(wide_panel.minor_axis) & query.column_filter
        wide_panel = wide_panel.reindex(minor=sorted(wanted))

    return wide_panel
def test_pad_backfill_object_segfault():
    """``pad_object`` / ``backfill_object`` must cope with an empty side.

    With an empty source every target position is expected to come back
    as -1; with an empty target the result is expected to be empty.
    """
    from datetime import datetime

    empty = np.array([], dtype='O')
    single = np.array([datetime(2010, 12, 31)], dtype='O')

    for name in ('pad_object', 'backfill_object'):
        fill = getattr(lib, name)

        # empty -> one element
        result = fill(empty, single,
                      lib.map_indices_object(empty),
                      lib.map_indices_object(single))
        expected = np.array([-1], dtype='i4')
        assert np.array_equal(result, expected)

        # one element -> empty
        result = fill(single, empty,
                      lib.map_indices_object(single),
                      lib.map_indices_object(empty))
        expected = np.array([], dtype='i4')
        assert np.array_equal(result, expected)
def test_pad_backfill_object_segfault():
    """Exercise the object pad/backfill indexers with one empty operand.

    Each of ``lib.pad_object`` and ``lib.backfill_object`` is called in
    both directions between an empty object array and a one-element one,
    and the returned indexer is compared with the expected int32 array.
    """
    from datetime import datetime

    def indexer_for(method, source, target):
        # build both label maps from scratch for every call
        return method(source, target,
                      lib.map_indices_object(source),
                      lib.map_indices_object(target))

    old = np.array([], dtype='O')
    new = np.array([datetime(2010, 12, 31)], dtype='O')

    got = indexer_for(lib.pad_object, old, new)
    assert np.array_equal(got, np.array([-1], dtype='i4'))

    got = indexer_for(lib.pad_object, new, old)
    assert np.array_equal(got, np.array([], dtype='i4'))

    got = indexer_for(lib.backfill_object, old, new)
    assert np.array_equal(got, np.array([-1], dtype='i4'))

    got = indexer_for(lib.backfill_object, new, old)
    assert np.array_equal(got, np.array([], dtype='i4'))
def _read_panel_table(self, group, where=None):
    """Read the table stored under ``group`` and return it as a ``Panel``.

    Parameters
    ----------
    group : node exposing a ``table`` attribute
        The stored table; its ``_v_attrs`` supply ``fields``,
        ``columns_kind`` and ``index_kind``.
    where : optional
        Selection criteria, passed through to ``Selection``.

    Returns
    -------
    Panel
        Built directly from a 3-d block when every (index, column) pair
        occurs once; otherwise built from a de-duplicated long-format
        ``DataFrame`` via ``to_panel()``. If the selection has a column
        filter, the minor axis is reindexed to the filtered labels.
    """
    from pandas.core.index import unique_int64, Factor
    from pandas.core.common import _asarray_tuplesafe
    from pandas.core.internals import BlockManager
    from pandas.core.reshape import block2d_to_block3d

    table = getattr(group, 'table')

    # create the selection
    sel = Selection(table, where, table._v_attrs.index_kind)
    sel.select()
    fields = table._v_attrs.fields

    columns = _maybe_convert(sel.values['column'],
                             table._v_attrs.columns_kind)
    index = _maybe_convert(sel.values['index'],
                           table._v_attrs.index_kind)
    values = sel.values['values']

    major = Factor(index)
    minor = Factor(columns)

    J, K = len(major.levels), len(minor.levels)
    # one integer per (major, minor) pair, so duplicates show up as
    # repeated keys
    key = major.labels * K + minor.labels

    if len(unique_int64(key)) == len(key):
        sorter, _ = lib.groupsort_indexer(key, J * K)

        # the data need to be sorted
        sorted_values = values.take(sorter, axis=0)
        major_labels = major.labels.take(sorter)
        minor_labels = minor.labels.take(sorter)

        block = block2d_to_block3d(sorted_values, fields, (J, K),
                                   major_labels, minor_labels)

        mgr = BlockManager([block], [block.items,
                                     major.levels, minor.levels])
        wp = Panel(mgr)
    else:
        if not self._quiet:  # pragma: no cover
            print('Duplicate entries in table, taking most recently '
                  'appended')

        # reconstruct
        long_index = MultiIndex.from_arrays([index, columns])
        lp = DataFrame(values, index=long_index, columns=fields)

        # need a better algorithm
        # (the indexer comes from ``match``, so no label -> location map
        # is built here)
        tuple_index = long_index.get_tuple_index()
        unique_tuples = lib.fast_unique(tuple_index)
        unique_tuples = _asarray_tuplesafe(unique_tuples)

        indexer = match(unique_tuples, tuple_index)

        new_index = long_index.take(indexer)
        new_values = lp.values.take(indexer, axis=0)

        lp = DataFrame(new_values, index=new_index, columns=lp.columns)
        wp = lp.to_panel()

    if sel.column_filter:
        new_minor = sorted(set(wp.minor_axis) & sel.column_filter)
        wp = wp.reindex(minor=new_minor)
    return wp
def lookup_python(values):
    """Time ``lib.merge_indexer_object`` looking ``values`` up in their own map.

    The label map is built once, outside the timed callable; the return
    value is whatever ``_timeit`` returns.
    """
    location_map = lib.map_indices_object(values)

    def run_lookup():
        return lib.merge_indexer_object(values, location_map)

    return _timeit(run_lookup)
def map_locations_python_object():
    """Time ``lib.map_indices_object`` on ``string_test_data(N)``.

    The test data is generated once, outside the timed callable; the
    return value is whatever ``_timeit`` returns.
    """
    sample = string_test_data(N)

    def build_map():
        return lib.map_indices_object(sample)

    return _timeit(build_map)
def _read_panel_table(self, group, where=None):
    """Load ``group.table`` (optionally filtered by ``where``) into a ``Panel``.

    The stored index and column labels are factorized and combined into a
    single integer key per row. If all keys are distinct, the values are
    group-sorted and placed straight into a 3-d block; otherwise the rows
    are de-duplicated through a tuple index and the panel is built with
    ``DataFrame.to_panel()`` (a message is printed unless ``self._quiet``
    is set). A non-empty ``column_filter`` on the selection narrows the
    minor axis.
    """
    from pandas.core.index import unique_int64, Factor
    from pandas.core.common import _asarray_tuplesafe
    from pandas.core.internals import BlockManager
    from pandas.core.reshape import block2d_to_block3d

    table = group.table

    # create the selection
    selection = Selection(table, where)
    selection.select()

    fields = table._v_attrs.fields
    minor_values = _maybe_convert(selection.values['column'],
                                  table._v_attrs.columns_kind)
    major_values = _maybe_convert(selection.values['index'],
                                  table._v_attrs.index_kind)
    data = selection.values['values']

    major = Factor(major_values)
    minor = Factor(minor_values)

    n_major, n_minor = len(major.levels), len(minor.levels)
    flat_key = major.labels * n_minor + minor.labels

    has_duplicates = len(unique_int64(flat_key)) != len(flat_key)
    if has_duplicates:
        if not self._quiet:  # pragma: no cover
            print('Duplicate entries in table, taking most recently '
                  'appended')

        # reconstruct
        long_index = MultiIndex.from_arrays([major_values, minor_values])
        frame = DataFrame(data, index=long_index, columns=fields)

        # need a better algorithm
        keys = long_index.get_tuple_index()
        position_of = lib.map_indices_object(keys)
        distinct_keys = _asarray_tuplesafe(lib.fast_unique(keys))
        keep = lib.merge_indexer_object(distinct_keys, position_of)

        kept_index = long_index.take(keep)
        kept_values = frame.values.take(keep, axis=0)

        frame = DataFrame(kept_values, index=kept_index,
                          columns=frame.columns)
        panel = frame.to_panel()
    else:
        order, _ = lib.groupsort_indexer(flat_key, n_major * n_minor)

        # the data need to be sorted
        ordered_data = data.take(order, axis=0)
        ordered_major = major.labels.take(order)
        ordered_minor = minor.labels.take(order)

        block = block2d_to_block3d(ordered_data, fields,
                                   (n_major, n_minor),
                                   ordered_major, ordered_minor)
        axes = [block.items, major.levels, minor.levels]
        panel = Panel(BlockManager([block], axes))

    if not selection.column_filter:
        return panel

    kept_minor = sorted(set(panel.minor_axis) & selection.column_filter)
    return panel.reindex(minor=kept_minor)