def test_ensure_int32():
    """``com._ensure_int32`` must give int32 output for int32/int64 input."""
    # Same two cases as before (int32 first, then int64), driven by a loop.
    for input_dtype in (np.int32, np.int64):
        arr = np.arange(10, dtype=input_dtype)
        converted = com._ensure_int32(arr)
        assert converted.dtype == np.int32
def test_ensure_int32():
    """Verify the dtype returned by ``com._ensure_int32`` is int32."""
    target = np.int32

    # Input that already has the target dtype.
    already_int32 = np.arange(10, dtype=np.int32)
    assert com._ensure_int32(already_int32).dtype == target

    # Wider integer input.
    from_int64 = np.arange(10, dtype=np.int64)
    assert com._ensure_int32(from_int64).dtype == target
def generate_groups(data, group_index, ngroups, axis=0, factory=lambda x: x):
    """
    Yield ``(i, group_slice)`` pairs cut from a reordered copy of ``data``.

    ``data`` is reordered with the indexer returned by
    ``lib.groupsort_indexer`` and then sliced at the boundaries returned by
    ``lib.generate_slices``.

    Parameters
    ----------
    data : BlockManager, Series or DataFrame
    group_index : array-like
        Group ids; converted with ``com._ensure_int32`` before use.
    ngroups : int
        Group count handed to ``lib.groupsort_indexer`` and
        ``lib.generate_slices``.
    axis : int, default 0
        Axis of ``data`` that is reordered and sliced (not used for Series).
    factory : callable, default identity
        Applied to every slice when the reordered data is a BlockManager.

    Returns
    -------
    generator
        Yields ``(i, group_slice)`` with ``i`` counting up from 0.

    Raises
    ------
    TypeError
        If ``data`` is not a BlockManager, Series or DataFrame.  Because
        this is a generator, the error surfaces on first iteration.
    """
    group_index = com._ensure_int32(group_index)

    indexer = lib.groupsort_indexer(group_index, ngroups)[0]
    group_index = group_index.take(indexer)

    # One if/elif/else chain: exactly one branch binds ``sorted_data``.
    # Previously an unsupported type left it unbound and the failure showed
    # up below as an UnboundLocalError.
    if isinstance(data, BlockManager):
        # this is sort of wasteful but...
        sorted_axis = data.axes[axis].take(indexer)
        sorted_data = data.reindex_axis(sorted_axis, axis=axis)
    elif isinstance(data, Series):
        sorted_axis = data.index.take(indexer)
        sorted_data = data.reindex(sorted_axis)
    elif isinstance(data, DataFrame):
        sorted_data = data.take(indexer, axis=axis)
    else:
        raise TypeError('generate_groups requires a BlockManager, Series or '
                        'DataFrame, got %s' % type(data).__name__)

    if isinstance(sorted_data, DataFrame):
        def _get_slice(slob):
            if axis == 0:
                return sorted_data[slob]
            else:
                return sorted_data.ix[:, slob]
    elif isinstance(sorted_data, BlockManager):
        def _get_slice(slob):
            return factory(sorted_data.get_slice(slob, axis=axis))
    elif isinstance(sorted_data, Series):
        def _get_slice(slob):
            return sorted_data._get_values(slob)
    else:  # pragma: no cover
        # Defensive fallback: only reachable if the reindex/take call above
        # handed back some other type.
        def _get_slice(slob):
            return sorted_data[slob]

    starts, ends = lib.generate_slices(group_index, ngroups)

    for i, (start, end) in enumerate(zip(starts, ends)):
        # Since I'm now compressing the group ids, it's now not "possible" to
        # produce empty slices because such groups would not be observed in
        # the data
        assert start < end
        yield i, _get_slice(slice(start, end))
def group_info(self):
    """
    Return ``(comp_ids, obs_group_ids, ngroups)`` for ``self.groupings``.

    With more than one grouping, the labels of all groupings are combined
    through ``get_group_index``; otherwise the labels of the only grouping
    are used directly.  Either way the index is passed through
    ``_compress_group_index``, ``ngroups`` is the length of the second value
    it returns, and the first is converted with ``com._ensure_int32``.
    """
    groupings = self.groupings
    if len(groupings) > 1:
        label_arrays = [grouping.labels for grouping in groupings]
        raw_index = get_group_index(label_arrays, self.shape)
    else:
        raw_index = groupings[0].labels

    # Both cases share the same compression step.
    compressed, observed = _compress_group_index(raw_index)
    group_count = len(observed)
    return com._ensure_int32(compressed), observed, group_count
def generate_groups(data, group_index, ngroups, axis=0, factory=lambda x: x):
    """
    Yield ``(i, group_slice)`` pairs cut from a reordered copy of ``data``.

    ``data`` is reordered with the indexer returned by
    ``lib.groupsort_indexer`` and then sliced at the boundaries returned by
    ``lib.generate_slices``.

    Parameters
    ----------
    data : BlockManager, Series or DataFrame
    group_index : array-like
        Group ids; converted with ``com._ensure_int32`` before use.
    ngroups : int
        Group count handed to ``lib.groupsort_indexer`` and
        ``lib.generate_slices``.
    axis : int, default 0
        Axis of ``data`` that is reordered and sliced (not used for Series).
    factory : callable, default identity
        Applied to every slice when the reordered data is a BlockManager.

    Returns
    -------
    generator
        Yields ``(i, group_slice)`` with ``i`` counting up from 0.

    Raises
    ------
    TypeError
        If ``data`` is not a BlockManager, Series or DataFrame.  Because
        this is a generator, the error surfaces on first iteration.
    """
    group_index = com._ensure_int32(group_index)

    indexer = lib.groupsort_indexer(group_index, ngroups)[0]
    group_index = group_index.take(indexer)

    # One if/elif/else chain: exactly one branch binds ``sorted_data``.
    # Previously an unsupported type left it unbound and the failure showed
    # up below as an UnboundLocalError.
    if isinstance(data, BlockManager):
        # this is sort of wasteful but...
        sorted_axis = data.axes[axis].take(indexer)
        sorted_data = data.reindex_axis(sorted_axis, axis=axis)
    elif isinstance(data, Series):
        sorted_axis = data.index.take(indexer)
        sorted_data = data.reindex(sorted_axis)
    elif isinstance(data, DataFrame):
        sorted_data = data.take(indexer, axis=axis)
    else:
        raise TypeError('generate_groups requires a BlockManager, Series or '
                        'DataFrame, got %s' % type(data).__name__)

    if isinstance(sorted_data, DataFrame):
        def _get_slice(slob):
            if axis == 0:
                return sorted_data[slob]
            else:
                return sorted_data.ix[:, slob]
    elif isinstance(sorted_data, BlockManager):
        def _get_slice(slob):
            return factory(sorted_data.get_slice(slob, axis=axis))
    elif isinstance(sorted_data, Series):
        def _get_slice(slob):
            return sorted_data._get_values(slob)
    else:  # pragma: no cover
        # Defensive fallback: only reachable if the reindex/take call above
        # handed back some other type.
        def _get_slice(slob):
            return sorted_data[slob]

    starts, ends = lib.generate_slices(group_index, ngroups)

    for i, (start, end) in enumerate(zip(starts, ends)):
        # Since I'm now compressing the group ids, it's now not "possible" to
        # produce empty slices because such groups would not be observed in
        # the data
        assert start < end
        yield i, _get_slice(slice(start, end))
def group_info(self):
    """
    Return ``(comp_ids, obs_group_ids, ngroups)``.

    The first two values come from ``self._get_compressed_labels()``.
    ``ngroups`` is the length of ``obs_group_ids``, and ``comp_ids`` is
    converted with ``com._ensure_int32`` before being returned.
    """
    compressed, observed = self._get_compressed_labels()
    group_count = len(observed)
    return com._ensure_int32(compressed), observed, group_count