def _make_labels(self): if self._was_factor: # pragma: no cover raise Exception('Should not call this method grouping by level') else: values = self.grouper if values.dtype != np.object_: values = values.astype('O') # khash rizer = lib.Factorizer(len(values)) labels, counts = rizer.factorize(values, sort=False) uniques = Index(rizer.uniques, name=self.name) if self.sort and len(counts) > 0: sorter = uniques.argsort() reverse_indexer = np.empty(len(sorter), dtype=np.int32) reverse_indexer.put(sorter, np.arange(len(sorter))) mask = labels < 0 labels = reverse_indexer.take(labels) np.putmask(labels, mask, -1) uniques = uniques.take(sorter) counts = counts.take(sorter) self._labels = labels self._group_index = uniques self._counts = counts
def _set_grouper(self, obj, sort=False): """ given an object and the specifications, setup the internal grouper for this particular specification Parameters ---------- obj : the subject object sort : bool, default False whether the resulting grouper should be sorted """ if self.key is not None and self.level is not None: raise ValueError( "The Grouper cannot specify both a key and a level!") # Keep self.grouper value before overriding if self._grouper is None: self._grouper = self.grouper # the key must be a valid info item if self.key is not None: key = self.key # The 'on' is already defined if getattr(self.grouper, "name", None) == key and isinstance( obj, ABCSeries): ax = self._grouper.take(obj.index) else: if key not in obj._info_axis: raise KeyError( "The grouper name {0} is not found".format(key)) ax = Index(obj[key], name=key) else: ax = obj._get_axis(self.axis) if self.level is not None: level = self.level # if a level is given it must be a mi level or # equivalent to the axis name if isinstance(ax, MultiIndex): level = ax._get_level_number(level) ax = Index(ax._get_level_values(level), name=ax.names[level]) else: if level not in (0, ax.name): raise ValueError( "The level {0} is not valid".format(level)) # possibly sort if (self.sort or sort) and not ax.is_monotonic: # use stable sort to support first, last, nth indexer = self.indexer = ax.argsort(kind="mergesort") ax = ax.take(indexer) obj = obj.take(indexer, axis=self.axis, is_copy=False) self.obj = obj self.grouper = ax return self.grouper
def _set_grouper(self, obj, sort=False): """ given an object and the specifications, setup the internal grouper for this particular specification Parameters ---------- obj : the subject object sort : bool, default False whether the resulting grouper should be sorted """ if self.key is not None and self.level is not None: raise ValueError( "The Grouper cannot specify both a key and a level!") # Keep self.grouper value before overriding if self._grouper is None: self._grouper = self.grouper # the key must be a valid info item if self.key is not None: key = self.key # The 'on' is already defined if (getattr(self.grouper, 'name', None) == key and isinstance(obj, ABCSeries)): ax = self._grouper.take(obj.index) else: if key not in obj._info_axis: raise KeyError( "The grouper name {0} is not found".format(key)) ax = Index(obj[key], name=key) else: ax = obj._get_axis(self.axis) if self.level is not None: level = self.level # if a level is given it must be a mi level or # equivalent to the axis name if isinstance(ax, MultiIndex): level = ax._get_level_number(level) ax = Index(ax._get_level_values(level), name=ax.names[level]) else: if level not in (0, ax.name): raise ValueError( "The level {0} is not valid".format(level)) # possibly sort if (self.sort or sort) and not ax.is_monotonic: # use stable sort to support first, last, nth indexer = self.indexer = ax.argsort(kind='mergesort') ax = ax.take(indexer) obj = obj._take(indexer, axis=self.axis, is_copy=False) self.obj = obj self.grouper = ax return self.grouper
def _sort_labels(uniques, left, right): if not isinstance(uniques, np.ndarray): # tuplesafe uniques = Index(uniques).values sorter = uniques.argsort() reverse_indexer = np.empty(len(sorter), dtype=np.int32) reverse_indexer.put(sorter, np.arange(len(sorter))) return reverse_indexer.take(left), reverse_indexer.take(right)
def unique_with_labels(values): from pandas.core.index import Index rizer = lib.Factorizer(len(values)) labels, _ = rizer.factorize(values, sort=False) uniques = Index(rizer.uniques) try: sorter = uniques.argsort() reverse_indexer = np.empty(len(sorter), dtype='i4') reverse_indexer.put(sorter, np.arange(len(sorter))) labels = reverse_indexer.take(labels) uniques = uniques.take(sorter) except TypeError: pass return uniques, labels
def _sort_labels(uniques, left, right): if not isinstance(uniques, np.ndarray): # tuplesafe uniques = Index(uniques).values sorter = uniques.argsort() reverse_indexer = np.empty(len(sorter), dtype=np.int64) reverse_indexer.put(sorter, np.arange(len(sorter))) new_left = reverse_indexer.take(com._ensure_platform_int(left)) np.putmask(new_left, left == -1, -1) new_right = reverse_indexer.take(com._ensure_platform_int(right)) np.putmask(new_right, right == -1, -1) return new_left, new_right