def _unique_segments(self):
    """Shared helper for unique, unique_count and value_counts.

    Rebuilds this column on top of its dense buffer with no mask, sorts the
    result in ascending order, and runs ``cudautils.find_segments`` over the
    sorted values.

    Returns
    -------
    (segments, values)
        ``segments`` is the first item returned by
        ``cudautils.find_segments``; ``values`` is the GPU array of sorted
        values that was passed to it.
    """
    # Dense, mask-free copy of the column, sorted ascending.  The second
    # item returned by sort_by_values is not used here.
    sorted_column, _ = self.replace(
        data=self.to_dense_buffer(), mask=None
    ).sort_by_values(ascending=True)

    # Locate the segments in the sorted data; only the first result is
    # handed back to the caller.
    values = sorted_column.to_gpu_array()
    segments, _ = cudautils.find_segments(values)
    return segments, values
def _unique_segments(self):
    """Shared helper for unique, unique_count and value_counts.

    Takes the column produced by ``self.dropna()``, sorts it with
    ``sort_by_values`` (default arguments), and runs
    ``cudautils.find_segments`` over the sorted data.

    Returns
    -------
    (segments, values)
        ``segments`` is the first item returned by
        ``cudautils.find_segments``; ``values`` is the ``data_array_view``
        of the sorted column that was passed to it.
    """
    # Dense column, sorted.  The second item returned by sort_by_values is
    # not used here.
    sorted_column, _ = self.dropna().sort_by_values()

    # Locate the segments in the sorted data; only the first result is
    # handed back to the caller.
    values = sorted_column.data_array_view
    segments, _ = cudautils.find_segments(values)
    return segments, values
def _group_inner_levels(self, columns, rowidcol, segs, markers):
    """Group the second and onwards level.

    Parameters
    ----------
    columns : sequence[str]
        Group keys, looked up in ``self._df``.  The order is important.
    rowidcol : column-like
        Holds the original row ids; it is used to determine the shuffling
        order.  Must provide ``to_gpu_array()`` and ``take()``.
        NOTE(review): presumably a Series -- confirm against the caller.
    segs : Series
        First level group begin offsets.  Converted to ``np.int32`` before
        use.
    markers
        Forwarded to ``cudautils.find_segments`` via its ``markers``
        keyword and replaced by that call's second result on every
        iteration.  It is not part of the return value.

    Returns
    -------
    (sorted_keys, reordering_indices, segments)
        - sorted_keys : list[Series]
            List of sorted key columns.  Column order is the same as the
            *columns* argument.
        - reordering_indices : device array
            The indices to gather on to shuffle the dataframe into the
            grouped sequence.
        - segments : Series
            Group begin offsets.
    """
    seg_offsets = segs.astype(dtype=np.int32).data.mem
    keys_sorted = []
    plans = {}  # segmented-sort plans keyed by (length, key dtype, index dtype)

    for key_name in columns:
        # Bring the key column into the order set by the previous levels.
        key_series = self._df[key_name].take(
            rowidcol.to_gpu_array(), ignore_index=True
        )

        # Segmented sort on the key.  `order` starts out as 0..n-1.
        # NOTE(review): apply_segsort appears to reorder key_series._column
        # and `order` in place, since both are read afterwards -- confirm.
        n_rows = len(key_series)
        order = Column(Buffer(cudautils.arange(n_rows)))
        signature = (n_rows, key_series.dtype, order.dtype)
        plans[signature] = apply_segsort(
            key_series._column,
            order,
            seg_offsets,
            plan=plans.get(signature),
        )

        # Keep the sorted key column for the caller.
        keys_sorted.append(key_series)

        # Work out the segments for this level from the sorted keys and
        # the previous level's segments.
        seg_offsets, markers = cudautils.find_segments(
            key_series.to_gpu_array(), seg_offsets, markers=markers
        )

        # Apply this level's ordering to the row ids.
        rowidcol = rowidcol.take(order.to_gpu_array(), ignore_index=True)

    return keys_sorted, rowidcol.to_gpu_array(), Series(seg_offsets)
def _find_segments(self):
    """Run ``cudautils.find_segments`` on ``self.gpu_values``.

    Returns
    -------
    (segments, markers)
        ``segments`` is a ``NumericalColumn`` wrapping the first result of
        ``cudautils.find_segments`` in a ``Buffer`` and keeping its dtype;
        ``markers`` is the second result, passed through unchanged.
    """
    offsets, markers = cudautils.find_segments(self.gpu_values)
    segments = NumericalColumn(data=Buffer(offsets), dtype=offsets.dtype)
    return segments, markers
def _find_segments(self):
    """Run ``cudautils.find_segments`` on ``self.gpu_values``.

    Returns
    -------
    (segments, markers)
        ``segments`` is the column built by ``column.build_column`` from
        the first result of ``cudautils.find_segments`` (wrapped in a
        ``Buffer``, keeping its dtype); ``markers`` is the second result,
        passed through unchanged.
    """
    offsets, markers = cudautils.find_segments(self.gpu_values)
    segments = column.build_column(data=Buffer(offsets), dtype=offsets.dtype)
    return segments, markers