def __init__(self, df, by, axis, level, as_index, sort, group_keys, squeeze,
             **kwargs):
    """Set up a group-by over ``df`` and submit the per-partition work.

    Args:
        df: Frame being grouped. This method reads its ``columns``,
            ``index``, ``_row_metadata``, ``_col_metadata`` and
            ``_block_partitions`` attributes.
        by: Grouping key. Passed to ``pandas.Series.groupby`` locally and
            forwarded to every ``groupby._submit`` call.
        axis: ``0`` groups the row labels; any other value groups the
            column labels.
        level: Forwarded unchanged to ``groupby._submit``.
        as_index: Forwarded unchanged to ``groupby._submit``.
        sort: Passed to the local ``groupby`` and forwarded to
            ``groupby._submit``.
        group_keys: Forwarded unchanged to ``groupby._submit``.
        squeeze: Forwarded unchanged to ``groupby._submit``.
        **kwargs: Accepted for signature compatibility; not used here.
    """
    self._columns = df.columns
    self._index = df.index
    self._axis = axis

    self._row_metadata = df._row_metadata
    self._col_metadata = df._col_metadata

    if axis == 0:
        # Iterating the transposed block grid walks it column by column.
        partitions = list(df._block_partitions.T)
        labels = self._index
    else:
        partitions = list(df._block_partitions)
        labels = self._columns

    # The Series must be indexed by the very labels it holds so that `by`
    # aligns against the axis being grouped. Indexing the column labels by
    # the row index fails outright whenever the two lengths differ.
    self._index_grouped = pd.Series(labels, index=labels) \
        .groupby(by=by, sort=sort)

    self._keys_and_values = list(self._index_grouped)

    # Both values are identical for every partition, so build them once.
    shared_args = (by, axis, level, as_index, sort, group_keys, squeeze)
    num_groups = len(self)

    # zip(*...) transposes the per-partition results so that each entry
    # collects the i-th return value of every submitted call.
    # NOTE(review): presumably one return value per group - confirm
    # against groupby._submit.
    self._grouped_partitions = list(zip(*(
        groupby._submit(args=shared_args + tuple(part.tolist()),
                        num_return_vals=num_groups)
        for part in partitions)))
def _grouped_partitions(self):
    """Return the frame's partitions split up by group.

    When ``self._index_grouped`` holds more than one group, one
    ``groupby._submit`` call is made per entry of ``self._partitions`` and
    the results are transposed with ``zip`` (so a ``zip`` iterator is
    returned). Otherwise a one-element list is returned holding
    ``self._df._col_partitions`` for axis 0 or ``self._df._row_partitions``
    for any other axis.
    """
    # Store the grouping key once up front; every submitted call shares
    # this single handle rather than paying for the put repeatedly.
    by_handle = ray.put(self._by)

    if not len(self._index_grouped) > 1:
        # At most one group: the existing partitions are handed back
        # without submitting any work.
        if self._axis == 0:
            return [self._df._col_partitions]
        return [self._df._row_partitions]

    def submit_for(part):
        # Positional arguments: the shared settings first, followed by the
        # individual elements of this partition.
        call_args = (
            by_handle,
            self._axis,
            self._level,
            self._as_index,
            self._sort,
            self._group_keys,
            self._squeeze,
        ) + tuple(part.tolist())
        return groupby._submit(
            args=call_args,
            num_return_vals=len(self._index_grouped),
        )

    return zip(*map(submit_for, self._partitions))
def _grouped_partitions(self):
    """Return the frame's partitions split up by group.

    When ``self._index_grouped`` holds more than one group, one
    ``groupby._submit`` call is made per entry of ``self._partitions`` and
    the results are transposed with ``zip`` (so a ``zip`` iterator is
    returned). Otherwise a one-element list is returned holding
    ``self._df._col_partitions`` for axis 0 or ``self._df._row_partitions``
    for any other axis.
    """
    # Store the grouping key once up front; every submitted call shares
    # this single handle rather than paying for the put repeatedly.
    by_handle = ray.put(self._by)

    # Axis 0 reads the column metadata, any other axis the row metadata.
    if self._axis == 0:
        metadata = self._df._col_metadata
    else:
        metadata = self._df._row_metadata

    # One stored index per group of coordinate rows that share a
    # 'partition' value.
    # NOTE(review): assumes this group order lines up with the order of
    # self._partitions, since handles are looked up by position - confirm.
    index_handles = [
        ray.put(chunk.index)
        for _, chunk in metadata._coord_df.copy().groupby(by='partition')
    ]

    if not len(self._index_grouped) > 1:
        # At most one group: the existing partitions are handed back
        # without submitting any work.
        if self._axis == 0:
            return [self._df._col_partitions]
        return [self._df._row_partitions]

    def submit_for(position, part):
        # Positional arguments: this partition's index handle, the shared
        # settings, then the individual elements of the partition.
        call_args = (
            index_handles[position],
            by_handle,
            self._axis,
            self._level,
            self._as_index,
            self._sort,
            self._group_keys,
            self._squeeze,
        ) + tuple(part.tolist())
        return groupby._submit(
            args=call_args,
            num_return_vals=len(self._index_grouped),
        )

    return zip(*(
        submit_for(position, part)
        for position, part in enumerate(self._partitions)
    ))