def datasets(self):
    """Construct the datasets described by this object's configuration.

    Dispatches on ``self.group_cols``:

    * no grouping columns — the entire table becomes one dataset;
    * the single pseudo-column ``"__row__"`` — one dataset per table row;
    * otherwise — one dataset per distinct grouping-column combination.

    Returns the list of datasets, optionally extended into a tuple with
    the table keywords and/or per-column keywords when
    ``self.table_keywords`` / ``self.column_keywords`` is ``True``.
    """
    proxy = self._table_proxy()
    ngroups = len(self.group_cols)

    if ngroups == 0:
        # Ungrouped: a single dataset covering the whole table
        taql = ordering_taql(proxy, self.index_cols, self.taql_where)
        row_order = row_ordering(taql, self.index_cols, self.chunks[0])
        datasets = [self._single_dataset(row_order)]
    elif ngroups == 1 and self.group_cols[0] == "__row__":
        # Row grouping: every row becomes its own dataset
        taql = ordering_taql(proxy, self.index_cols, self.taql_where)
        # Force a row chunking of 1 so each block holds exactly one row
        sorted_rows, row_runs = row_ordering(
            taql,
            self.index_cols,
            dict(self.chunks[0], row=1))

        # Reify the sorted rows now: each value is the exemplar row
        # attached to the corresponding single-row dataset
        exemplars = sorted_rows.compute()

        datasets = [
            self._single_dataset(
                (sorted_rows.blocks[b], row_runs.blocks[b]),
                exemplar_row=exemplar)
            for b, exemplar in enumerate(exemplars)
        ]
    else:
        # General case: group by the supplied grouping columns
        taql = group_ordering_taql(proxy, self.group_cols,
                                   self.index_cols, self.taql_where)
        row_orders = group_row_ordering(taql, self.group_cols,
                                        self.index_cols, self.chunks)

        group_values = [taql.getcol(g).result() for g in self.group_cols]
        first_rows = taql.getcol("__firstrow__").result()

        assert len(row_orders) == len(first_rows)

        datasets = self._group_datasets(group_values, first_rows, row_orders)

    result = (datasets, )

    # `is True` is deliberate: these attributes may hold non-boolean
    # selections that should not trigger the keyword lookups
    if self.table_keywords is True:
        result += (proxy.getkeywords().result(), )

    if self.column_keywords is True:
        col_kw = proxy.submit(_col_keyword_getter, READLOCK)
        result += (col_kw.result(), )

    # Unwrap when only the dataset list was requested
    return result[0] if len(result) == 1 else result
def test_row_ordering_no_group(ms, index_cols, chunks):
    """Ungrouped row ordering sorts rows descending and chunks as requested."""
    taql = ordering_taql(table_proxy(ms), index_cols)
    assert_liveness(2, 1)

    row_orders = row_ordering(taql, index_cols, chunks)
    assert_liveness(2, 1)

    # The output array's chunking must match the normalised request
    normalised = da.core.normalize_chunks(chunks['row'], (10, ))
    assert row_orders[0].chunks == normalised

    # Sorted row ids run from 9 down to 0
    computed_ids = dask.compute(row_orders[0])[0]
    assert_array_equal(computed_ids, list(range(9, -1, -1)))

    # Dropping our references releases the table and TAQL proxies
    del row_orders, taql
    assert_liveness(0, 0)