Exemple #1
0
def test_ordering_multiple_groups(ms, group_cols, index_cols):
    """Check the per-group row orderings of a query producing six groups.

    ``assert_liveness`` is re-checked after every step that touches the
    TAQL proxy, and once more after the locals referring to it have been
    deleted.
    NOTE(review): the two liveness arguments are presumably counts of live
    proxies and executors -- confirm against ``assert_liveness``.
    """
    row_chunks = [{'row': 2}]

    group_taql = group_ordering_taql(table_proxy(ms), group_cols, index_cols)
    assert_liveness(2, 1)
    orders = group_row_ordering(group_taql, group_cols, index_cols,
                                row_chunks)
    assert_liveness(2, 1)
    first_rows = group_taql.getcol("__firstrow__").result()
    assert_liveness(2, 1)

    # One ordering and one first row per group
    assert len(first_rows) == 6
    assert len(orders) == 6

    assert_array_equal(first_rows, [0, 1, 3, 4, 7, 8])

    # The first element of each ordering is the array of row ids
    rowid_arrays = tuple(order[0] for order in orders)
    (rowids,) = dask.compute(rowid_arrays)

    expected_rowids = ([2, 0], [1], [5, 3], [6, 4], [9, 7], [8])

    for group, expected in enumerate(expected_rowids):
        assert_array_equal(rowids[group], expected)

    # Drop everything still referring to the proxy before the final check
    del first_rows, orders, rowid_arrays, group_taql
    assert_liveness(0, 0)
Exemple #2
0
def test_row_ordering_multiple_groups(ms, group_cols, index_cols, chunks):
    """Check row ids and chunking of the orderings for a two-group query.

    ``chunks`` is a list of per-group chunk dicts keyed by ``'row'``; when
    it has a single entry, that entry is expected to apply to the second
    group as well.
    NOTE(review): the two ``assert_liveness`` arguments are presumably
    counts of live proxies and executors -- confirm against its definition.
    """
    group_taql = group_ordering_taql(table_proxy(ms), group_cols, index_cols)
    assert_liveness(2, 1)
    orders = group_row_ordering(group_taql, group_cols, index_cols, chunks)
    assert_liveness(2, 1)
    first_rows = group_taql.getcol("__firstrow__").result()
    assert_liveness(2, 1)

    # Exactly two groups are expected
    assert len(orders) == 2
    assert len(first_rows) == 2
    assert_array_equal(first_rows, [0, 7])

    # The first element of each ordering is the array of row ids
    rowid_arrays = tuple(order[0] for order in orders)
    (rowids,) = dask.compute(rowid_arrays)

    # First group: seven rows, chunked by the first chunk specification
    # once normalised to the length of the output array
    first_chunks = da.core.normalize_chunks(chunks[0]['row'], (7, ))
    assert_array_equal(rowids[0], [6, 5, 4, 3, 2, 1, 0])
    assert rowid_arrays[0].chunks == first_chunks

    # Second group: three rows. If chunks were only supplied for the
    # first group, its chunking is re-used here
    if len(chunks) == 1:
        second_row_chunks = chunks[0]['row']
    else:
        second_row_chunks = chunks[1]['row']

    second_chunks = da.core.normalize_chunks(second_row_chunks, (3, ))
    assert_array_equal(rowids[1], [9, 8, 7])
    assert rowid_arrays[1].chunks == second_chunks

    # Drop everything still referring to the proxy before the final check
    del first_rows, orders, rowid_arrays, group_taql
    assert_liveness(0, 0)
Exemple #3
0
    def datasets(self):
        """Build the datasets selected by this object's grouping settings.

        The case is chosen from ``self.group_cols``:

        * empty -- a single dataset built from an ordering over
          ``self.index_cols``;
        * exactly one entry equal to ``"__row__"`` -- one dataset per
          block of a one-row-per-chunk ordering, each given the matching
          computed sorted-row value as ``exemplar_row``;
        * otherwise -- whatever ``self._group_datasets`` produces from a
          grouped ordering query.

        Returns
        -------
        The datasets on their own, or a tuple starting with the datasets,
        followed by the table keywords if ``self.table_keywords is True``
        and then the column keywords if ``self.column_keywords is True``.
        """
        proxy = self._table_proxy()
        num_group_cols = len(self.group_cols)
        group_by_row = (num_group_cols == 1
                        and self.group_cols[0] == "__row__")

        if num_group_cols == 0:
            # No grouping: one dataset covering the whole ordering
            query = ordering_taql(proxy, self.index_cols, self.taql_where)
            ordering = row_ordering(query, self.index_cols, self.chunks[0])
            built = [self._single_dataset(ordering)]
        elif group_by_row:
            query = ordering_taql(proxy, self.index_cols, self.taql_where)
            # Force a chunk size of one along row so that every block
            # of the ordering holds exactly one row
            sorted_rows, row_runs = row_ordering(
                query, self.index_cols, dict(self.chunks[0], row=1))

            sorted_row_blocks = sorted_rows.blocks
            row_run_blocks = row_runs.blocks

            # The sorted rows double as the exemplar rows, so they are
            # computed here and attached to each dataset as an attribute
            exemplars = sorted_rows.compute()

            built = []

            for block, exemplar in enumerate(exemplars):
                block_ordering = (sorted_row_blocks[block],
                                  row_run_blocks[block])
                built.append(self._single_dataset(block_ordering,
                                                  exemplar_row=exemplar))
        else:
            # Grouping on one or more columns
            query = group_ordering_taql(proxy, self.group_cols,
                                        self.index_cols, self.taql_where)
            group_orders = group_row_ordering(query, self.group_cols,
                                              self.index_cols, self.chunks)

            group_values = [query.getcol(col).result()
                            for col in self.group_cols]
            first_rows = query.getcol("__firstrow__").result()
            assert len(group_orders) == len(first_rows)

            built = self._group_datasets(group_values, first_rows,
                                         group_orders)

        # Optional extras, appended in a fixed order after the datasets
        extras = []

        if self.table_keywords is True:
            extras.append(proxy.getkeywords().result())

        if self.column_keywords is True:
            column_keywords = proxy.submit(_col_keyword_getter, READLOCK)
            extras.append(column_keywords.result())

        if not extras:
            return built

        return (built, *extras)
Exemple #4
0
def test_ordering_query_taql_where_strings(ms, group_cols, index_cols):
    """Compare the TAQL text built by the ordering helpers to fixed strings.

    Both ``group_ordering_taql`` and ``ordering_taql`` are exercised with a
    ``taql_where`` clause, without one, and with no index columns.
    NOTE(review): the expected strings imply the fixtures supply
    FIELD_ID/SCAN_NUMBER as group columns and TIME/ANTENNA1/ANTENNA2 as
    index columns -- confirm against the fixture definitions.
    """
    def query_text(taql_proxy):
        # The query is the proxy's first argument; tabs are expanded to
        # four spaces to match the expected strings below
        return taql_proxy._args[0].replace("\t", " " * 4)

    where_clause = "ANTENNA1 != ANTENNA2"

    # Grouped query with a WHERE clause
    expected = ("SELECT\n"
                "    FIELD_ID,\n"
                "    SCAN_NUMBER,\n"
                "    GAGGR(TIME) as GROUP_TIME,\n"
                "    GAGGR(ANTENNA1) as GROUP_ANTENNA1,\n"
                "    GAGGR(ANTENNA2) as GROUP_ANTENNA2,\n"
                "    GROWID() AS __tablerow__,\n"
                "    GCOUNT() as __tablerows__,\n"
                "    GROWID()[0] as __firstrow__\n"
                "FROM\n"
                "    $1\n"
                "WHERE\n"
                "    ANTENNA1 != ANTENNA2\n"
                "GROUPBY\n"
                "    FIELD_ID,\n"
                "    SCAN_NUMBER")
    taql = group_ordering_taql(table_proxy(ms),
                               group_cols,
                               index_cols,
                               taql_where=where_clause)
    assert query_text(taql) == expected

    # Grouped query without a WHERE clause
    expected = ("SELECT\n"
                "    FIELD_ID,\n"
                "    SCAN_NUMBER,\n"
                "    GAGGR(TIME) as GROUP_TIME,\n"
                "    GAGGR(ANTENNA1) as GROUP_ANTENNA1,\n"
                "    GAGGR(ANTENNA2) as GROUP_ANTENNA2,\n"
                "    GROWID() AS __tablerow__,\n"
                "    GCOUNT() as __tablerows__,\n"
                "    GROWID()[0] as __firstrow__\n"
                "FROM\n"
                "    $1\n"
                "GROUPBY\n"
                "    FIELD_ID,\n"
                "    SCAN_NUMBER")
    taql = group_ordering_taql(table_proxy(ms), group_cols, index_cols)
    assert query_text(taql) == expected

    # Grouped query without index columns
    expected = ("SELECT\n"
                "    FIELD_ID,\n"
                "    SCAN_NUMBER,\n"
                "    GROWID() AS __tablerow__,\n"
                "    GCOUNT() as __tablerows__,\n"
                "    GROWID()[0] as __firstrow__\n"
                "FROM\n"
                "    $1\n"
                "GROUPBY\n"
                "    FIELD_ID,\n"
                "    SCAN_NUMBER")
    taql = group_ordering_taql(table_proxy(ms), group_cols, [])
    assert query_text(taql) == expected

    # Ungrouped query with a WHERE clause
    expected = ("SELECT\n"
                "    ROWID() as __tablerow__\n"
                "FROM\n"
                "    $1\n"
                "WHERE\n"
                "    ANTENNA1 != ANTENNA2\n"
                "ORDERBY\n"
                "    TIME,\n"
                "    ANTENNA1,\n"
                "    ANTENNA2")
    taql = ordering_taql(table_proxy(ms),
                         index_cols,
                         taql_where=where_clause)
    assert query_text(taql) == expected

    # Ungrouped query without a WHERE clause
    expected = ("SELECT\n"
                "    ROWID() as __tablerow__\n"
                "FROM\n"
                "    $1\n"
                "ORDERBY\n"
                "    TIME,\n"
                "    ANTENNA1,\n"
                "    ANTENNA2")
    taql = ordering_taql(table_proxy(ms), index_cols)
    assert query_text(taql) == expected

    # Ungrouped query without index columns
    expected = ("SELECT\n"
                "    ROWID() as __tablerow__\n"
                "FROM\n"
                "    $1\n")
    taql = ordering_taql(table_proxy(ms), [])
    assert query_text(taql) == expected