Code example #1
File: test_table_proxy.py Project: ska-sa/dask-ms
def test_embedding_table_proxy_in_taql(ms, reverse):
    """ Test using a TableProxy to create a TAQL TableProxy """
    proxy = TableProxy(pt.table, ms, ack=False, readonly=True)
    query = "SELECT UNIQUE ANTENNA1 FROM $1"
    taql_proxy = TableProxy(taql_factory, query, tables=[proxy])
    assert_array_equal(taql_proxy.getcol("ANTENNA1").result(), [0, 1, 2])

    # TAQL and original table
    assert_liveness(2, 1)

    if reverse:
        del proxy
        # TAQL still references original table
        assert_liveness(2, 1)

        # Remove TAQL now results in everything clearing up
        del taql_proxy
        assert_liveness(0, 0)
    else:
        # Removing TAQL should leave original table
        del taql_proxy
        assert_liveness(1, 1)

        # Removing proxy removes the last
        del proxy
        assert_liveness(0, 0)
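
The snippet above relies on two conventions that recur throughout these examples: TableProxy defers construction to the factory passed as its first argument, and taql_factory binds the placeholders $1, $2, ... to the entries of its tables list. A minimal standalone sketch of the same pattern follows; it assumes python-casacore and dask-ms are installed, that "test.ms" is an existing Measurement Set, and that taql_factory lives in daskms.table_proxy alongside TableProxy.

# Sketch only: "test.ms" and the taql_factory import path are assumptions.
import pyrap.tables as pt
from daskms.table_proxy import TableProxy, taql_factory

ms_proxy = TableProxy(pt.table, "test.ms", ack=False, readonly=True)

# "$1" in the query is substituted with tables[0]
taql_proxy = TableProxy(taql_factory, "SELECT UNIQUE ANTENNA1 FROM $1",
                        tables=[ms_proxy])

# Proxy methods return futures; .result() blocks for the value
print(taql_proxy.getcol("ANTENNA1").result())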
Code example #2
File: convert.py Project: ska-sa/dask-ms
    def from_store(store):
        typ = store.type()

        if typ == "casa":
            from daskms.table_proxy import TableProxy
            import pyrap.tables as pt
            table_proxy = TableProxy(pt.table, str(store.casa_path()),
                                     readonly=True, ack=False)
            keywords = table_proxy.getkeywords().result()

            try:
                version = str(keywords["MS_VERSION"])
            except KeyError:
                typ = "plain"
                version = "<unspecified>"
            else:
                typ = "measurementset"

            subtables = CasaFormat.find_subtables(keywords)
            return CasaFormat(version, subtables, typ)
        elif typ == "zarr":
            subtables = ZarrFormat.find_subtables(store)
            return ZarrFormat("0.1", subtables)
        elif typ == "parquet":
            subtables = ParquetFormat.find_subtables(store)
            return ParquetFormat("0.1", subtables)
        else:
            raise ValueError(f"Unexpected table type {typ}")
Code example #3
def test_ms_read(ms, group_cols, index_cols, select_cols):
    xds = xds_from_ms(ms, columns=select_cols,
                      group_cols=group_cols,
                      index_cols=index_cols,
                      chunks={"row": 2})

    order = orderby_clause(index_cols)
    np_column_data = []

    with TableProxy(pt.table, ms, lockoptions='auto', ack=False) as T:
        for ds in xds:
            assert "ROWID" in ds.coords
            group_col_values = [ds.attrs[a] for a in group_cols]
            where = where_clause(group_cols, group_col_values)
            query = f"SELECT * FROM $1 {where} {order}"

            with TableProxy(taql_factory, query, tables=[T]) as Q:
                column_data = {c: Q.getcol(c).result() for c in select_cols}
                np_column_data.append(column_data)

    del T

    for d, (ds, column_data) in enumerate(zip(xds, np_column_data)):
        for c in select_cols:
            dask_data = ds.data_vars[c].data.compute()
            assert_array_equal(column_data[c], dask_data)
Code example #4
def group_ordering_taql(table_proxy, group_cols, index_cols, taql_where=''):
    if len(group_cols) == 0:
        raise ValueError("group_ordering_taql requires "
                         "len(group_cols) > 0")
    else:
        index_group_cols = [f"GAGGR({c}) as GROUP_{c}"
                            for c in index_cols]
        # Group Row ID's
        index_group_cols.append("GROWID() AS __tablerow__")
        # Number of rows in the group
        index_group_cols.append("GCOUNT() as __tablerows__")
        # The first row of the group
        index_group_cols.append("GROWID()[0] as __firstrow__")

        groupby = groupby_clause(group_cols)
        select = select_clause(group_cols + index_group_cols)

        if taql_where != '':
            taql_where = f"\nWHERE\n\t{taql_where}"

        query = f"{select}\nFROM\n\t$1{taql_where}\n{groupby}"

        return TableProxy(taql_factory, query, tables=[table_proxy],
                          __executor_key__=table_proxy.executor_key)

    raise RuntimeError("Invalid condition in group_ordering_taql")
Code example #5
def group_ordering_taql(table_proxy, group_cols, index_cols, taql_where=''):
    if len(group_cols) == 0:
        raise ValueError("group_ordering_taql requires " "len(group_cols) > 0")
    else:
        index_group_cols = [
            "GAGGR(%s) as GROUP_%s" % (c, c) for c in index_cols
        ]
        # Group Row ID's
        index_group_cols.append("GROWID() AS __tablerow__")
        # Number of rows in the group
        index_group_cols.append("GCOUNT() as __tablerows__")
        # The first row of the group
        index_group_cols.append("GROWID()[0] as __firstrow__")

        groupby = groupby_clause(group_cols)
        select = select_clause(group_cols + index_group_cols)

        if taql_where != '':
            taql_where = "\nHAVING\n\t%s" % taql_where

        query = "%s\nFROM\n\t$1\n%s%s" % (select, groupby, taql_where)

        return TableProxy(taql_factory,
                          query,
                          tables=[table_proxy],
                          __executor_key__=table_proxy.executor_key)

    raise RuntimeError("Invalid condition in group_ordering_taql")
Code example #6
def _writable_table_proxy(table_name):
    return TableProxy(pt.table,
                      table_name,
                      ack=False,
                      readonly=False,
                      lockoptions='user',
                      __executor_key__=executor_key(table_name))
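
A minimal usage sketch for the helper above, assuming the helper and its imports are in scope, that "test.ms" exists and has a STATE_ID column, and that the proxy's method wrappers manage the 'user' locks internally (as the lock test elsewhere in these examples suggests):

# Sketch only: "test.ms" and its STATE_ID column are assumptions.
import numpy as np

proxy = _writable_table_proxy("test.ms")
nrows = proxy.nrows().result()

# putcol also returns a future; .result() waits for the write to land
proxy.putcol("STATE_ID", np.zeros(nrows, dtype=np.int32)).result()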
Code example #7
File: reads.py Project: gitter-badger/dask-ms
    def _table_proxy_factory(self):
        return TableProxy(pt.table,
                          self.table_path,
                          ack=False,
                          readonly=True,
                          lockoptions='user',
                          __executor_key__=executor_key(self.canonical_name))
Code example #8
def test_row_query(ms, index_cols):
    T = TableProxy(pt.table, ms, readonly=True, lockoptions='auto', ack=False)

    # Get the expected row ordering by lexically
    # sorting the indexing columns
    cols = [(name, T.getcol(name).result()) for name in index_cols]
    expected_rows = np.lexsort(tuple(c for n, c in reversed(cols)))

    del T

    xds = xds_from_ms(ms, columns=index_cols,
                      group_cols="__row__",
                      index_cols=index_cols,
                      chunks={"row": 2})

    actual_rows = da.concatenate([ds.ROWID.data for ds in xds])
    assert_array_equal(actual_rows, expected_rows)
Code example #9
def test_ms_update(ms, group_cols, index_cols, select_cols):
    # Zero everything to be sure
    with TableProxy(pt.table,
                    ms,
                    readonly=False,
                    lockoptions='auto',
                    ack=False) as T:
        nrows = T.nrows().result()
        T.putcol("STATE_ID", np.full(nrows, 0, dtype=np.int32)).result()
        data = np.zeros_like(T.getcol("DATA").result())
        data_dtype = data.dtype
        T.putcol("DATA", data).result()

    xds = xds_from_ms(ms,
                      columns=select_cols,
                      group_cols=group_cols,
                      index_cols=index_cols,
                      chunks={"row": 2})

    written_states = []
    written_data = []
    writes = []

    # Write out STATE_ID and DATA
    for i, ds in enumerate(xds):
        dims = ds.dims
        chunks = ds.chunks
        state = da.arange(i, i + dims["row"], chunks=chunks["row"])
        state = state.astype(np.int32)
        written_states.append(state)

        data = da.arange(i, i + dims["row"] * dims["chan"] * dims["corr"])
        data = data.reshape(dims["row"], dims["chan"], dims["corr"])
        data = data.rechunk((chunks["row"], chunks["chan"], chunks["corr"]))
        data = data.astype(data_dtype)
        written_data.append(data)

        nds = ds.assign(STATE_ID=(("row", ), state),
                        DATA=(("row", "chan", "corr"), data))

        write = xds_to_table(nds, ms, ["STATE_ID", "DATA"])
        writes.append(write)

    # Do all writes in parallel
    dask.compute(writes)

    xds = xds_from_ms(ms,
                      columns=select_cols,
                      group_cols=group_cols,
                      index_cols=index_cols,
                      chunks={"row": 2})

    # Check that state and data have been correctly written
    it = enumerate(zip(xds, written_states, written_data))
    for i, (ds, state, data) in it:
        assert_array_equal(ds.STATE_ID.data, state)
        assert_array_equal(ds.DATA.data, data)
Code example #10
def ordering_taql(table_proxy, index_cols, taql_where=''):
    select = select_clause(["ROWID() as __tablerow__"])
    orderby = "\n" + orderby_clause(index_cols)

    if taql_where != '':
        taql_where = "\nWHERE\n\t%s" % taql_where

    query = "%s\nFROM\n\t$1%s%s" % (select, taql_where, orderby)

    return TableProxy(taql_factory, query, tables=[table_proxy],
                      __executor_key__=table_proxy.executor_key)
Code example #11
def ordering_taql(table_proxy, index_cols, taql_where=''):
    select = select_clause(["ROWID() as __tablerow__"])
    orderby = "\n" + orderby_clause(index_cols)

    if taql_where != '':
        taql_where = f"\nWHERE\n\t{taql_where}"

    query = f"{select}\nFROM\n\t$1{taql_where}{orderby}"

    return TableProxy(taql_factory, query, tables=[table_proxy],
                      __executor_key__=table_proxy.executor_key)
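
As with the grouping variant, and assuming orderby_clause emits a plain ORDERBY clause, the query produced for hypothetical index_cols=["TIME", "ANTENNA1"] would look roughly like:

# Illustration only: approximate TAQL text assembled by ordering_taql.
example_query = (
    "SELECT\n"
    "\tROWID() as __tablerow__\n"
    "FROM\n"
    "\t$1\n"
    "ORDERBY\n"
    "\tTIME, ANTENNA1"
)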
Code example #12
File: test_table_proxy.py Project: ska-sa/dask-ms
    def _ant1_factory(ms):
        proxy = TableProxy(pt.table, ms, ack=False, readonly=False)
        nrows = proxy.nrows().result()

        name = 'ant1'
        row_chunk = 2
        layers = {}
        chunks = []

        for c, sr in enumerate(range(0, nrows, row_chunk)):
            er = min(sr + row_chunk, nrows)
            chunk_size = er - sr
            chunks.append(chunk_size)
            layers[(name, c)] = (proxy.getcol, "ANTENNA1", sr, chunk_size)

        # Create array
        graph = HighLevelGraph.from_collections(name, layers, [])
        ant1 = da.Array(graph, name, (tuple(chunks), ), dtype=np.int32)
        # Evaluate futures
        return ant1.map_blocks(lambda f: f.result(), dtype=ant1.dtype)
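
The nested factory above builds a lazy dask array whose chunks are getcol futures, resolved per block by map_blocks. A usage sketch, assuming the factory is in scope and "test.ms" is a Measurement Set with an ANTENNA1 column:

# Sketch only: "test.ms" is an assumption; compute() resolves each
# chunk's getcol future via the map_blocks(lambda f: f.result()) step.
ant1 = _ant1_factory("test.ms")
print(ant1.compute())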
Code example #13
File: test_table_proxy.py Project: ska-sa/dask-ms
def test_taql_proxy_pickling(ms):
    """ Test taql pickling """
    proxy = TableProxy(pt.taql, f"SELECT UNIQUE ANTENNA1 FROM '{ms}'")
    proxy2 = pickle.loads(pickle.dumps(proxy))

    assert_liveness(1, 1)

    assert proxy is proxy2
    assert tokenize(proxy) == tokenize(proxy2)

    del proxy, proxy2
    assert_liveness(0, 0)
Code example #14
def test_table_proxy_locks(ms, lockseq):
    assert len(_table_cache) == 0

    table_proxy = TableProxy(pt.table, ms, readonly=False, ack=False)

    reads = 0
    writes = 0

    fn_map = {'a': table_proxy._acquire, 'd': table_proxy._release}
    lock_map = {'n': NOLOCK, 'r': READLOCK, 'w': WRITELOCK}

    for action, lock in lockseq:
        try:
            fn = fn_map[action]
        except KeyError:
            raise ValueError("Invalid action '%s'" % action)

        try:
            locktype = lock_map[lock]
        except KeyError:
            raise ValueError("Invalid lock type '%s'" % locktype)

        # Increment/decrement on acquire/release
        if action == "a":
            if locktype == READLOCK:
                reads += 1
            elif locktype == WRITELOCK:
                writes += 1
        elif action == "d":
            if locktype == READLOCK:
                reads -= 1
            elif locktype == WRITELOCK:
                writes -= 1

        fn(locktype)

        # Check invariants
        have_locks = reads + writes > 0
        assert table_proxy._readlocks == reads
        assert table_proxy._writelocks == writes
        assert table_proxy._table.haslock(table_proxy._write) is have_locks
        assert table_proxy._write is (writes > 0)

    # Check invariants
    have_locks = reads + writes > 0
    assert reads == 0
    assert writes == 0
    assert table_proxy._readlocks == reads
    assert table_proxy._writelocks == writes
    assert table_proxy._table.haslock(table_proxy._write) is have_locks
    assert table_proxy._write is (writes > 0)
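
For reference, lockseq is an iterable of (action, lock) pairs: the action is 'a' (acquire) or 'd' (release) and the lock is 'n' (NOLOCK), 'r' (READLOCK) or 'w' (WRITELOCK). A balanced, hypothetical sequence such as the one below exercises the invariants checked above:

# Sketch only: one plausible, balanced lock sequence for the test above.
lockseq = [
    ("a", "r"),  # acquire READLOCK  -> reads == 1
    ("a", "w"),  # acquire WRITELOCK -> writes == 1
    ("d", "w"),  # release WRITELOCK -> writes == 0
    ("d", "r"),  # release READLOCK  -> reads == 0
]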
Code example #15
File: test_table_proxy.py Project: ska-sa/dask-ms
def test_table_proxy_pickling(ms):
    """ Test table pickling """
    proxy = TableProxy(pt.table, ms, ack=False, readonly=False)
    proxy2 = pickle.loads(pickle.dumps(proxy))

    assert_liveness(1, 1)

    # Same object and tokens
    assert proxy is proxy2
    assert tokenize(proxy) == tokenize(proxy2)

    del proxy, proxy2

    assert_liveness(0, 0)
Code example #16
File: test_table_proxy.py Project: smasoka/dask-ms
def test_taql_factory(ms, ant_table, readonly):
    """ Test that we can do a somewhat complicated taql query """
    ms_proxy = TableProxy(pt.table, ms, ack=False, readonly=True)
    ant_proxy = TableProxy(pt.table, ant_table, ack=False, readonly=True)
    query = "SELECT [SELECT NAME FROM $2][ANTENNA1] AS NAME FROM $1 "
    taql_proxy = TableProxy(taql_factory, query, tables=[ms_proxy, ant_proxy],
                            readonly=readonly)

    ant1 = ms_proxy.getcol("ANTENNA1").result()
    actual_ant_row_names = taql_proxy.getcol("NAME").result()
    expected_ant_row_names = ['ANTENNA-%d' % i for i in ant1]

    assert_array_equal(actual_ant_row_names, expected_ant_row_names)
Code example #17
def test_column_metadata(ms, column, shape, chunks, table_schema, dtype):
    table_proxy = TableProxy(pt.table, ms, readonly=True, ack=False)
    assert_liveness(1, 1)

    try:
        dims = table_schema[column]['dims']
    except KeyError:
        dims = tuple("%s-%d" % (column, i) for i in range(1, len(shape) + 1))

    meta = column_metadata(column, table_proxy, table_schema, dict(chunks))

    assert meta.shape == shape
    assert meta.dims == dims
    assert meta.chunks == [c[1] for c in chunks[:len(meta.shape)]]
    assert meta.dtype == dtype

    del table_proxy
    assert_liveness(0, 0)
Code example #18
File: test_table_proxy.py Project: ska-sa/dask-ms
def test_table_proxy(ms):
    """ Base table proxy test """
    tp = TableProxy(pt.table, ms, ack=False, readonly=False)
    tq = TableProxy(pt.taql, f"SELECT UNIQUE ANTENNA1 FROM '{ms}'")

    assert_liveness(2, 1)

    assert tp.nrows().result() == 10
    assert tq.nrows().result() == 3

    # Different tokens
    assert tokenize(tp) != tokenize(tq)

    del tp, tq

    assert_liveness(0, 0)
Code example #19
def table_proxy(ms):
    return TableProxy(pt.table,
                      ms,
                      ack=False,
                      lockoptions='user',
                      readonly=True)