def test_embedding_table_proxy_in_taql(ms, reverse):
    """ Test using a TableProxy to create a TAQL TableProxy """
    proxy = TableProxy(pt.table, ms, ack=False, readonly=True)
    query = "SELECT UNIQUE ANTENNA1 FROM $1"
    taql_proxy = TableProxy(taql_factory, query, tables=[proxy])
    assert_array_equal(taql_proxy.getcol("ANTENNA1").result(), [0, 1, 2])

    # TAQL and original table
    assert_liveness(2, 1)

    if reverse:
        del proxy
        # TAQL still references the original table
        assert_liveness(2, 1)
        # Removing TAQL now results in everything clearing up
        del taql_proxy
        assert_liveness(0, 0)
    else:
        # Removing TAQL should leave the original table
        del taql_proxy
        assert_liveness(1, 1)
        # Removing the proxy removes the last reference
        del proxy
        assert_liveness(0, 0)
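Outside the test harness, the embedding pattern exercised above reduces to the following sketch. taql_factory substitutes the proxies passed via tables for the $1, $2, ... placeholders in the query; "example.ms" is a hypothetical path, not part of the test suite.

# Minimal sketch of embedding a TableProxy in a TAQL query
proxy = TableProxy(pt.table, "example.ms", ack=False, readonly=True)
taql_proxy = TableProxy(taql_factory, "SELECT UNIQUE ANTENNA1 FROM $1",
                        tables=[proxy])
print(taql_proxy.getcol("ANTENNA1").result())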
def from_store(store):
    typ = store.type()

    if typ == "casa":
        from daskms.table_proxy import TableProxy
        import pyrap.tables as pt

        table_proxy = TableProxy(pt.table, str(store.casa_path()),
                                 readonly=True, ack=False)
        keywords = table_proxy.getkeywords().result()

        try:
            version = str(keywords["MS_VERSION"])
        except KeyError:
            typ = "plain"
            version = "<unspecified>"
        else:
            typ = "measurementset"

        subtables = CasaFormat.find_subtables(keywords)
        return CasaFormat(version, subtables, typ)
    elif typ == "zarr":
        subtables = ZarrFormat.find_subtables(store)
        return ZarrFormat("0.1", subtables)
    elif typ == "parquet":
        subtables = ParquetFormat.find_subtables(store)
        return ParquetFormat("0.1", subtables)
    else:
        raise ValueError(f"Unexpected table type {typ}")
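A minimal usage sketch, assuming dask-ms' DaskMSStore provides the store interface (type() and casa_path()) consumed above; the exact import path and constructor may differ between dask-ms versions.

# Hypothetical usage; assumes daskms.fsspec_store.DaskMSStore exposes
# the type() and casa_path() methods used by from_store above
from daskms.fsspec_store import DaskMSStore

fmt = from_store(DaskMSStore("example.ms"))
print(fmt)  # e.g. a CasaFormat for a CASA Measurement Set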
def test_ms_read(ms, group_cols, index_cols, select_cols):
    xds = xds_from_ms(ms, columns=select_cols,
                      group_cols=group_cols,
                      index_cols=index_cols,
                      chunks={"row": 2})

    order = orderby_clause(index_cols)
    np_column_data = []

    with TableProxy(pt.table, ms, lockoptions='auto', ack=False) as T:
        for ds in xds:
            assert "ROWID" in ds.coords
            group_col_values = [ds.attrs[a] for a in group_cols]
            where = where_clause(group_cols, group_col_values)
            query = f"SELECT * FROM $1 {where} {order}"

            with TableProxy(taql_factory, query, tables=[T]) as Q:
                column_data = {c: Q.getcol(c).result() for c in select_cols}

            np_column_data.append(column_data)

    del T

    for ds, column_data in zip(xds, np_column_data):
        for c in select_cols:
            dask_data = ds.data_vars[c].data.compute()
            assert_array_equal(column_data[c], dask_data)
def group_ordering_taql(table_proxy, group_cols, index_cols, taql_where=''):
    if len(group_cols) == 0:
        raise ValueError("group_ordering_taql requires "
                         "len(group_cols) > 0")
    else:
        index_group_cols = [f"GAGGR({c}) as GROUP_{c}"
                            for c in index_cols]
        # Group Row IDs
        index_group_cols.append("GROWID() AS __tablerow__")
        # Number of rows in the group
        index_group_cols.append("GCOUNT() as __tablerows__")
        # The first row of the group
        index_group_cols.append("GROWID()[0] as __firstrow__")

        groupby = groupby_clause(group_cols)
        select = select_clause(group_cols + index_group_cols)

        if taql_where != '':
            taql_where = f"\nWHERE\n\t{taql_where}"

        query = f"{select}\nFROM\n\t$1{taql_where}\n{groupby}"

        return TableProxy(taql_factory, query, tables=[table_proxy],
                          __executor_key__=table_proxy.executor_key)

    # Defensive guard; the branches above always return or raise
    raise RuntimeError("Invalid condition in group_ordering_taql")
def group_ordering_taql(table_proxy, group_cols, index_cols, taql_where=''):
    if len(group_cols) == 0:
        raise ValueError("group_ordering_taql requires "
                         "len(group_cols) > 0")
    else:
        index_group_cols = ["GAGGR(%s) as GROUP_%s" % (c, c)
                            for c in index_cols]
        # Group Row IDs
        index_group_cols.append("GROWID() AS __tablerow__")
        # Number of rows in the group
        index_group_cols.append("GCOUNT() as __tablerows__")
        # The first row of the group
        index_group_cols.append("GROWID()[0] as __firstrow__")

        groupby = groupby_clause(group_cols)
        select = select_clause(group_cols + index_group_cols)

        # Unlike the variant above, the filter is applied as a HAVING
        # clause after grouping, rather than as a WHERE clause before it
        if taql_where != '':
            taql_where = "\nHAVING\n\t%s" % taql_where

        query = "%s\nFROM\n\t$1\n%s%s" % (select, groupby, taql_where)

        return TableProxy(taql_factory, query, tables=[table_proxy],
                          __executor_key__=table_proxy.executor_key)

    raise RuntimeError("Invalid condition in group_ordering_taql")
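The two variants above place the row filter differently: the first injects it as a WHERE clause before grouping, the second as a HAVING clause after aggregation. A runnable sketch of the resulting query shapes, using hypothetical stand-in clause strings (the real select_clause/groupby_clause helpers may format their output differently):

# Hypothetical stand-ins for the select/groupby clause helpers
select = "SELECT\n\tFIELD_ID,\n\tGAGGR(TIME) as GROUP_TIME"
groupby = "GROUPBY\n\tFIELD_ID"
where = "ANTENNA1 != ANTENNA2"

# First variant: rows filtered before grouping
q1 = f"{select}\nFROM\n\t$1\nWHERE\n\t{where}\n{groupby}"
# Second variant: groups filtered after aggregation
q2 = f"{select}\nFROM\n\t$1\n{groupby}\nHAVING\n\t{where}"
print(q1, q2, sep="\n\n")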
def _writable_table_proxy(table_name):
    return TableProxy(pt.table, table_name,
                      ack=False,
                      readonly=False,
                      lockoptions='user',
                      __executor_key__=executor_key(table_name))
def _table_proxy_factory(self):
    return TableProxy(pt.table, self.table_path,
                      ack=False,
                      readonly=True,
                      lockoptions='user',
                      __executor_key__=executor_key(self.canonical_name))
def test_row_query(ms, index_cols):
    T = TableProxy(pt.table, ms, readonly=True,
                   lockoptions='auto', ack=False)

    # Get the expected row ordering by lexically
    # sorting the indexing columns
    cols = [(name, T.getcol(name).result()) for name in index_cols]
    expected_rows = np.lexsort(tuple(c for n, c in reversed(cols)))

    del T

    xds = xds_from_ms(ms, columns=index_cols,
                      group_cols="__row__",
                      index_cols=index_cols,
                      chunks={"row": 2})

    actual_rows = da.concatenate([ds.ROWID.data for ds in xds])
    assert_array_equal(actual_rows, expected_rows)
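The reversed(cols) above compensates for np.lexsort treating its last key as the primary sort key, so that index_cols[0] ends up as the primary key. A self-contained illustration:

import numpy as np

# np.lexsort sorts by the *last* key first, hence the reversal above
time = np.array([1, 0, 1, 0])
ant1 = np.array([0, 0, 1, 1])

# Primary key: time, secondary key: ant1
order = np.lexsort((ant1, time))
print(order)  # [1 3 0 2]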
def test_ms_update(ms, group_cols, index_cols, select_cols):
    # Zero everything to be sure
    with TableProxy(pt.table, ms, readonly=False,
                    lockoptions='auto', ack=False) as T:
        nrows = T.nrows().result()
        T.putcol("STATE_ID", np.full(nrows, 0, dtype=np.int32)).result()
        data = np.zeros_like(T.getcol("DATA").result())
        data_dtype = data.dtype
        T.putcol("DATA", data).result()

    xds = xds_from_ms(ms, columns=select_cols,
                      group_cols=group_cols,
                      index_cols=index_cols,
                      chunks={"row": 2})

    written_states = []
    written_data = []
    writes = []

    # Write out STATE_ID and DATA
    for i, ds in enumerate(xds):
        dims = ds.dims
        chunks = ds.chunks
        state = da.arange(i, i + dims["row"], chunks=chunks["row"])
        state = state.astype(np.int32)
        written_states.append(state)

        data = da.arange(i, i + dims["row"] * dims["chan"] * dims["corr"])
        data = data.reshape(dims["row"], dims["chan"], dims["corr"])
        data = data.rechunk((chunks["row"], chunks["chan"], chunks["corr"]))
        data = data.astype(data_dtype)
        written_data.append(data)

        nds = ds.assign(STATE_ID=(("row",), state),
                        DATA=(("row", "chan", "corr"), data))

        write = xds_to_table(nds, ms, ["STATE_ID", "DATA"])
        writes.append(write)

    # Do all writes in parallel
    dask.compute(writes)

    xds = xds_from_ms(ms, columns=select_cols,
                      group_cols=group_cols,
                      index_cols=index_cols,
                      chunks={"row": 2})

    # Check that state and data have been correctly written
    it = enumerate(zip(xds, written_states, written_data))

    for i, (ds, state, data) in it:
        assert_array_equal(ds.STATE_ID.data, state)
        assert_array_equal(ds.DATA.data, data)
def ordering_taql(table_proxy, index_cols, taql_where=''):
    select = select_clause(["ROWID() as __tablerow__"])
    orderby = "\n" + orderby_clause(index_cols)

    if taql_where != '':
        taql_where = f"\nWHERE\n\t{taql_where}"

    query = f"{select}\nFROM\n\t$1{taql_where}{orderby}"

    return TableProxy(taql_factory, query, tables=[table_proxy],
                      __executor_key__=table_proxy.executor_key)
def _ant1_factory(ms):
    proxy = TableProxy(pt.table, ms, ack=False, readonly=False)
    nrows = proxy.nrows().result()

    name = 'ant1'
    row_chunk = 2
    layers = {}
    chunks = []

    for c, sr in enumerate(range(0, nrows, row_chunk)):
        er = min(sr + row_chunk, nrows)
        chunk_size = er - sr
        chunks.append(chunk_size)
        layers[(name, c)] = (proxy.getcol, "ANTENNA1", sr, chunk_size)

    # Create array
    graph = HighLevelGraph.from_collections(name, layers, [])
    ant1 = da.Array(graph, name, (tuple(chunks),), dtype=np.int32)

    # Evaluate futures
    return ant1.map_blocks(lambda f: f.result(), dtype=ant1.dtype)
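A usage sketch: computing the resulting dask array first schedules a proxy.getcol future per chunk, after which map_blocks resolves each future into its numpy result ("example.ms" is a hypothetical path):

# Hypothetical usage of the factory above
ant1 = _ant1_factory("example.ms")
print(ant1.chunks)     # row chunks of at most 2
print(ant1.compute())  # concatenated ANTENNA1 values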
def test_taql_proxy_pickling(ms):
    """ Test TAQL TableProxy pickling """
    proxy = TableProxy(pt.taql, f"SELECT UNIQUE ANTENNA1 FROM '{ms}'")
    proxy2 = pickle.loads(pickle.dumps(proxy))

    assert_liveness(1, 1)
    assert proxy is proxy2
    assert tokenize(proxy) == tokenize(proxy2)

    del proxy, proxy2
    assert_liveness(0, 0)
def test_table_proxy_locks(ms, lockseq):
    assert len(_table_cache) == 0

    table_proxy = TableProxy(pt.table, ms, readonly=False, ack=False)

    reads = 0
    writes = 0

    fn_map = {'a': table_proxy._acquire, 'd': table_proxy._release}
    lock_map = {'n': NOLOCK, 'r': READLOCK, 'w': WRITELOCK}

    for action, lock in lockseq:
        try:
            fn = fn_map[action]
        except KeyError:
            raise ValueError("Invalid action '%s'" % action)

        try:
            locktype = lock_map[lock]
        except KeyError:
            # Report the offending key; locktype is undefined here
            raise ValueError("Invalid lock type '%s'" % lock)

        # Increment/decrement on acquire/release
        if action == "a":
            if locktype == READLOCK:
                reads += 1
            elif locktype == WRITELOCK:
                writes += 1
        elif action == "d":
            if locktype == READLOCK:
                reads -= 1
            elif locktype == WRITELOCK:
                writes -= 1

        fn(locktype)

        # Check invariants
        have_locks = reads + writes > 0
        assert table_proxy._readlocks == reads
        assert table_proxy._writelocks == writes
        assert table_proxy._table.haslock(table_proxy._write) is have_locks
        assert table_proxy._write is (writes > 0)

    # Check invariants -- all locks should have been released
    have_locks = reads + writes > 0
    assert reads == 0
    assert writes == 0
    assert table_proxy._readlocks == reads
    assert table_proxy._writelocks == writes
    assert table_proxy._table.haslock(table_proxy._write) is have_locks
    assert table_proxy._write is (writes > 0)
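The lockseq parameter is an iterable of two-character action/lock pairs: 'a'/'d' acquire or release, and 'n'/'r'/'w' select NOLOCK, READLOCK or WRITELOCK. A hypothetical parametrisation driving the test (the sequences actually used by the suite may differ; each must end with all locks released to satisfy the final invariants):

import pytest

# Hypothetical lock sequences; each two-character string unpacks into
# an (action, lock) pair in the loop above
lockseqs = [
    ["ar", "dr"],              # acquire then release a read lock
    ["aw", "dw"],              # acquire then release a write lock
    ["ar", "aw", "dr", "dw"],  # interleaved read and write locks
]

@pytest.mark.parametrize("lockseq", lockseqs)
def test_table_proxy_locks_examples(ms, lockseq):
    test_table_proxy_locks(ms, lockseq)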
def test_table_proxy_pickling(ms):
    """ Test table pickling """
    proxy = TableProxy(pt.table, ms, ack=False, readonly=False)
    proxy2 = pickle.loads(pickle.dumps(proxy))

    assert_liveness(1, 1)

    # Same object and tokens
    assert proxy is proxy2
    assert tokenize(proxy) == tokenize(proxy2)

    del proxy, proxy2
    assert_liveness(0, 0)
def test_taql_factory(ms, ant_table, readonly):
    """ Test that we can do a somewhat complicated TAQL query """
    ms_proxy = TableProxy(pt.table, ms, ack=False, readonly=True)
    ant_proxy = TableProxy(pt.table, ant_table, ack=False, readonly=True)
    query = "SELECT [SELECT NAME FROM $2][ANTENNA1] AS NAME FROM $1"
    taql_proxy = TableProxy(taql_factory, query,
                            tables=[ms_proxy, ant_proxy],
                            readonly=readonly)

    ant1 = ms_proxy.getcol("ANTENNA1").result()
    actual_ant_row_names = taql_proxy.getcol("NAME").result()
    expected_ant_row_names = ['ANTENNA-%d' % i for i in ant1]

    assert_array_equal(actual_ant_row_names, expected_ant_row_names)
def test_column_metadata(ms, column, shape, chunks, table_schema, dtype):
    table_proxy = TableProxy(pt.table, ms, readonly=True, ack=False)
    assert_liveness(1, 1)

    try:
        dims = table_schema[column]['dims']
    except KeyError:
        dims = tuple("%s-%d" % (column, i)
                     for i in range(1, len(shape) + 1))

    meta = column_metadata(column, table_proxy, table_schema, dict(chunks))

    assert meta.shape == shape
    assert meta.dims == dims
    assert meta.chunks == [c[1] for c in chunks[:len(meta.shape)]]
    assert meta.dtype == dtype

    del table_proxy
    assert_liveness(0, 0)
def test_table_proxy(ms):
    """ Base table proxy test """
    tp = TableProxy(pt.table, ms, ack=False, readonly=False)
    tq = TableProxy(pt.taql, f"SELECT UNIQUE ANTENNA1 FROM '{ms}'")

    assert_liveness(2, 1)

    assert tp.nrows().result() == 10
    assert tq.nrows().result() == 3

    # Different tokens
    assert tokenize(tp) != tokenize(tq)

    del tp, tq
    assert_liveness(0, 0)
def table_proxy(ms):
    return TableProxy(pt.table, ms,
                      ack=False,
                      lockoptions='user',
                      readonly=True)
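Presumably registered as a pytest fixture. A hypothetical test consuming it might look as follows; nrows() and getcol() follow the future-returning API seen in the tests above, and `ms` is assumed to point at a small test Measurement Set.

# Hypothetical consumer of the fixture above
def test_antenna1_readable(table_proxy):
    ant1 = table_proxy.getcol("ANTENNA1").result()
    assert len(ant1) == table_proxy.nrows().result()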