def test_ncell_bytes_attribute(self):
    """Check the dtype and cell count of an attribute built from a 10-byte dtype."""
    bytes_dtype = np.dtype((np.bytes_, 10))
    ctx = t.Ctx()
    attribute = t.Attr(ctx, "foo", dtype=bytes_dtype)

    self.assertEqual(attribute.dtype, bytes_dtype)
    self.assertEqual(attribute.ncells, 10)
def test_write_read(self):
    """Round-trip bytes through VFS file handles, including an empty payload."""
    ctx = t.Ctx()
    vfs = t.VFS(ctx)
    payload = b"bar"

    # Write the payload and confirm the size reported for the file.
    writer = vfs.open(self.path("foo"), "w")
    vfs.write(writer, payload)
    vfs.close(writer)
    self.assertEqual(vfs.file_size(self.path("foo")), 3)

    # Read the same three bytes back from offset 0.
    reader = vfs.open(self.path("foo"), "r")
    self.assertEqual(vfs.read(reader, 0, 3), payload)
    vfs.close(reader)

    # write / read empty input
    writer = vfs.open(self.path("baz"), "w")
    vfs.write(writer, b"")
    vfs.close(writer)
    self.assertEqual(vfs.file_size(self.path("baz")), 0)

    reader = vfs.open(self.path("baz"), "r")
    self.assertEqual(vfs.read(reader, 0, 0), b"")
    vfs.close(reader)

    # read from file that does not exist
    with self.assertRaises(t.TileDBError):
        vfs.open(self.path("do_not_exist"), "r")
def test_minimal_attribute(self):
    """Check the defaults of an attribute constructed with only a context.

    Asserts that the attribute is anonymous, has an empty name, a float64
    dtype, and a compressor of ``(None, -1)``.
    """
    ctx = t.Ctx()
    attr = t.Attr(ctx)
    self.assertTrue(attr.isanon)
    self.assertEqual(attr.name, u"")
    # np.float_ was only an alias of np.float64 and was removed in NumPy 2.0;
    # naming the concrete type keeps the test importable on every version.
    self.assertEqual(attr.dtype, np.float64)
    self.assertEqual(attr.compressor, (None, -1))
def test_ctx_config_from_file(self):
    """Load a config file into a context and read the setting back."""
    config_path = self.path("config")
    with open(config_path, "w") as config_file:
        config_file.write("sm.tile_cache_size 100")

    loaded = t.Config.load(config_path)
    ctx = t.Ctx(config=loaded)
    ctx_config = ctx.config()

    # The value is compared as a string, exactly as written in the file.
    self.assertEqual(ctx_config["sm.tile_cache_size"], "100")
def test_multiple_attributes(self):
    """Write and read a dense array with two attributes; reject bad writes.

    A dict keyed by attribute name is written to the whole array and read
    back. Two invalid writes are then expected to raise ``TileDBError``:
    one where the ``ints`` array has the wrong number of cells, and one that
    carries a key (``foo``) that is not an attribute of the array.
    """
    ctx = t.Ctx()
    dom = t.Domain(ctx, t.Dim(ctx, domain=(0, 7), tile=8, dtype=int))
    attr_int = t.Attr(ctx, "ints", dtype=int)
    attr_float = t.Attr(ctx, "floats", dtype=float)
    T = t.DenseArray(ctx, self.path("foo"), domain=dom,
                     attrs=(attr_int, attr_float))

    V_ints = np.array([0, 1, 2, 3, 4, 6, 7, 5])
    V_floats = np.array([0.0, 1.0, 2.0, 3.0, 4.0, 6.0, 7.0, 5.0])
    V = {"ints": V_ints, "floats": V_floats}

    T[:] = V
    R = T[:]
    assert_array_equal(V["ints"], R["ints"])
    assert_array_equal(V["floats"], R["floats"])

    # check error ncells length
    wrong_length = dict(V)
    wrong_length["ints"] = V_ints[1:2].copy()
    with self.assertRaises(t.TileDBError):
        T[:] = wrong_length

    # check error attribute does not exist
    # Start again from the valid payload so the only problem in this write
    # is the unknown "foo" key; reusing the wrong-length dict would let the
    # length error satisfy this assertion instead.
    unknown_attribute = dict(V)
    unknown_attribute["foo"] = V_ints.astype(np.int8)
    with self.assertRaises(t.TileDBError):
        T[:] = unknown_attribute
def test_key_not_found(self):
    """Looking up a key that was never stored raises KeyError."""
    # create a kv database
    ctx = t.Ctx()
    value_attr = t.Attr(ctx, "value", dtype=bytes)
    kv = t.KV(ctx, self.path("foo"), attrs=(value_attr, ))

    with self.assertRaises(KeyError):
        kv["not here"]
def test_dense_array_fp_domain_error(self):
    """Creating a dense array over a float64 dimension raises TileDBError."""
    ctx = t.Ctx()
    float_dim = t.Dim(ctx, domain=(1, 8), tile=2, dtype=np.float64)
    float_domain = t.Domain(ctx, float_dim)
    value_attr = t.Attr(ctx, "val", dtype=np.float64)

    with self.assertRaises(t.TileDBError):
        t.DenseArray(ctx, self.path("foo"), domain=float_domain,
                     attrs=(value_attr, ))
def test_kv_contains(self):
    """Membership is false before a key is stored and true afterwards."""
    # create a kv database
    ctx = t.Ctx()
    value_attr = t.Attr(ctx, "value", dtype=bytes)
    kv = t.KV(ctx, self.path("foo"), attrs=(value_attr, ))

    self.assertNotIn("foo", kv)
    kv['foo'] = 'bar'
    self.assertIn("foo", kv)
def test_kv_dict(self):
    """Both kv.dict() and dict(kv) produce the stored key/value pairs."""
    # create a kv database
    ctx = t.Ctx()
    value_attr = t.Attr(ctx, "value", dtype=bytes)
    kv = t.KV(ctx, self.path("foo"), attrs=(value_attr, ))

    kv['foo'] = 'bar'
    kv['baz'] = 'foo'

    expected = {'foo': 'bar', 'baz': 'foo'}
    self.assertEqual(kv.dict(), expected)
    self.assertEqual(dict(kv), expected)
def test_full_attribute(self):
    """An attribute built with name, dtype and compressor reports all three."""
    ctx = t.Ctx()
    attribute = t.Attr(ctx, "foo", dtype=np.int64, compressor=("zstd", 10))
    attribute.dump()

    self.assertEqual(attribute.name, "foo")
    self.assertEqual(attribute.dtype, np.int64)

    # The compressor property unpacks into a (name, level) pair.
    codec, codec_level = attribute.compressor
    self.assertEqual(codec, "zstd")
    self.assertEqual(codec_level, 10)
def test_kv_write_read(self):
    """A value stored under a key is returned by a lookup on the same object."""
    # create a kv database
    ctx = t.Ctx()
    value_attr = t.Attr(ctx, "value", dtype=bytes)
    kv = t.KV(ctx, self.path("foo"), attrs=(value_attr, ))
    value_attr.dump()

    kv['foo'] = 'bar'
    kv.dump()

    self.assertEqual(kv["foo"], 'bar')
def test_sparse_array_not_dense(self):
    """A SparseArray reports the path it was created at and sparse=True."""
    ctx = t.Ctx()
    rows = t.Dim(ctx, domain=(1, 8), tile=2)
    cols = t.Dim(ctx, domain=(1, 8), tile=2)
    dom = t.Domain(ctx, rows, cols)
    value_attr = t.Attr(ctx, "val", dtype='f8')

    T = t.SparseArray(ctx, self.path("foo"), domain=dom, attrs=(value_attr, ))
    T.dump()

    self.assertEqual(T.name, self.path("foo"))
    self.assertTrue(T.sparse)
def test_simple_1d_sparse_vector(self):
    """Values written at coordinates 1 and 2 are read back at the same coordinates."""
    ctx = t.Ctx()
    dim = t.Dim(ctx, domain=(0, 3), tile=4, dtype=int)
    dom = t.Domain(ctx, dim)
    anon_attr = t.Attr(ctx, dtype=int)
    T = t.SparseArray(ctx, self.path("foo"), domain=dom, attrs=(anon_attr, ))

    written = np.array([3, 4])
    T[[1, 2]] = written

    assert_array_equal(T[[1, 2]], written)
def test_supports(self):
    """Check VFS.supports() for "file", "s3", "hdfs" and an invalid scheme."""
    ctx = t.Ctx()
    vfs = t.VFS(ctx)

    self.assertTrue(vfs.supports("file"))

    # For these schemes only the type of the answer is checked.
    for scheme in ("s3", "hdfs"):
        self.assertIsInstance(vfs.supports(scheme), bool)

    with self.assertRaises(ValueError):
        vfs.supports("invalid")
def test_unique_attributes(self):
    """A schema with two attributes that share a name raises TileDBError."""
    ctx = t.Ctx()
    dim_d1 = t.Dim(ctx, "d1", (1, 4), 2, dtype='u8')
    dim_d2 = t.Dim(ctx, "d2", (1, 4), 2, dtype='u8')
    dom = t.Domain(ctx, dim_d1, dim_d2)

    # Both attributes are named "foo"; only their dtypes differ.
    float_attr = t.Attr(ctx, "foo", dtype=float)
    int_attr = t.Attr(ctx, "foo", dtype=int)

    with self.assertRaises(t.TileDBError):
        t.ArraySchema(ctx, "foobar", domain=dom, attrs=(float_attr, int_attr))
def test_sparse_unordered_fp_domain(self):
    """Write float coordinates out of order and read them back in ascending order."""
    ctx = t.Ctx()
    x_dim = t.Dim(ctx, "x", domain=(0.0, 10.0), tile=2.0, dtype=float)
    dom = t.Domain(ctx, x_dim)
    anon_attr = t.Attr(ctx, dtype=float)
    T = t.SparseArray(ctx, self.path("foo"), domain=dom, attrs=(anon_attr, ))

    written = np.array([3.3, 2.7])
    T[[4.2, 2.5]] = written

    # The read lists the coordinates in the opposite order to the write,
    # so the expected values are the written ones reversed.
    assert_array_equal(T[[2.5, 4.2]], written[::-1])
def test_attribute(self):
    """A named attribute with no other options has a float64 dtype and no compression."""
    ctx = t.Ctx()
    attribute = t.Attr(ctx, "foo")
    attribute.dump()

    self.assertEqual(attribute.name, "foo")
    self.assertEqual(attribute.dtype, np.float64,
                     "default attribute type is float64")

    codec, codec_level = attribute.compressor
    self.assertEqual(codec, None, "default to no compression")
    self.assertEqual(codec_level, -1,
                     "default compression level when none is specified")
def test_kv_write_load_read(self):
    """A value written to a KV is readable after loading it again from its path."""
    # create a kv database
    ctx = t.Ctx()
    value_attr = t.Attr(ctx, "value", dtype=bytes)
    kv = t.KV(ctx, self.path("foo"), attrs=(value_attr, ))
    kv['foo'] = 'bar'

    # Drop the original object before loading from the same path.
    del kv

    # try to load it
    kv = t.KV.load(ctx, self.path("foo"))
    self.assertEqual(kv["foo"], 'bar')
def test_ncell_attributes(self):
    """Round-trip a dense array whose attribute is a record of two int32 fields."""
    ctx = t.Ctx()
    dim = t.Dim(ctx, domain=(0, 9), tile=10, dtype=int)
    dom = t.Domain(ctx, dim)
    pair_attr = t.Attr(ctx, dtype=[("", np.int32), ("", np.int32)])
    T = t.DenseArray(ctx, self.path("foo"), domain=dom, attrs=(pair_attr, ))

    data = np.ones((10, ), dtype=[("", np.int32), ("", np.int32)])
    self.assertEqual(data.dtype, pair_attr.dtype)

    T[:] = data

    # Compare both the full read and a partial slice.
    assert_array_equal(data, T[:])
    assert_array_equal(data[:5], T[:5])
def test_io(self):
    """Exercise FileIO write, read, tell, seek and readall on a ten-byte file."""
    ctx = t.Ctx()
    vfs = t.VFS(ctx)
    payload = b"0123456789"

    # Write the payload and check the position after flushing.
    fio = t.FileIO(vfs, self.path("foo"), mode="w")
    fio.write(payload)
    fio.flush()
    self.assertEqual(fio.tell(), len(payload))

    # Writing through a handle opened for reading is rejected.
    fio = t.FileIO(vfs, self.path("foo"), mode="r")
    with self.assertRaises(IOError):
        fio.write(b"foo")

    self.assertEqual(vfs.file_size(self.path("foo")), len(payload))

    # Sequential reads advance the position; the last read is short.
    fio = t.FileIO(vfs, self.path("foo"), mode='r')
    self.assertEqual(fio.read(3), b'012')
    self.assertEqual(fio.tell(), 3)
    self.assertEqual(fio.read(3), b'345')
    self.assertEqual(fio.tell(), 6)
    self.assertEqual(fio.read(10), b'6789')
    self.assertEqual(fio.tell(), 10)

    # seek from beginning
    fio.seek(0)
    self.assertEqual(fio.tell(), 0)
    self.assertEqual(fio.read(), payload)

    # seek must be positive when SEEK_SET
    with self.assertRaises(ValueError):
        fio.seek(-1, 0)

    # seek from current position
    fio.seek(5)
    self.assertEqual(fio.tell(), 5)
    fio.seek(3, 1)
    self.assertEqual(fio.tell(), 8)
    fio.seek(-3, 1)
    self.assertEqual(fio.tell(), 5)

    # seek from end
    fio.seek(-4, 2)
    self.assertEqual(fio.tell(), 6)

    # Test readall
    fio.seek(0)
    self.assertEqual(fio.readall(), payload)
    self.assertEqual(fio.tell(), 10)

    fio.seek(5)
    self.assertEqual(fio.readall(), payload[5:])
    self.assertEqual(fio.readall(), b"")
def test_read_write(self):
    """write_direct followed by read_direct returns the same 1-d data."""
    ctx = t.Ctx()
    dim = t.Dim(ctx, domain=(0, 2), tile=3)
    dom = t.Domain(ctx, dim)
    anon_attr = t.Attr(ctx, dtype='i8')
    dense = t.DenseArray(ctx, self.path("foo"), domain=dom, attrs=[anon_attr])

    data = np.array([1, 2, 3])
    dense.write_direct(data)
    dense.dump()

    assert_array_equal(dense.read_direct(), data)
    self.assertEqual(dense.ndim, data.ndim)
def test_domain(self):
    """Check ndim, rank, dtype, shape and iteration order of a two-dimension domain."""
    ctx = t.Ctx()
    dim_d1 = t.Dim(ctx, "d1", (1, 4), 2, dtype='u8')
    dim_d2 = t.Dim(ctx, "d2", (1, 4), 2, dtype='u8')
    dom = t.Domain(ctx, dim_d1, dim_d2)
    dom.dump()

    self.assertEqual(dom.ndim, 2)
    self.assertEqual(dom.rank, dom.ndim)
    self.assertEqual(dom.dtype, np.dtype("uint64"))
    self.assertEqual(dom.shape, (4, 4))

    # check that we can iterate over the dimensions
    names_in_order = [dimension.name for dimension in dom]
    self.assertEqual(["d1", "d2"], names_in_order)
def test_subarray(self):
    """Check nonempty_domain() and a sliced coordinate read on a sparse array."""
    ctx = t.Ctx()
    x_dim = t.Dim(ctx, "x", domain=(1, 10000), tile=100, dtype=int)
    dom = t.Domain(ctx, x_dim)
    anon_attr = t.Attr(ctx, "", dtype=float)
    T = t.SparseArray(ctx, self.path("foo"), domain=dom, attrs=(anon_attr, ))

    # Nothing has been written yet.
    self.assertIsNone(T.nonempty_domain())

    T[[50, 60, 100]] = [1.0, 2.0, 3.0]
    self.assertEqual(((50, 100), ), T.nonempty_domain())

    # retrieve just the valid coordinates in the subarray (queried as 40:61)
    selected = T[40:61]
    assert_array_equal(selected["coords"]["x"], [50, 60])
def test_kv_update(self):
    """Overwriting a key through a reloaded KV is visible after another reload."""
    # create a kv database
    ctx = t.Ctx()
    value_attr = t.Attr(ctx, "val", dtype=bytes)
    kv = t.KV(ctx, self.path("foo"), attrs=(value_attr, ))
    kv['foo'] = 'bar'
    del kv

    # First reload: replace the stored value.
    kv = t.KV.load(ctx, self.path("foo"))
    kv['foo'] = 'baz'
    del kv

    # Second reload: the replacement value is what comes back.
    kv = t.KV.load(ctx, self.path("foo"))
    self.assertEqual(kv['foo'], 'baz')
def test_array_1d_set_scalar(self):
    """Scalar assignment to a slice or the whole array matches the same NumPy assignment."""
    expected = np.zeros(50)

    ctx = t.Ctx()
    dim = t.Dim(ctx, domain=(0, 49), tile=10)
    dom = t.Domain(ctx, dim)
    anon_attr = t.Attr(ctx, dtype=expected.dtype)
    T = t.DenseArray(ctx, self.path("foo"), dom, (anon_attr, ))
    T[:] = expected

    for scalar in (-1, 0, 1, 10):
        # Partial range first ...
        expected[5:25] = scalar
        T[5:25] = scalar
        assert_array_equal(expected, T[:])

        # ... then the full extent.
        expected[:] = scalar
        T[:] = scalar
        assert_array_equal(expected, T[:])
def test_index_1d(self):
    """Run the 1-d good and bad index lists against a 1050-element dense array."""
    reference = np.arange(1050, dtype=int)

    ctx = t.Ctx()
    dim = t.Dim(ctx, domain=(0, 1049), tile=100)
    dom = t.Domain(ctx, dim)
    anon_attr = t.Attr(ctx, dtype=int)
    T = t.DenseArray(ctx, self.path("foo"), domain=dom, attrs=(anon_attr, ))
    T[:] = reference

    # Each good index is handed to the shared _test_index helper.
    for good in self.good_index_1d:
        self._test_index(reference, T, good)

    # Each bad index must raise IndexError.
    for bad in self.bad_index_1d:
        with self.assertRaises(IndexError):
            T[bad]
def test_index_2d(self):
    """Run index lists against a 1000x10 dense array."""
    reference = np.arange(10000).reshape((1000, 10))

    ctx = t.Ctx()
    row_dim = t.Dim(ctx, domain=(0, 999), tile=100)
    col_dim = t.Dim(ctx, domain=(0, 9), tile=2)
    dom = t.Domain(ctx, row_dim, col_dim)
    anon_attr = t.Attr(ctx, dtype=reference.dtype)
    T = t.DenseArray(ctx, self.path("foo"), dom, (anon_attr, ))
    T[:] = reference

    # NOTE(review): only the 1-d "good" index list is exercised against this
    # 2-d array; confirm whether a 2-d list of valid indices should be
    # checked here as well.
    for good in self.good_index_1d:
        self._test_index(reference, T, good)

    # Each bad 2-d index must raise IndexError.
    for bad in self.bad_index_2d:
        with self.assertRaises(IndexError):
            T[bad]
def test_ncell_attribute(self):
    """A two-field int32 record dtype gives ncells == 2; unsupported dtypes raise TypeError."""
    ctx = t.Ctx()
    pair_dtype = np.dtype([("", np.int32), ("", np.int32)])
    attribute = t.Attr(ctx, "foo", dtype=pair_dtype)

    self.assertEqual(attribute.dtype, pair_dtype)
    self.assertEqual(attribute.ncells, 2)

    # dtype subarrays not supported
    subarray_dtype = np.dtype((np.int32, 2))
    with self.assertRaises(TypeError):
        t.Attr(ctx, "foo", dtype=subarray_dtype)

    # mixed type record arrays not supported
    mixed_dtype = np.dtype([("", np.float32), ("", np.int32)])
    with self.assertRaises(TypeError):
        t.Attr(ctx, "foo", dtype=mixed_dtype)
def test_move(self):
    """Move a file between two directories, then fail to move it into a missing one."""
    ctx = t.Ctx()
    vfs = t.VFS(ctx)

    # Two directories, with an empty file created inside "bar".
    vfs.create_dir(self.path("foo"))
    vfs.create_dir(self.path("bar"))
    vfs.touch(self.path("bar/baz"))
    self.assertTrue(vfs.is_file(self.path("bar/baz")))

    # After the move the file is at the new path only.
    vfs.move(self.path("bar/baz"), self.path("foo/baz"))
    self.assertFalse(vfs.is_file(self.path("bar/baz")))
    self.assertTrue(vfs.is_file(self.path("foo/baz")))

    # moving to invalid dir should raise an error
    with self.assertRaises(t.TileDBError):
        vfs.move(self.path("foo/baz"), self.path("do_not_exist/baz"))
def test_array_interface(self):
    """np.array() converts a single-attribute dense array and rejects a two-attribute one."""
    # Tests that __array__ interface works
    ctx = t.Ctx()
    source = np.arange(1, 10)
    single_attr_array = t.DenseArray.from_numpy(ctx, self.path("arr1"), source)
    converted = np.array(single_attr_array)
    assert_array_equal(source, converted)

    # Test that __array__ interface throws an error when number of attributes > 1
    dom = t.Domain(ctx, t.Dim(ctx, domain=(0, 2), tile=3))
    foo_attr = t.Attr(ctx, "foo", dtype='i8')
    bar_attr = t.Attr(ctx, "bar", dtype='i8')
    two_attr_array = t.DenseArray(ctx, self.path("arr2"), domain=dom,
                                  attrs=(foo_attr, bar_attr))
    with self.assertRaises(ValueError):
        np.array(two_attr_array)