def test02_multiple_values(pkg, variant):
    """Drive a ``p.Loop`` that updates an index and a 3-component array.

    ``pkg`` is forwarded to ``get_class`` to obtain the array package.
    ``variant`` selects how results are evaluated after the loop: ``0``
    evaluates ``i`` and ``v`` with a single ``ek.eval`` call, any other
    value evaluates the index and each component separately.  When
    ``variant == 1`` the ``y`` component is also replaced by ``Float(0)``
    before the loop object is created.
    """
    p = get_class(pkg)

    idx = ek.arange(p.Int, 0, 10)
    vec = ek.zero(p.Array3f, 10)
    if variant == 1:
        vec.y = p.Float(0)

    loop = p.Loop(idx, vec)
    while loop.cond(idx < 5):
        idx.assign(idx + 1)
        step = p.Float(idx)
        vec.x += step
        vec.y += 2 * step
        vec.z += 4 * step

    if variant == 0:
        ek.eval(idx, vec)
    else:
        # Evaluate each loop output with its own call
        ek.eval(idx)
        ek.eval(vec.x)
        ek.eval(vec.y)
        ek.eval(vec.z)

    # Expected: entries that started below 5 end at 5, the others are
    # unchanged and contribute nothing to the accumulated 'y' component
    assert idx == p.Int(5, 5, 5, 5, 5, 5, 6, 7, 8, 9)
    assert vec.y == p.Int(30, 28, 24, 18, 10, 0, 0, 0, 0, 0)
def eval(self, pos, vel):
    """Advance ``pos`` and ``vel`` through 100 calls to ``self.timestep``.

    At the start of every iteration the current loop state is scattered
    into ``self.temp_pos`` / ``self.temp_vel`` (allocated here with room
    for ``n * 100`` entries, ``n`` being the larger of the two input
    widths).  Returns the final ``(pos, vel)`` pair.
    """
    pos = m.Array2f(pos)
    vel = m.Array2f(vel)

    # Run for 100 iterations
    it, max_it = m.UInt32(0), 100

    # Allocate scratch space
    n = max(ek.width(pos), ek.width(vel))
    self.temp_pos = ek.empty(m.Array2f, n * max_it)
    self.temp_vel = ek.empty(m.Array2f, n * max_it)

    loop = m.Loop(pos, vel, it)
    while loop.cond(it < max_it):
        # Store current loop variables; iteration 'it' owns the slots
        # [it * n, it * n + n)
        slot = it * n + ek.arange(m.UInt32, n)
        ek.scatter(self.temp_pos, pos, slot)
        ek.scatter(self.temp_vel, vel, slot)

        # Update loop variables
        next_pos, next_vel = self.timestep(pos, vel)
        pos.assign(next_pos)
        vel.assign(next_vel)

        it += 1

    # Ensure output and temp. arrays are evaluated at this point
    ek.eval(pos, vel)

    return pos, vel
def test16_custom(cname):
    """Exercise zero/empty/select/resize/scatter/gather on a custom type.

    ``cname`` is forwarded to ``get_class``; the returned type is expected
    to expose ``state`` and ``inc`` members (both are accessed below).
    """
    t = get_class(cname)

    # Construction with an explicit size
    zeros = ek.zero(t, 100)
    scratch = ek.empty(t, 100)
    assert len(zeros.state) == 100
    assert len(scratch.inc) == 100

    # Member-wise select between two instances
    scratch.state = zeros.state
    zeros.state = ek.arange(type(zeros.state), 100)
    blended = ek.select(zeros.state < 10, zeros, scratch)
    assert blended.state[3] == 3
    assert blended.state[11] == 0
    assert ek.width(blended) == 100

    # Resize after scheduling ...
    wide = ek.zero(t, 1)
    ek.schedule(wide)
    ek.resize(wide, 200)
    assert ek.width(wide) == 200
    assert ek.width(blended) == 100

    # ... and without scheduling
    wide = ek.zero(t, 1)
    ek.resize(wide, 200)
    assert ek.width(wide) == 200

    # Scatter into the resized instance and gather the values back
    index = ek.arange(type(zeros.state), 100)
    ek.scatter(wide, zeros, index)
    gathered = ek.gather(t, wide, index)
    ek.eval(gathered)
    assert gathered.state == zeros.state and gathered.inc == zeros.inc
def test07_loop_nest(pkg, variant):
    """Scatter Collatz step counts for 1..10 into a buffer.

    ``pkg`` is forwarded to ``get_class``.  With ``variant == 0`` the outer
    iteration over the start values is itself a ``p.Loop`` (so the loop
    inside ``collatz`` is nested in another ``p.Loop``); otherwise the
    outer iteration is an ordinary Python ``for`` loop.
    """
    p = get_class(pkg)

    def collatz(value: p.Int):
        """Return the number of Collatz steps needed for ``value`` to hit 1."""
        counter = p.Int(0)
        loop = p.Loop(value, counter)
        while loop.cond(ek.neq(value, 1)):
            is_even = ek.eq(value & 1, 0)
            value.assign(ek.select(is_even, value // 2, 3 * value + 1))
            counter += 1
        return counter

    i = p.Int(1)
    # 16 entries pre-filled with 1000; only the first ten get overwritten
    buf = ek.full(p.Int, 1000, 16)
    ek.eval(buf)

    if variant == 0:
        loop_1 = p.Loop(i)
        while loop_1.cond(i <= 10):
            ek.scatter(buf, collatz(p.Int(i)), i - 1)
            i += 1
    else:
        # Plain Python iteration. The 'for' statement supplies the next
        # value by itself, so no manual increment is needed, and a separate
        # name avoids rebinding the Enoki counter 'i' above.
        for start in range(1, 11):
            ek.scatter(buf, collatz(p.Int(start)), start - 1)

    assert buf == p.Int(0, 1, 7, 2, 5, 8, 16, 3, 19, 6, 1000, 1000, 1000,
                        1000, 1000, 1000)
def export_(a, migrate_to_host, version):
    """Describe the memory behind ``a`` as a dictionary.

    The returned record has the keys ``shape``, ``strides``, ``typestr``,
    ``data``, ``version``, ``device`` and ``owner``.

    Parameters:
        a: array to export; ``IsJIT``, ``IsDiff`` and ``Type`` are read.
        migrate_to_host: when true and the flattened array reports
            ``IsCUDA``, its storage is migrated to ``AllocType.Host``.
        version: stored unchanged under the ``version`` key.
    """
    shape = _ek.shape(a)
    ndim = len(shape)
    shape = tuple(reversed(shape))

    if not a.IsJIT:
        # F-style strides: the first axis of the reversed shape varies
        # fastest
        strides = []
        step = a.Type.Size
        for extent in shape:
            strides.append(step)
            step *= extent

        # Array is already contiguous in memory -- document its structure
        return {
            'shape': shape,
            'strides': tuple(strides),
            'typestr': '<' + a.Type.NumPy,
            'data': (a.data_(), False),
            'version': version,
            'device': -1,
            'owner': a
        }

    # C-style strides: the last axis of the reversed shape varies fastest
    strides = [0] * ndim
    step = a.Type.Size
    for axis in range(ndim - 1, -1, -1):
        strides[axis] = step
        step *= shape[axis]

    # JIT array -- requires extra transformations
    flat = _ek.ravel(_ek.detach(a) if a.IsDiff else a)
    _ek.eval(flat)

    if flat.IsCUDA and migrate_to_host:
        if flat is a:
            # Work on a separate object before migrating -- presumably so
            # the caller's array is left untouched (TODO confirm)
            flat = type(a)(flat)
        flat = flat.migrate_(_ek.AllocType.Host)
        _ek.sync_thread()
    elif flat.IsLLVM:
        _ek.sync_thread()

    return {
        'shape': shape,
        'strides': tuple(strides),
        'typestr': '<' + a.Type.NumPy,
        'data': (flat.data_(), False),
        'version': version,
        'device': _ek.device(flat),
        'owner': flat
    }
def test22_scatter_rev(m):
    """Check reverse-mode gradients through two scatters into one buffer.

    Three configurations are run: gradients enabled on the buffer and on
    ``x``/``y`` (0), on ``x``/``y`` only (1), and on the buffer only (2).
    """
    for config in range(3):
        track_buf = config % 2 == 0
        track_xy = config // 2 == 0

        # The two index ranges overlap at positions 3 and 4
        idx1 = ek.arange(m.UInt, 5)
        idx2 = ek.arange(m.UInt, 4) + 3

        x = ek.linspace(m.Float, 0, 1, 5)
        y = ek.linspace(m.Float, 1, 2, 4)
        buf = ek.zero(m.Float, 10)

        if track_buf:
            ek.enable_grad(buf)
        if track_xy:
            ek.enable_grad(x, y)

        x.label = "x"
        y.label = "y"
        buf.label = "buf"

        # Scatter 'x' first, evaluate, then scatter 'y' on top of it
        buf2 = m.Float(buf)
        ek.scatter(buf2, x, idx1)
        ek.eval(buf2)
        ek.scatter(buf2, y, idx2)

        ref_buf = m.Float(0.0000, 0.2500, 0.5000, 1.0000, 1.3333,
                          1.6667, 2.0000, 0.0000, 0.0000, 0.0000)

        assert ek.allclose(ref_buf, buf2, atol=1e-4)
        assert ek.allclose(ref_buf, buf, atol=1e-4)

        s = ek.dot_async(buf2, buf2)
        ek.backward(s)

        # The last two entries of 'x' were overwritten by 'y', hence the
        # zero reference gradient there
        ref_x = m.Float(0.0000, 0.5000, 1.0000, 0.0000, 0.0000)
        ref_y = m.Float(2.0000, 2.6667, 3.3333, 4.0000)

        if track_xy:
            assert ek.allclose(ek.grad(y), ek.detach(ref_y), atol=1e-4)
            assert ek.allclose(ek.grad(x), ek.detach(ref_x), atol=1e-4)
        else:
            assert ek.grad(x) == 0
            assert ek.grad(y) == 0

        if track_buf:
            assert ek.allclose(ek.grad(buf), 0, atol=1e-4)
        else:
            assert ek.grad(buf) == 0
def test04_side_effect(pkg):
    """Check that a ``scatter_add`` issued inside a ``p.Loop`` takes effect.

    ``pkg`` is forwarded to ``get_class``.  Only ``i`` and ``j`` are
    registered with the loop and evaluated explicitly; ``buf`` is written
    solely through the scatter inside the loop body.
    """
    p = get_class(pkg)

    i = ek.zero(p.Int, 10)
    j = ek.zero(p.Int, 10)
    buf = ek.zero(p.Float, 10)

    loop = p.Loop(i, j)
    while loop.cond(i < 10):
        j += i
        i += 1
        # Every entry accumulates into slot 0 of the buffer
        ek.scatter_add(target=buf, value=p.Float(i), index=0)

    ek.eval(i, j)

    assert i == p.Int([10] * 10)
    # 10 entries, each adding 1 + 2 + ... + 10 = 55
    assert buf == p.Float(550, 0, 0, 0, 0, 0, 0, 0, 0, 0)
    # 0 + 1 + ... + 9 = 45
    assert j == p.Int([45] * 10)
def test04_side_effect(pkg):
    """Check that a ``scatter_reduce`` issued inside a loop takes effect.

    ``pkg`` is forwarded to ``get_class``.  The loop is constructed with a
    name and a callable returning the loop state ``(i, j)``; ``buf`` is
    written solely through the scatter inside the loop body.
    """
    p = get_class(pkg)

    i = ek.zero(p.Int, 10)
    j = ek.zero(p.Int, 10)
    buf = ek.zero(p.Float, 10)

    loop = p.Loop("MyLoop", lambda: (i, j))
    while loop(i < 10):
        j += i
        i += 1
        # Every entry accumulates into slot 0 of the buffer
        ek.scatter_reduce(
            op=ek.ReduceOp.Add,
            target=buf,
            value=p.Float(i),
            index=0,
        )

    ek.eval(i, j)

    assert i == p.Int([10] * 10)
    # 10 entries, each adding 1 + 2 + ... + 10 = 55
    assert buf == p.Float(550, 0, 0, 0, 0, 0, 0, 0, 0, 0)
    # 0 + 1 + ... + 9 = 45
    assert j == p.Int([45] * 10)
def test06_test_collatz(pkg, variant):
    """Run the Collatz iteration for 1..10 and check values and counts.

    ``pkg`` is forwarded to ``get_class``.  ``variant`` picks the order in
    which the two loop outputs are evaluated: together (0), value before
    counter (1) or counter before value (2).  Any other value performs no
    explicit evaluation.
    """
    p = get_class(pkg)

    def collatz(value: p.Int):
        """Iterate ``value`` until it equals 1, counting the steps taken."""
        counter = p.Int(0)
        loop = p.Loop(value, counter)
        while loop.cond(ek.neq(value, 1)):
            is_even = ek.eq(value & 1, 0)
            value.assign(ek.select(is_even, value // 2, 3 * value + 1))
            counter += 1
        return value, counter

    value, ctr = collatz(ek.arange(p.Int, 1, 11))

    # Each inner tuple becomes one ek.eval() call, issued in order
    eval_plan = {
        0: ((value, ctr),),
        1: ((value,), (ctr,)),
        2: ((ctr,), (value,)),
    }
    for group in eval_plan.get(variant, ()):
        ek.eval(*group)

    assert value == p.Int([1] * 10)
    assert ctr == p.Int([0, 1, 7, 2, 5, 8, 16, 3, 19, 6])
def test46_loop_ballistic_2(m):
    """Optimize launch velocities through a loop-based custom operation.

    A custom op integrates a ballistic trajectory for 100 steps inside an
    ``m.Loop``.  Its ``backward`` recomputes earlier states by stepping
    with a negative ``dt`` instead of storing them.  Twenty gradient steps
    on ``vel_in`` are then expected to drive the loss to zero.

    ``JitFlag.RecordLoops`` is enabled for the duration of the test and is
    always disabled again, even when an assertion fails, so that the
    global flag does not leak into subsequent tests.
    """
    class Ballistic2(ek.CustomOp):
        def timestep(self, pos, vel, dt=0.02, mu=.1, g=9.81):
            # Drag term proportional to |vel| * vel, plus gravity along -y
            acc = -mu * vel * ek.norm(vel) - m.Array2f(0, g)
            pos_out = pos + dt * vel
            vel_out = vel + dt * acc
            return pos_out, vel_out

        def eval(self, pos, vel):
            pos, vel = m.Array2f(pos), m.Array2f(vel)

            # Run for 100 iterations
            it, max_it = m.UInt32(0), 100

            loop = m.Loop(pos, vel, it)
            while loop.cond(it < max_it):
                # Update loop variables
                pos_out, vel_out = self.timestep(pos, vel)
                pos.assign(pos_out)
                vel.assign(vel_out)

                it += 1

            # Keep the final state; backward() starts from it
            self.pos = pos
            self.vel = vel

            return pos, vel

        def backward(self):
            grad_pos, grad_vel = self.grad_out()
            pos, vel = self.pos, self.vel

            # Run for 100 iterations
            it = m.UInt32(0)

            loop = m.Loop(it, pos, vel, grad_pos, grad_vel)
            while loop.cond(it < 100):
                # Take reverse step in time
                pos_rev, vel_rev = self.timestep(pos, vel, dt=-0.02)
                pos.assign(pos_rev)
                vel.assign(vel_rev)

                # Take a forward step in time, keep track of derivatives
                ek.enable_grad(pos_rev, vel_rev)
                pos_fwd, vel_fwd = self.timestep(pos_rev, vel_rev, dt=0.02)

                ek.set_grad(pos_fwd, grad_pos)
                ek.set_grad(vel_fwd, grad_vel)
                ek.enqueue(pos_fwd, vel_fwd)
                ek.traverse(m.Float, reverse=True)

                grad_pos.assign(ek.grad(pos_rev))
                grad_vel.assign(ek.grad(vel_rev))
                it += 1

            self.set_grad_in('pos', grad_pos)
            self.set_grad_in('vel', grad_vel)

    ek.enable_flag(ek.JitFlag.RecordLoops)
    try:
        pos_in = m.Array2f([1, 2, 4], [1, 2, 1])
        vel_in = m.Array2f([10, 9, 4], [5, 3, 6])

        # Gradient descent on the initial velocity, step size 0.2
        for i in range(20):
            ek.enable_grad(vel_in)
            ek.eval(vel_in, pos_in)
            pos_out, vel_out = ek.custom(Ballistic2, pos_in, vel_in)
            loss = ek.squared_norm(pos_out - m.Array2f(5, 0))
            ek.backward(loss)

            vel_in = m.Array2f(ek.detach(vel_in) - 0.2 * ek.grad(vel_in))

        assert ek.allclose(loss, 0, atol=1e-4)
        assert ek.allclose(vel_in.x, [3.3516, 2.3789, 0.79156], atol=1e-3)
    finally:
        # Restore the flag even if the optimization or an assertion failed
        ek.disable_flag(ek.JitFlag.RecordLoops)
def test46_loop_ballistic(m):
    """Optimize launch velocities through a loop-based custom operation.

    A custom op integrates a ballistic trajectory for 100 steps inside an
    ``m.Loop``, scattering the state of every iteration into scratch
    arrays.  Its ``backward`` gathers those states in reverse order and
    differentiates one time step per iteration.  Twenty gradient steps on
    ``vel_in`` are then expected to drive the loss to zero.

    ``JitFlag.RecordLoops`` is enabled for the optimization and is always
    disabled again, even when an assertion fails, so that the global flag
    does not leak into subsequent tests.
    """
    class Ballistic(ek.CustomOp):
        def timestep(self, pos, vel, dt=0.02, mu=.1, g=9.81):
            # Drag term proportional to |vel| * vel, plus gravity along -y
            acc = -mu * vel * ek.norm(vel) - m.Array2f(0, g)
            pos_out = pos + dt * vel
            vel_out = vel + dt * acc
            return pos_out, vel_out

        def eval(self, pos, vel):
            pos, vel = m.Array2f(pos), m.Array2f(vel)

            # Run for 100 iterations
            it, max_it = m.UInt32(0), 100

            # Allocate scratch space
            n = max(ek.width(pos), ek.width(vel))
            self.temp_pos = ek.empty(m.Array2f, n * max_it)
            self.temp_vel = ek.empty(m.Array2f, n * max_it)

            loop = m.Loop(pos, vel, it)
            while loop.cond(it < max_it):
                # Store current loop variables
                index = it * n + ek.arange(m.UInt32, n)
                ek.scatter(self.temp_pos, pos, index)
                ek.scatter(self.temp_vel, vel, index)

                # Update loop variables
                pos_out, vel_out = self.timestep(pos, vel)
                pos.assign(pos_out)
                vel.assign(vel_out)

                it += 1

            # Ensure output and temp. arrays are evaluated at this point
            ek.eval(pos, vel)

            return pos, vel

        def backward(self):
            grad_pos, grad_vel = self.grad_out()

            # Run for 100 iterations
            it = m.UInt32(100)

            loop = m.Loop(it, grad_pos, grad_vel)
            n = ek.width(grad_pos)
            while loop.cond(it > 0):
                # Retrieve loop variables, reverse chronological order
                it -= 1
                index = it * n + ek.arange(m.UInt32, n)
                pos = ek.gather(m.Array2f, self.temp_pos, index)
                vel = ek.gather(m.Array2f, self.temp_vel, index)

                # Differentiate loop body in reverse mode
                ek.enable_grad(pos, vel)
                pos_out, vel_out = self.timestep(pos, vel)
                ek.set_grad(pos_out, grad_pos)
                ek.set_grad(vel_out, grad_vel)
                ek.enqueue(pos_out, vel_out)
                ek.traverse(m.Float, reverse=True)

                # Update loop variables
                grad_pos.assign(ek.grad(pos))
                grad_vel.assign(ek.grad(vel))

            self.set_grad_in('pos', grad_pos)
            self.set_grad_in('vel', grad_vel)

    pos_in = m.Array2f([1, 2, 4], [1, 2, 1])
    vel_in = m.Array2f([10, 9, 4], [5, 3, 6])

    ek.enable_flag(ek.JitFlag.RecordLoops)
    try:
        # Gradient descent on the initial velocity, step size 0.2
        for i in range(20):
            ek.enable_grad(vel_in)
            ek.eval(vel_in, pos_in)
            pos_out, vel_out = ek.custom(Ballistic, pos_in, vel_in)
            loss = ek.squared_norm(pos_out - m.Array2f(5, 0))
            ek.backward(loss)

            vel_in = m.Array2f(ek.detach(vel_in) - 0.2 * ek.grad(vel_in))

        assert ek.allclose(loss, 0, atol=1e-4)
        assert ek.allclose(vel_in.x, [3.3516, 2.3789, 0.79156], atol=1e-3)
    finally:
        # Restore the flag even if the optimization or an assertion failed
        ek.disable_flag(ek.JitFlag.RecordLoops)
def export_(a, migrate_to_host, version, owner_supported=True):
    """Describe the memory behind ``a`` as a dictionary.

    The returned record has the keys ``shape``, ``strides``, ``typestr``,
    ``data``, ``version``, ``device`` and ``owner``.

    Parameters:
        a: array to export; ``IsJIT``, ``IsDiff``, ``IsScalar``,
            ``IsMatrix`` and ``Type`` are read.
        migrate_to_host: when true and the flattened array reports
            ``IsCUDA``, its storage is migrated to ``AllocType.Host``.
        version: stored unchanged under the ``version`` key.
        owner_supported: pass ``False`` when the consumer ignores the
            ``owner`` entry; the exported storage is then tied to the
            lifetime of ``a`` through a finalizer instead.
    """
    shape = _ek.shape(a)
    ndim = len(shape)
    shape = tuple(reversed(shape))

    if not a.IsJIT:
        # Enoki represents 3D arrays as 4D to leverage SIMD instructions
        padding = 0
        if a.IsScalar and a.IsMatrix and shape[0] == 3:
            padding = 1

        # F-style strides: the first axis of the reversed shape varies
        # fastest
        strides = []
        step = a.Type.Size
        for extent in shape:
            strides.append(step)
            step *= extent + padding

        # Array is already contiguous in memory -- document its structure
        return {
            'shape': shape,
            'strides': tuple(strides),
            'typestr': '<' + a.Type.NumPy,
            'data': (a.data_(), False),
            'version': version,
            'device': -1,
            'owner': a
        }

    # C-style strides. First dimension is the dynamic one, the rest should
    # be in reversed order
    strides = [0] * ndim
    step = a.Type.Size
    for axis in range(ndim - 1, 0, -1):
        strides[ndim - axis] = step
        step *= shape[axis]
    strides[0] = step

    # JIT array -- requires extra transformations
    flat = _ek.ravel(_ek.detach(a) if a.IsDiff else a)
    _ek.eval(flat)

    if flat.IsCUDA and migrate_to_host:
        if flat is a:
            # Work on a separate object before migrating -- presumably so
            # the caller's array is left untouched (TODO confirm)
            flat = type(a)(flat)
        flat = flat.migrate_(_ek.AllocType.Host)
        _ek.sync_thread()
    elif flat.IsLLVM:
        _ek.sync_thread()

    if not owner_supported and a is not flat:
        # If the caller cannot deal with the 'owner' field, register a
        # no-op finalizer on 'a' that holds 'flat' as an argument, which
        # keeps 'flat' alive while 'a' exists
        _wr.finalize(a, lambda arg: None, flat)

    return {
        'shape': shape,
        'strides': tuple(strides),
        'typestr': '<' + a.Type.NumPy,
        'data': (flat.data_(), False),
        'version': version,
        'device': _ek.device(flat),
        'owner': flat
    }