def benchmark():
    """Measure the average per-substep wall time of the forward and backward
    MPM kernels (p2g / grid_op / g2p and their .grad counterparts).

    Prints the timings and the Taichi profiler report as a side effect.
    Relies on the module-level kernels and the global Taichi runtime.
    """
    print('Also check "nvprof --print-gpu-trace python3 diffmpm_benchmark.py" for more accurate results')
    iters = 100000

    # Warm up: run each kernel once so JIT compilation is not timed.
    for i in range(1):
        p2g(0)
        grid_op()
        g2p(0)
    # Fix: sync *before* starting the clock. The original took the timestamp
    # first, so the warm-up kernels' completion time was billed to the
    # measured interval.
    ti.runtime.sync()
    t = time.time()
    for i in range(iters):
        # clear_grid()
        p2g(0)
        grid_op()
        g2p(0)
    ti.runtime.sync()  # wait for all queued kernels before reading the clock
    # NOTE(review): the trailing *3 presumably rescales per-iteration time by
    # the number of kernels launched per iteration — confirm intent.
    print('forward ', (time.time() - t) / iters * 1000 * 3, 'ms')
    ti.profiler_print()

    # Same procedure for the gradient kernels, launched in reverse order.
    for i in range(1):
        p2g.grad(0)
        grid_op.grad()
        g2p.grad(0)
    ti.runtime.sync()
    t = time.time()
    for i in range(iters):
        # clear_grid()
        g2p.grad(0)
        grid_op.grad()
        p2g.grad(0)
    ti.runtime.sync()
    print('backward ', (time.time() - t) / iters * 1000 * 3, 'ms')
    ti.profiler_print()
def main():
    """Seed the particle system randomly, then run and display 200 frames
    of the MPM simulation, timing each frame."""
    # Scatter particles uniformly inside the square [0.2, 0.6]^2.
    for p in range(n_particles):
        px = random.random() * 0.4 + 0.2
        py = random.random() * 0.4 + 0.2
        x[p] = [px, py]
        v[p] = [0, -1]
        J[p] = 1

    for frame in range(200):
        canvas.clear(0x112F41)
        t = time.time()
        # 150 particle-grid-particle substeps per displayed frame.
        for _ in range(150):
            clear_grid()
            p2g()
            grid_op()
            g2p()
        print('{:.1f} ms per frame'.format(1000 * (time.time() - t)))

        # Pull all positions out in one bulk copy; reading fields
        # element-by-element through the Python binding is still a bit slow.
        pos = np.empty((2 * n_particles), dtype=np.float32)
        copy_x(pos)
        for p in range(n_particles):
            # canvas.circle(ti.vec(x[i][0], x[i][1])).radius(1).color(0x068587).finish()
            canvas.circle(ti.vec(pos[p * 2], pos[p * 2 + 1])).radius(1).color(0x068587).finish()
        gui.update()
    ti.profiler_print()
def main():
    """Build one initialization kernel per multigrid level, then run each.

    Side effect only: whatever fields `initialize_kernel` touches, plus the
    Taichi profiler report at the end.
    """
    # make kernels for each multigrid level.
    # A plain list is the right container for Python callables; the original
    # abused np.zeros(n_mg_levels, dtype=ti.Kernel), allocating a numpy
    # object array only to overwrite every slot immediately.
    initialize = [initialize_kernel(level) for level in range(n_mg_levels)]
    # run kernels on each multigrid level
    for kernel in initialize:
        kernel()
    ti.profiler_print()
def main():
    """Optimize the initial velocity `init_v` by gradient descent on the
    simulation loss, visualizing particle positions with OpenCV each
    iteration and plotting the loss curve at the end."""
    # initialization
    init_v[None] = [0, 0]
    for i in range(n_particles):
        F[0, i] = [[1, 0], [0, 1]]  # identity deformation gradient at t=0
    # Place particles on a half-grid-spaced block offset from the origin.
    for i in range(N):
        for j in range(N):
            x[0, i * N + j] = [dx * (i * 0.5 + 10), dx * (j * 0.5 + 25)]
    set_v()
    benchmark()
    losses = []
    img_count = 0
    # 30 gradient-descent steps on the initial velocity.
    for i in range(30):
        l = forward()
        losses.append(l)
        grad = backward()
        print('loss=', l, ' grad=', (grad[0], grad[1]))
        learning_rate = 10
        # Legacy Taichi accessor syntax: init_v(k) selects component k.
        init_v(0)[None] -= learning_rate * grad[0]
        init_v(1)[None] -= learning_rate * grad[1]
        # visualize every 64th simulation step
        for s in range(63, steps, 64):
            scale = 4
            img = np.zeros(shape=(scale * n_grid, scale * n_grid)) + 0.3
            total = [0, 0]  # accumulated pixel coordinates for the centroid
            for i in range(n_particles):
                p_x = int(scale * x(0)[s, i] / dx)
                p_y = int(scale * x(1)[s, i] / dx)
                total[0] += p_x
                total[1] += p_y
                img[p_x, p_y] = 1
            # Black circle: current particle centroid; white circle: target.
            cv2.circle(img, (total[1] // n_particles, total[0] // n_particles),
                       radius=5, color=0, thickness=5)
            cv2.circle(img, (int(target[1] * scale * n_grid),
                             int(target[0] * scale * n_grid)),
                       radius=5, color=1, thickness=5)
            img = img.swapaxes(0, 1)[::-1]  # flip so y points up on screen
            cv2.imshow('MPM', img)
            img_count += 1
            # cv2.imwrite('MPM{:04d}.png'.format(img_count), img * 255)
            cv2.waitKey(1)
        ti.profiler_print()
    # NOTE(review): profiler_print appears both per-iteration and here after
    # the loop — the second call looks redundant; confirm before removing.
    ti.profiler_print()
    plt.title("Optimization of Initial Velocity")
    plt.ylabel("Loss")
    plt.xlabel("Gradient Descent Iterations")
    plt.plot(losses)
    plt.show()
def main():
    """Lay out the particle lattice and its triangle mesh, then simulate
    600 frames of energy-based MPM, saving a screenshot per frame."""
    # Particle positions: a half-spaced lattice hanging near the top of the domain.
    for px in range(n_particle_x):
        for py in range(n_particle_y):
            pid = mesh(px, py)
            x[pid] = [0.1 + px * dx * 0.5, 0.7 + py * dx * 0.5]
            v[pid] = [0, -1]

    # Triangulate: each lattice cell becomes two elements (lower/upper triangle).
    for px in range(n_particle_x - 1):
        for py in range(n_particle_y - 1):
            base = (px * (n_particle_y - 1) + py) * 2
            lower = (mesh(px, py), mesh(px + 1, py), mesh(px, py + 1))
            upper = (mesh(px, py + 1), mesh(px + 1, py + 1), mesh(px + 1, py))
            for k, node in enumerate(lower):
                vertices[base, k] = node
            for k, node in enumerate(upper):
                vertices[base + 1, k] = node

    compute_rest_T()
    os.makedirs('tmp', exist_ok=True)

    for frame in range(600):
        canvas.clear(0x112F41)
        for _ in range(50):
            clear_grid()
            # We differentiate the total energy w.r.t. particle positions:
            # recall F = - d(total_energy) / dx. The tape's backward pass
            # fills x.grad, which p2g later consumes — the energy value
            # itself is deliberately unused.
            with ti.Tape(total_energy):
                compute_total_energy()
            p2g()
            grid_op()
            g2p()

        canvas.circle(ti.vec(0.5, 0.5)).radius(70).color(0x068587).finish()
        # TODO: why is visualization so slow?
        for e in range(n_elements):
            for k in range(3):
                a, b = vertices[e, k], vertices[e, (k + 1) % 3]
                canvas.path(ti.vec(x[a][0], x[a][1]),
                            ti.vec(x[b][0], x[b][1])).radius(1).color(0x4FB99F).finish()
        for p in range(n_particles):
            canvas.circle(ti.vec(x[p][0], x[p][1])).radius(2).color(0xF2B134).finish()
        gui.update()
        gui.screenshot("tmp/{:04d}.png".format(frame))
    ti.profiler_print()
def main():
    """Initialize the particle lattice and element mesh, then run the
    energy-based MPM simulation for 600 frames, saving a screenshot
    of each frame into tmp/."""
    # Particles sit on a half-grid-spaced lattice near the top of the domain.
    for i in range(n_particle_x):
        for j in range(n_particle_y):
            t = mesh(i, j)
            x[t] = [0.1 + i * dx * 0.5, 0.7 + j * dx * 0.5]
            v[t] = [0, -1]
    # build mesh: each lattice cell is split into two triangular elements
    for i in range(n_particle_x - 1):
        for j in range(n_particle_y - 1):
            # element id
            eid = (i * (n_particle_y - 1) + j) * 2
            vertices[eid, 0] = mesh(i, j)
            vertices[eid, 1] = mesh(i + 1, j)
            vertices[eid, 2] = mesh(i, j + 1)
            eid = (i * (n_particle_y - 1) + j) * 2 + 1
            vertices[eid, 0] = mesh(i, j + 1)
            vertices[eid, 1] = mesh(i + 1, j + 1)
            vertices[eid, 2] = mesh(i + 1, j)
    compute_rest_T()
    os.makedirs('tmp', exist_ok=True)
    for f in range(600):
        canvas.clear(0x112F41)
        for s in range(50):
            clear_grid()
            # The tape's backward pass fills x.grad (force = -dE/dx), which
            # p2g consumes; the energy value itself is not read here.
            with ti.Tape(total_energy):
                compute_total_energy()
            p2g()
            grid_op()
            g2p()
        canvas.circle(ti.vec(0.5, 0.5)).radius(70).color(0x068587).finish()
        # TODO: why is visualization so slow?
        # Draw the wireframe of every element, one edge at a time.
        for i in range(n_elements):
            for j in range(3):
                a, b = vertices[i, j], vertices[i, (j + 1) % 3]
                canvas.path(ti.vec(x[a][0], x[a][1]), ti.vec(
                    x[b][0], x[b][1])).radius(1).color(0x4FB99F).finish()
        for i in range(n_particles):
            canvas.circle(ti.vec(x[i][0], x[i][1])).radius(2).color(0xF2B134).finish()
        gui.update()
        gui.screenshot("tmp/{:04d}.png".format(f))
    ti.profiler_print()
def main():
    """Seed particles randomly in a central square and run 200 frames of
    MPM, rendering every particle as a dot on the Taichi canvas."""
    for p in range(n_particles):
        x[p] = [random.random() * 0.4 + 0.2, random.random() * 0.4 + 0.2]
        v[p] = [0, -1]
        J[p] = 1

    for _frame in range(200):
        canvas.clear(0x112F41)
        # 150 substeps of the particle-grid-particle cycle per frame.
        for _ in range(150):
            clear_grid()
            p2g()
            grid_op()
            g2p()
        # TODO: why is visualization so slow?
        for p in range(n_particles):
            px, py = x[p][0], x[p][1]
            canvas.circle(ti.vec(px, py)).radius(1).color(0x068587).finish()
        gui.update()
    ti.profiler_print()
def main():
    """Run the MPM simulation for 200 frames, visualizing particle density
    as a grayscale image through OpenCV instead of the Taichi GUI."""
    for p in range(n_particles):
        x[p] = [random.random() * 0.4 + 0.2, random.random() * 0.4 + 0.2]
        v[p] = [0, -1]
        J[p] = 1

    for _frame in range(200):
        for _ in range(150):
            clear_grid()
            p2g()
            grid_op()
            g2p()
        ti.profiler_print()

        # Rasterize particles: background 0.3, occupied cells 1.
        scale = 2
        img = np.zeros(shape=(scale * n_grid, scale * n_grid)) + 0.3
        for p in range(n_particles):
            row = int(scale * x(0)[p] / dx)
            col = int(scale * x(1)[p] / dx)
            img[row, col] = 1
        # Transpose and flip so the y axis points up on screen.
        img = img.swapaxes(0, 1)[::-1]
        cv2.imshow('MPM', img)
        cv2.waitKey(1)
@ti.kernel
def set1():
    """Fill field `a` using a Taichi struct-for over its active cells."""
    for i, j in a:
        a[i, j] = 2.0


@ti.kernel
def set2():
    """Fill the same (N*8) x (N*8) region with explicit dense range-fors."""
    for j in range(N * 8):
        for i in range(N * 8):
            a[i, j] = 2.0


# Warm up both kernels so JIT compilation is excluded from the timings, and
# sync so the warm-up launches cannot leak into the first measured interval.
set1()
set2()
ti.get_runtime().sync()

t = time.time()
for n in range(100):
    set1()
# Fix: sync *before* reading the clock. Kernel launches are asynchronous, so
# the original measured only launch overhead — it computed `elapsed` first
# and synced afterwards, when the sync no longer affected the number printed.
ti.get_runtime().sync()
elapsed = time.time() - t
print(elapsed * 10, 'ms/iter')  # 100 iters: seconds / 100 * 1000 == * 10

t = time.time()
for n in range(100):
    set2()
ti.get_runtime().sync()
elapsed = time.time() - t
print(elapsed * 10, 'ms/iter')
ti.profiler_print()
def run(self):
    """Solve the linear system A x = b with (optionally multigrid-
    preconditioned) conjugate gradients, then paint and display the
    solution in a Taichi GUI.

    Reads/writes the solver fields on self (r, z, p, Ap, x, sum, alpha,
    beta); `self.reduce(u, v)` stores the dot product u.v in self.sum.
    """
    gui = ti.GUI("Multigrid Preconditioned Conjugate Gradients",
                 res=(self.N_gui, self.N_gui))
    self.init()
    self.reduce(self.r[0], self.r[0])
    initial_rTr = self.sum[None]  # |b|^2: used for the relative tolerance
    # self.r = b - Ax = b since self.x = 0
    # self.p = self.r = self.r + 0 self.p
    if self.use_multigrid:
        self.apply_preconditioner()  # z = M^-1 r via one multigrid V-cycle
    else:
        self.z[0].copy_from(self.r[0])  # identity preconditioner: z = r
    self.update_p()
    self.reduce(self.z[0], self.r[0])
    old_zTr = self.sum[None]
    # CG
    for i in range(400):
        # self.alpha = rTr / pTAp
        self.compute_Ap()
        self.reduce(self.p, self.Ap)
        pAp = self.sum[None]
        self.alpha[None] = old_zTr / pAp
        # self.x = self.x + self.alpha self.p
        self.update_x()
        # self.r = self.r - self.alpha self.Ap
        self.update_r()
        # check for convergence (relative residual drop of 1e-12)
        self.reduce(self.r[0], self.r[0])
        rTr = self.sum[None]
        if rTr < initial_rTr * 1.0e-12:
            break
        # self.z = M^-1 self.r
        if self.use_multigrid:
            self.apply_preconditioner()
        else:
            self.z[0].copy_from(self.r[0])
        # self.beta = new_rTr / old_rTr
        self.reduce(self.z[0], self.r[0])
        new_zTr = self.sum[None]
        self.beta[None] = new_zTr / old_zTr
        # self.p = self.z + self.beta self.p
        self.update_p()
        old_zTr = new_zTr
        print(f'iter {i}, residual={rTr}')
    self.paint()
    gui.set_image(self.pixels)
    gui.show()
    ti.profiler_print()