def foo():
    """Accumulate forward-shifted samples of ``a`` into ``b``.

    For every index ``(row, col)`` of ``a`` and every shift in
    ``range(stencil_length)``, adds ``a[row + shift, col]`` and
    ``a[row, col + shift]`` to ``b[row, col]``.  ``a`` is registered with
    ``ti.block_local`` and a block dimension of 512 is requested.
    """
    ti.block_dim(512)
    ti.block_local(a)
    for row, col in a:
        for shift in range(stencil_length):
            # The two contributions are accumulated as separate updates of b.
            b[row, col] += a[row + shift, col]
            b[row, col] += a[row, col + shift]
def g2p(self, dt: ti.f32):
    """Gather grid velocities back onto particles and advect them.

    For each entry of ``self.pid`` the particle's velocity ``self.v`` and
    matrix ``self.C`` are rebuilt as weighted sums over the grid nodes
    enumerated by ``self.stencil_range()``, after which the particle position
    is advanced by ``dt * self.v``.

    :param dt: time step used for the position update.
    """
    ti.block_dim(256)
    ti.block_local(*self.grid_v.entries)
    ti.no_activate(self.particle)
    for blk_idx in ti.grouped(self.pid):
        part = self.pid[blk_idx]

        # Particle position in grid units; used for both the base node and
        # the fractional offset.
        grid_pos = self.x[part] * self.inv_dx
        corner = ti.floor(grid_pos - 0.5).cast(int)
        for axis in ti.static(range(self.dim)):
            corner[axis] = ti.assume_in_range(corner[axis], blk_idx[axis], 0,
                                              1)
        frac = grid_pos - corner.cast(float)

        # Per-axis quadratic weights for the three stencil nodes.
        kernel_w = [
            0.5 * (1.5 - frac)**2,
            0.75 - (frac - 1.0)**2,
            0.5 * (frac - 0.5)**2,
        ]

        vel_acc = ti.Vector.zero(ti.f32, self.dim)
        affine_acc = ti.Matrix.zero(ti.f32, self.dim, self.dim)

        # Visit every grid node of the stencil around the particle.
        for node_off in ti.static(ti.grouped(self.stencil_range())):
            rel = node_off.cast(float) - frac
            node_vel = self.grid_v[corner + node_off]
            wt = 1.0
            for axis in ti.static(range(self.dim)):
                wt *= kernel_w[node_off[axis]][axis]
            vel_acc += wt * node_vel
            affine_acc += 4 * self.inv_dx * wt * node_vel.outer_product(rel)

        self.v[part] = vel_acc
        self.C[part] = affine_acc
        self.x[part] += dt * self.v[part]  # advection
def p2g_naive():
    """Scatter a constant contribution from every particle into ``m3``.

    Each particle position ``x[part]`` is scaled by ``N`` and cast to
    ``ti.i32`` to obtain a base cell; every cell of the
    ``extend`` x ``extend`` window starting there receives
    ``(N * N / M) * 0.01``.
    """
    ti.block_dim(256)
    for part in x:
        cell = (x[part] * N).cast(ti.i32)
        for shift in ti.static(ti.grouped(ti.ndrange(extend, extend))):
            m3[cell + shift] += (N * N / M) * 0.01
def p2g_naive():
    """Scatter ``scatter_weight`` from every particle into ``m3``.

    The base cell is ``floor(x[part] * N)`` cast to ``ti.i32``; every cell of
    the ``extend`` x ``extend`` window starting there is incremented by
    ``scatter_weight``.
    """
    ti.block_dim(256)
    for part in x:
        scaled = x[part] * N
        cell = ti.floor(scaled).cast(ti.i32)
        for shift in ti.static(ti.grouped(ti.ndrange(extend, extend))):
            m3[cell + shift] += scatter_weight
def reduce_seri() -> ti.f32:
    """Return the dot product of ``v1`` and ``v2`` using a nested inner loop.

    The outermost loop has a single iteration (``range(1)``); the actual
    accumulation over ``v1.shape[0]`` elements happens in the loop nested
    inside it.
    """
    length = v1.shape[0]
    dot_acc = 0.0
    ti.block_dim(32)
    for _ in range(1):
        # All products are accumulated inside this single outer iteration.
        for idx in range(length):
            dot_acc += v1[idx] * v2[idx]
    return dot_acc
def reduce_para() -> ti.f32:
    """Return the dot product of ``v1`` and ``v2`` using one top-level loop.

    The accumulation over ``v1.shape[0]`` elements is written directly as the
    outermost loop of the function.
    """
    length = v1.shape[0]
    dot_acc = 0.0
    # larger block_dim leads to less overhead; default dim = 32
    ti.block_dim(32)
    for idx in range(length):
        dot_acc += v1[idx] * v2[idx]
    return dot_acc
def insert():
    """Randomise every particle position and register it in ``pid``.

    Each coordinate is drawn as ``ti.random() * (1 - 2 * bound) + bound``.
    The particle index is then appended to ``pid.parent()`` at the cell
    ``(int(x * N), int(y * N))``.
    """
    ti.block_dim(256)
    for part in x:
        # Draw the two coordinates in the same order as they are stored.
        rand_x = ti.random() * (1 - 2 * bound) + bound
        rand_y = ti.random() * (1 - 2 * bound) + bound
        x[part] = ti.Vector([rand_x, rand_y])
        ti.append(pid.parent(),
                  [int(x[part][0] * N), int(x[part][1] * N)], part)
def insert():
    """Append every particle index to ``pid`` at its offset-corrected cell.

    The cell is ``floor(x * N) - grid_offset`` per axis, converted to int.
    """
    ti.block_dim(256)
    for part in x:
        # It is important that insert and p2g compute the base coordinates in
        # exactly the same way; otherwise floating-point errors can cause a
        # coordinate mismatch between the two.
        cell_x = int(ti.floor(x[part][0] * N) - grid_offset[0])
        cell_y = int(ti.floor(x[part][1] * N) - grid_offset[1])
        cell = ti.Vector([cell_x, cell_y])
        ti.append(pid.parent(), cell, part)
def insert():
    """Append every particle index to ``pid`` at its offset-corrected cell.

    The cell is ``int(x * N - grid_offset)`` per axis.
    """
    ti.block_dim(256)
    for part in x:
        # The grid offset is subtracted manually, so the values are always
        # positive; int() therefore suffices and no ti.floor is needed.
        cell_x = int(x[part][0] * N - grid_offset[0])
        cell_y = int(x[part][1] * N - grid_offset[1])
        cell = ti.Vector([cell_x, cell_y])
        ti.append(pid.parent(), cell, part)
def update_Q(rk_step: ti.template()):
    """Advance ``Q`` on interior cells for one Runge-Kutta stage.

    ``rk_step == 0``: ``Q += dt * (flux balance) / h``.
    ``rk_step == 1``: ``Q = (Q + Q_old) / 2 + dt * (flux balance) / h``.
    The flux balance is
    ``F_x[i, j] - F_x[i + 1, j] + F_y[i, j] - F_y[i, j + 1]``.

    :param rk_step: compile-time stage selector (0 or 1 are handled).
    """
    ti.block_dim(256)
    ti.block_local(F_x, F_y)
    for ci, cj in Q:
        if is_interior_cell(ci, cj):
            if ti.static(rk_step == 0):
                flux_balance = (F_x[ci, cj] - F_x[ci + 1, cj] + F_y[ci, cj] -
                                F_y[ci, cj + 1])
                Q[ci, cj] = Q[ci, cj] + dt[None] * flux_balance / h
            if ti.static(rk_step == 1):
                flux_balance = (F_x[ci, cj] - F_x[ci + 1, cj] + F_y[ci, cj] -
                                F_y[ci, cj + 1])
                averaged = (Q[ci, cj] + Q_old[ci, cj]) / 2.0
                Q[ci, cj] = averaged + dt[None] * flux_balance / h
def build_pid(self, pid: ti.template(), grid_m: ti.template(),
              offset: ti.template()):
    """Bucket particle indices into ``pid`` by the grid block they fall in.

    The grid is blocked (e.g. 4x4x4); particles of one block are meant to be
    handled by one GPU block so that shared memory (``ti.block_local``) can be
    used to accelerate later transfers.

    :param pid: structure whose parent receives the appended particle ids.
    :param grid_m: grid field used as the source index space for
        ``ti.rescale_index``.
    :param offset: accepted for interface compatibility; this body does not
        read it (it uses ``self.offset`` instead).
    :return: None
    """
    ti.block_dim(64)
    for part in self.x:
        node = int(ti.floor(self.x[part] * self.inv_dx - 0.5))
        shifted = node - ti.Vector(list(self.offset))
        # The grandparent of pid is the `block` level.
        block_idx = ti.rescale_index(grid_m, pid.parent(2), shifted)
        ti.append(pid.parent(), block_idx, part)
def p2g(use_shared: ti.template(), m: ti.template()):
    """Scatter ``scatter_weight`` from each particle listed in ``pid`` into ``m``.

    :param use_shared: compile-time flag; when truthy ``ti.cache_shared(m)``
        is requested before the loop.
    :param m: destination field, incremented on an ``extend`` x ``extend``
        window starting at the particle's base cell.
    """
    ti.block_dim(256)
    if ti.static(use_shared):
        ti.cache_shared(m)
    for bi, bj, slot in pid:
        part = pid[bi, bj, slot]
        raw_cell = ti.floor(x[part] * N).cast(ti.i32)
        # Declare each base coordinate to lie within [bi + 0, bi + 1) style
        # bounds relative to the pid index via ti.assume_in_range.
        cell_x = ti.assume_in_range(raw_cell[0], bi, 0, 1)
        cell_y = ti.assume_in_range(raw_cell[1], bj, 0, 1)
        cell = ti.Vector([cell_x, cell_y])
        for shift in ti.static(ti.grouped(ti.ndrange(extend, extend))):
            m[cell + shift] += scatter_weight
def p2g(use_shared: ti.template(), m: ti.template()):
    """Scatter ``scatter_weight`` from each particle listed in ``pid`` into ``m``.

    :param use_shared: compile-time flag; when truthy ``ti.block_local(m)``
        is requested before the loop.
    :param m: destination field, incremented on an ``extend`` x ``extend``
        window starting at the particle's base cell.
    """
    ti.block_dim(256)
    if ti.static(use_shared):
        ti.block_local(m)
    for slot in ti.grouped(pid):
        part = pid[slot]
        raw_cell = ti.floor(x[part] * N).cast(ti.i32)
        # Map the pid index into the index space of m before using it as the
        # reference for ti.assume_in_range.
        ref = ti.rescale_index(pid, m, slot)
        cell_x = ti.assume_in_range(raw_cell[0], ref[0], 0, 1)
        cell_y = ti.assume_in_range(raw_cell[1], ref[1], 0, 1)
        cell = ti.Vector([cell_x, cell_y])
        for shift in ti.static(ti.grouped(ti.ndrange(extend, extend))):
            m[cell + shift] += scatter_weight
def substep():
    """Run one simulation substep: clear grid, scatter, grid update, gather.

    1. Zero ``F_grid_v`` and ``F_grid_m``.
    2. Scatter particle momentum and mass to the grid.
    3. Normalise grid momentum by mass, apply gravity on component 1 and
       zero velocity components that point into the boundary band.
    4. Gather grid velocities back to particles and update ``F_v``, ``F_x``,
       ``F_J`` and ``F_C``.
    """
    # --- 1. reset grid ---------------------------------------------------
    for node in ti.grouped(F_grid_m):
        F_grid_v[node] = ti.zero(F_grid_v[node])
        F_grid_m[node] = 0

    # --- 2. particle -> grid ----------------------------------------------
    ti.block_dim(n_grid)
    for part in F_x:
        grid_pos = F_x[part] / dx
        corner = int(grid_pos - 0.5)
        frac = grid_pos - corner
        kernel_w = [
            0.5 * (1.5 - frac)**2,
            0.75 - (frac - 1)**2,
            0.5 * (frac - 0.5)**2,
        ]
        pressure = -dt * 4 * E * p_vol * (F_J[part] - 1) / dx**2
        affine_m = ti.Matrix.identity(float,
                                      dim) * pressure + p_mass * F_C[part]
        for node_off in ti.static(ti.grouped(ti.ndrange(*neighbour))):
            rel = (node_off - frac) * dx
            wt = 1.0
            for axis in ti.static(range(dim)):
                wt *= kernel_w[node_off[axis]][axis]
            F_grid_v[corner + node_off] += wt * (p_mass * F_v[part] +
                                                 affine_m @ rel)
            F_grid_m[corner + node_off] += wt * p_mass

    # --- 3. grid update ------------------------------------------------------
    for node in ti.grouped(F_grid_m):
        if F_grid_m[node] > 0:
            F_grid_v[node] /= F_grid_m[node]
        F_grid_v[node][1] -= dt * gravity
        # Component-wise masks: moving towards the low / high boundary band.
        into_low = (node < bound) & (F_grid_v[node] < 0)
        into_high = (node > n_grid - bound) & (F_grid_v[node] > 0)
        blocked = into_low | into_high
        F_grid_v[node] = 0 if blocked else F_grid_v[node]

    # --- 4. grid -> particle ----------------------------------------------
    ti.block_dim(n_grid)
    for part in F_x:
        grid_pos = F_x[part] / dx
        corner = int(grid_pos - 0.5)
        frac = grid_pos - corner
        kernel_w = [
            0.5 * (1.5 - frac)**2,
            0.75 - (frac - 1)**2,
            0.5 * (frac - 0.5)**2,
        ]
        vel_acc = ti.zero(F_v[part])
        affine_acc = ti.zero(F_C[part])
        for node_off in ti.static(ti.grouped(ti.ndrange(*neighbour))):
            rel = (node_off - frac) * dx
            wt = 1.0
            for axis in ti.static(range(dim)):
                wt *= kernel_w[node_off[axis]][axis]
            node_vel = F_grid_v[corner + node_off]
            vel_acc += wt * node_vel
            affine_acc += 4 * wt * node_vel.outer_product(rel) / dx**2
        F_v[part] = vel_acc
        F_x[part] += dt * F_v[part]
        F_J[part] *= 1 + dt * affine_acc.trace()
        F_C[part] = affine_acc
def stencil_2d(y: ti.template(), x: ti.template()):
    """Apply the ``stencil_common`` offsets between ``x`` and ``y``.

    In scatter mode every ``x[I]`` is added to ``y[I + offset]`` for each
    offset; in gather mode ``y[I]`` is set to the sum of ``x[I + offset]``.
    When ``bls`` is enabled the field accessed with offsets (``y`` when
    scattering, ``x`` when gathering) is registered with ``ti.block_local``.

    :param y: destination field.
    :param x: source field; its indices drive the loop.
    """
    # reference: tests/python/bls_test_template.py
    if ti.static(bls):
        if ti.static(scatter):
            ti.block_local(y)
        else:
            ti.block_local(x)
    ti.block_dim(64)  # 8*8=64
    for idx in ti.grouped(x):
        if ti.static(scatter):
            for shift in ti.static(stencil_common):
                y[idx + ti.Vector(shift)] += x[idx]
        else:  # gather
            acc = ti.cast(0.0, dtype)
            for shift in ti.static(stencil_common):
                acc = acc + x[idx + ti.Vector(shift)]
            y[idx] = acc
def g2p(use_shared: ti.template(), s: ti.template()):
    """Gather ``m1`` over each particle's window and store the sum in ``s``.

    :param use_shared: compile-time flag; when truthy ``ti.cache_shared(m1)``
        is requested before the loop.
    :param s: per-particle output field; ``s[p]`` receives the sum of ``m1``
        over the ``extend`` x ``extend`` window at the particle's base cell.
    """
    ti.block_dim(256)
    if ti.static(use_shared):
        ti.cache_shared(m1)
    for bi, bj, slot in pid:
        part = pid[bi, bj, slot]
        raw_cell = ti.floor(x[part] * N).cast(ti.i32)
        cell_x = ti.assume_in_range(raw_cell[0], bi, 0, 1)
        cell_y = ti.assume_in_range(raw_cell[1], bj, 0, 1)
        cell = ti.Vector([cell_x, cell_y])
        gathered = 0.0
        for shift in ti.static(ti.grouped(ti.ndrange(extend, extend))):
            gathered += m1[cell + shift]
        s[part] = gathered
def multiply(self, x: ti.template(), b: ti.template()):
    """Compute ``b`` as the system operator applied to ``x``.

    ``b`` is zeroed, receives the inertia term ``mass_matrix * x``, and then
    accumulates ``dt * dt`` times the stress-differential forces scattered
    from the particles.

    :param x: input field.
    :param b: output field, overwritten.
    """
    for node in b:
        b[node] = ti.zero(b[node])

    # Note the relationship H dx = - df, where H is the stiffness matrix.
    # Inertia part.
    for node in x:
        b[node] += self.mass_matrix[node] * x[node]

    self.computeDvAndGradDv(x)
    # scratch_gradV is now temporarily used for storing gradDV (evaluated at
    # particles); scratch_vp is now temporarily used for storing DV
    # (evaluated at particles).

    for part in self.x:
        self.scratch_stress[part] = ti.zero(self.scratch_stress[part])

    for part in self.x:
        self.computeStressDifferential(part, self.scratch_gradV[part],
                                       self.scratch_stress[part],
                                       self.scratch_vp[part])
    # scratch_stress is now V_p^0 dP (F_p^n)^T (dP is Ap in the snow paper).

    ti.block_dim(self.n_grid)
    for part in self.x:
        grid_pos = self.x[part] * self.inv_dx
        corner = int(grid_pos - 0.5)
        frac = grid_pos - corner
        # Quadratic kernels
        # [http://mpm.graphics Eqn. 123, with x=fx, fx-1, fx-2]
        kernel_w = [
            0.5 * (1.5 - frac)**2,
            0.75 - (frac - 1)**2,
            0.5 * (frac - 0.5)**2,
        ]
        stress_p = self.scratch_stress[part]
        for node_off in ti.static(ti.grouped(ti.ndrange(*self.neighbour))):
            rel = (node_off - frac) * self.dx
            wt = self.real(1)
            for axis in ti.static(range(self.dim)):
                wt *= kernel_w[node_off[axis]][axis]
            # fi -= \sum_p (Ap (xi-xp) - fp) w_ip Dp_inv
            force = wt * stress_p @ rel
            b[self.idx(corner + node_off)] += self.dt * self.dt * (force)
def compute_F():
    """Fill the face fluxes ``F_x`` and ``F_y`` for every cell of ``Q``.

    Interior faces: the four components of ``W`` are reconstructed on both
    sides of the face with the ``mc_lim`` limiter, converted with ``w_to_u``
    and passed to ``HLLC_flux``.  Boundary faces: ``HLLC_flux`` is evaluated
    directly on the neighbouring ``Q`` values.  The x-faces use the normal
    ``(1, 0)`` and the y-faces ``(0, 1)``.
    """
    ti.block_dim(256)
    ti.block_local(W)
    for i, j in Q:
        # ---------------- x-direction face ----------------
        if is_interior_x_face(i, j):
            # MUSCL reconstruction of left and right states with HLLC flux
            left_state = ti.Vector([0.0, 0.0, 0.0, 0.0])
            right_state = ti.Vector([0.0, 0.0, 0.0, 0.0])
            for comp in ti.static(range(4)):
                # One-sided differences around the face between i-1 and i.
                d_face = W[i, j][comp] - W[i - 1, j][comp]
                d_minus = W[i - 1, j][comp] - W[i - 2, j][comp]
                d_plus = W[i + 1, j][comp] - W[i, j][comp]
                slope_l = d_face / d_minus
                slope_r = d_face / d_plus
                left_state[comp] = W[i - 1, j][comp] + 0.5 * mc_lim(
                    slope_l) * d_minus
                right_state[comp] = W[i, j][comp] - 0.5 * mc_lim(
                    slope_r) * d_plus
            F_x[i, j] = HLLC_flux(w_to_u(left_state), w_to_u(right_state),
                                  ti.Vector([1.0, 0.0]))
        elif is_boundary_x_face(i, j):
            F_x[i, j] = HLLC_flux(Q[i - 1, j], Q[i, j], ti.Vector([1.0, 0.0]))

        # ---------------- y-direction face ----------------
        if is_interior_y_face(i, j):
            # MUSCL reconstruction of left and right states with HLLC flux
            left_state = ti.Vector([0.0, 0.0, 0.0, 0.0])
            right_state = ti.Vector([0.0, 0.0, 0.0, 0.0])
            for comp in ti.static(range(4)):
                # One-sided differences around the face between j-1 and j.
                d_face = W[i, j][comp] - W[i, j - 1][comp]
                d_minus = W[i, j - 1][comp] - W[i, j - 2][comp]
                d_plus = W[i, j + 1][comp] - W[i, j][comp]
                slope_l = d_face / d_minus
                slope_r = d_face / d_plus
                left_state[comp] = W[i, j - 1][comp] + 0.5 * mc_lim(
                    slope_l) * d_minus
                right_state[comp] = W[i, j][comp] - 0.5 * mc_lim(
                    slope_r) * d_plus
            F_y[i, j] = HLLC_flux(w_to_u(left_state), w_to_u(right_state),
                                  ti.Vector([0.0, 1.0]))
        elif is_boundary_y_face(i, j):
            F_y[i, j] = HLLC_flux(Q[i, j - 1], Q[i, j], ti.Vector([0.0, 1.0]))
def g2p(use_shared: ti.template(), s: ti.template()):
    """Gather ``m1`` over each particle's window and store the sum in ``s``.

    :param use_shared: compile-time flag; when truthy ``ti.block_local(m1)``
        is requested before the loop.
    :param s: per-particle output field; ``s[p]`` receives the sum of ``m1``
        over the ``extend`` x ``extend`` window at the particle's base cell.
    """
    ti.block_dim(256)
    if ti.static(use_shared):
        ti.block_local(m1)
    for slot in ti.grouped(pid):
        part = pid[slot]
        raw_cell = ti.floor(x[part] * N).cast(ti.i32)
        # Map the pid index into the index space of m1 before using it as
        # the reference for ti.assume_in_range.
        ref = ti.rescale_index(pid, m1, slot)
        cell_x = ti.assume_in_range(raw_cell[0], ref[0], 0, 1)
        cell_y = ti.assume_in_range(raw_cell[1], ref[1], 0, 1)
        cell = ti.Vector([cell_x, cell_y])
        gathered = 0.0
        for shift in ti.static(ti.grouped(ti.ndrange(extend, extend))):
            gathered += m1[cell + shift]
        s[part] = gathered
def computeResidual(self):
    """Assemble ``self.residual`` from gravity, inertia and elastic forces.

    The residual starts as ``dt * mass_matrix * gravity``, has
    ``mass_matrix * dv`` subtracted, then accumulates ``dt`` times the
    per-particle stress forces scattered to the grid, and is finally passed
    to ``self.project``.
    """
    for node in self.dv:
        self.residual[node] = self.dt * self.mass_matrix[node] * self.gravity

    for node in self.dv:
        self.residual[node] -= self.mass_matrix[node] * self.dv[node]

    ti.block_dim(self.n_grid)
    for part in self.x:
        grid_pos = self.x[part] * self.inv_dx
        corner = int(grid_pos - 0.5)
        frac = grid_pos - corner
        kernel_w = [
            0.5 * (1.5 - frac)**2,
            0.75 - (frac - 1)**2,
            0.5 * (frac - 0.5)**2,
        ]

        # First pass: gather the velocity gradient from grid_v + dv.
        grad_acc = ti.zero(self.C[part])
        for node_off in ti.static(ti.grouped(ti.ndrange(*self.neighbour))):
            rel = (node_off - frac) * self.dx
            wt = ti.cast(1.0, self.real)
            for axis in ti.static(range(self.dim)):
                wt *= kernel_w[node_off[axis]][axis]
            node_vel = self.grid_v[corner + node_off] + self.dv[self.idx(
                corner + node_off)]
            grad_acc += 4 * self.inv_dx * wt * node_vel.outer_product(rel)

        trial_F = (ti.Matrix.identity(self.real, self.dim) +
                   self.dt * grad_acc) @ self.old_F[part]
        stress_p = (-self.p_vol * 4 * self.inv_dx *
                    self.inv_dx) * self.dpsi_dF(trial_F) @ trial_F.transpose()

        # Second pass: scatter the resulting forces into the residual.
        for node_off in ti.static(ti.grouped(ti.ndrange(*self.neighbour))):
            rel = (node_off - frac) * self.dx
            wt = ti.cast(1.0, self.real)
            for axis in ti.static(range(self.dim)):
                wt *= kernel_w[node_off[axis]][axis]
            node_force = wt * stress_p @ rel
            self.residual[self.idx(corner + node_off)] += self.dt * node_force

    self.project(self.residual)
def updateState(self):
    """Refresh per-particle ``F`` and ``scratch_xp`` from the current ``dv``.

    For each particle the velocity gradient is gathered from
    ``grid_v + dv``, ``self.F`` is set to ``(I + dt * grad) @ old_F``,
    ``self.updateIsotropicHelper`` is invoked on it, and ``scratch_xp`` is set
    to ``x + dt * scratch_vp``.
    """
    ti.block_dim(self.n_grid)
    for part in self.x:
        grid_pos = self.x[part] * self.inv_dx
        corner = int(grid_pos - 0.5)
        frac = grid_pos - corner
        kernel_w = [
            0.5 * (1.5 - frac)**2,
            0.75 - (frac - 1)**2,
            0.5 * (frac - 0.5)**2,
        ]

        grad_acc = ti.zero(self.C[part])
        for node_off in ti.static(ti.grouped(ti.ndrange(*self.neighbour))):
            rel = (node_off - frac) * self.dx
            wt = ti.cast(1.0, self.real)
            for axis in ti.static(range(self.dim)):
                wt *= kernel_w[node_off[axis]][axis]
            node_vel = self.grid_v[corner + node_off] + self.dv[self.idx(
                corner + node_off)]
            grad_acc += 4 * self.inv_dx * wt * node_vel.outer_product(rel)

        self.F[part] = (ti.Matrix.identity(self.real, self.dim) +
                        self.dt * grad_acc) @ self.old_F[part]
        self.updateIsotropicHelper(part, self.F[part])
        self.scratch_xp[part] = self.x[part] + self.dt * self.scratch_vp[part]
def fill():
    """Store each index ``k`` of ``range(n)`` into ``val[k]``.

    The loop is configured with ``ti.parallelize(8)`` and ``ti.block_dim(8)``.
    """
    ti.parallelize(8)
    ti.block_dim(8)
    for k in range(n):
        val[k] = k
def reduce():
    """Add every element of ``a`` into the scalar ``tot[None]``.

    A block dimension of 1024 is requested for the loop.
    """
    ti.block_dim(1024)
    for k in a:
        tot[None] += a[k]
def fill():
    """Set each element of ``a`` to its own index.

    A block dimension of 128 is requested for the loop.
    """
    ti.block_dim(128)
    for k in a:
        a[k] = k
def substep(g_x: float, g_y: float, g_z: float):
    """Run one simulation substep with a caller-supplied gravity vector.

    Stages, in order: clear the grid, scatter particle momentum/mass to the
    grid (including the per-material stress), update grid velocities
    (normalise by mass, add ``dt * (g_x, g_y, g_z)``, zero components pointing
    into the boundary band), and gather grid velocities back to the particles.
    Particles whose ``used`` flag is 0 are skipped in both particle loops.

    :param g_x: x component of the acceleration added to grid velocities.
    :param g_y: y component of the acceleration added to grid velocities.
    :param g_z: z component of the acceleration added to grid velocities.
    """
    # Stage 1: reset grid velocity and mass.
    for I in ti.grouped(grid_m):
        grid_v[I] = ti.zero(grid_v[I])
        grid_m[I] = 0
    # Stage 2: particle -> grid.
    ti.block_dim(n_grid)
    for p in x:
        if used[p] == 0:
            continue
        # Particle position in grid units, base node and fractional offset.
        Xp = x[p] / dx
        base = int(Xp - 0.5)
        fx = Xp - base
        # Per-axis quadratic weights for the three stencil nodes.
        w = [0.5 * (1.5 - fx)**2, 0.75 - (fx - 1)**2, 0.5 * (fx - 0.5)**2]

        F[p] = (ti.Matrix.identity(float, 3) +
                dt * C[p]) @ F[p]  # deformation gradient update

        h = ti.exp(
            10 *
            (1.0 -
             Jp[p]))  # Hardening coefficient: snow gets harder when compressed
        if materials[p] == JELLY:  # jelly, make it softer
            h = 0.3
        mu, la = mu_0 * h, lambda_0 * h
        if materials[p] == WATER:  # liquid
            mu = 0.0

        U, sig, V = ti.svd(F[p])
        # J accumulates the product of the (possibly clamped) singular values.
        J = 1.0
        for d in ti.static(range(3)):
            new_sig = sig[d, d]
            if materials[p] == SNOW:  # Snow
                # Clamp the singular value to [1 - 2.5e-2, 1 + 4.5e-3].
                new_sig = min(max(sig[d, d], 1 - 2.5e-2),
                              1 + 4.5e-3)  # Plasticity
            # Jp absorbs the ratio between the original and clamped value.
            Jp[p] *= sig[d, d] / new_sig
            sig[d, d] = new_sig
            J *= new_sig
        if materials[
                p] == WATER:  # Reset deformation gradient to avoid numerical instability
            new_F = ti.Matrix.identity(float, 3)
            new_F[0, 0] = J
            F[p] = new_F
        elif materials[p] == SNOW:
            F[p] = U @ sig @ V.transpose(
            )  # Reconstruct elastic deformation gradient after plasticity
        stress = 2 * mu * (F[p] - U @ V.transpose()) @ F[p].transpose(
        ) + ti.Matrix.identity(float, 3) * la * J * (J - 1)
        stress = (-dt * p_vol * 4) * stress / dx**2
        affine = stress + p_mass * C[p]

        # Scatter momentum and mass to every node of the stencil.
        for offset in ti.static(ti.grouped(ti.ndrange(*neighbour))):
            dpos = (offset - fx) * dx
            weight = 1.0
            for i in ti.static(range(dim)):
                weight *= w[offset[i]][i]
            grid_v[base + offset] += weight * (p_mass * v[p] + affine @ dpos)
            grid_m[base + offset] += weight * p_mass
    # Stage 3: grid update.
    for I in ti.grouped(grid_m):
        if grid_m[I] > 0:
            # Convert accumulated momentum to velocity.
            grid_v[I] /= grid_m[I]
        grid_v[I] += dt * ti.Vector([g_x, g_y, g_z])
        # True where the node lies in the boundary band and the velocity
        # points further out of the domain.
        # NOTE(review): `and`/`or` are applied to vector comparisons here;
        # whether that is accepted depends on the Taichi version - confirm.
        cond = I < bound and grid_v[I] < 0 or I > n_grid - bound and grid_v[
            I] > 0
        grid_v[I] = 0 if cond else grid_v[I]
    # Stage 4: grid -> particle.
    ti.block_dim(n_grid)
    for p in x:
        if used[p] == 0:
            continue
        Xp = x[p] / dx
        base = int(Xp - 0.5)
        fx = Xp - base
        w = [0.5 * (1.5 - fx)**2, 0.75 - (fx - 1)**2, 0.5 * (fx - 0.5)**2]
        new_v = ti.zero(v[p])
        new_C = ti.zero(C[p])
        for offset in ti.static(ti.grouped(ti.ndrange(*neighbour))):
            dpos = (offset - fx) * dx
            weight = 1.0
            for i in ti.static(range(dim)):
                weight *= w[offset[i]][i]
            g_v = grid_v[base + offset]
            new_v += weight * g_v
            new_C += 4 * weight * g_v.outer_product(dpos) / dx**2
        v[p] = new_v
        # Advect the particle with its freshly gathered velocity.
        x[p] += dt * v[p]
        C[p] = new_C
def p2g(self, dt: ti.f32):
    """Scatter particle momentum and mass to ``self.grid_v`` / ``self.grid_m``.

    For every entry of ``self.pid`` the particle's deformation gradient
    ``self.F`` is advanced, a material-dependent stress is computed (separate
    code paths for the elastic, water, snow and sand material ids), and the
    resulting momentum and mass are added to the grid nodes enumerated by
    ``self.stencil_range()``.

    :param dt: time step used for the deformation-gradient update and the
        stress scaling.
    """
    ti.no_activate(self.particle)
    ti.block_dim(256)
    ti.block_local(*self.grid_v.entries)
    ti.block_local(self.grid_m)
    for I in ti.grouped(self.pid):
        p = self.pid[I]
        base = ti.floor(self.x[p] * self.inv_dx - 0.5).cast(int)
        for D in ti.static(range(self.dim)):
            # Declare each base coordinate's range relative to the pid index.
            base[D] = ti.assume_in_range(base[D], I[D], 0, 1)

        fx = self.x[p] * self.inv_dx - base.cast(float)
        # Quadratic kernels [http://mpm.graphics Eqn. 123, with x=fx, fx-1,fx-2]
        w = [0.5 * (1.5 - fx)**2, 0.75 - (fx - 1)**2, 0.5 * (fx - 0.5)**2]
        # deformation gradient update
        self.F[p] = (ti.Matrix.identity(ti.f32, self.dim) +
                     dt * self.C[p]) @ self.F[p]
        # Hardening coefficient: snow gets harder when compressed
        h = ti.exp(10 * (1.0 - self.Jp[p]))
        if self.material[
                p] == self.material_elastic:  # jelly, make it softer
            h = 0.3
        mu, la = self.mu_0 * h, self.lambda_0 * h
        if self.material[p] == self.material_water:  # liquid
            mu = 0.0
        U, sig, V = ti.svd(self.F[p])
        # J accumulates the product of the (possibly clamped) singular values;
        # it stays 1.0 for sand, which skips the loop below.
        J = 1.0
        if self.material[p] != self.material_sand:
            for d in ti.static(range(self.dim)):
                new_sig = sig[d, d]
                if self.material[p] == self.material_snow:  # Snow
                    # Clamp the singular value to [1 - 2.5e-2, 1 + 4.5e-3].
                    new_sig = min(max(sig[d, d], 1 - 2.5e-2),
                                  1 + 4.5e-3)  # Plasticity
                # Jp absorbs the ratio between original and clamped value.
                self.Jp[p] *= sig[d, d] / new_sig
                sig[d, d] = new_sig
                J *= new_sig
        if self.material[p] == self.material_water:
            # Reset deformation gradient to avoid numerical instability
            new_F = ti.Matrix.identity(ti.f32, self.dim)
            new_F[0, 0] = J
            self.F[p] = new_F
        elif self.material[p] == self.material_snow:
            # Reconstruct elastic deformation gradient after plasticity
            self.F[p] = U @ sig @ V.transpose()

        stress = ti.Matrix.zero(ti.f32, self.dim, self.dim)

        if self.material[p] != self.material_sand:
            # Non-sand materials: stress built from mu, la, F, U, V and J.
            stress = 2 * mu * (
                self.F[p] - U @ V.transpose()) @ self.F[p].transpose(
                ) + ti.Matrix.identity(ti.f32, self.dim) * la * J * (J - 1)
        else:
            # Sand: project the singular values first, rebuild F from them,
            # then form the stress from the logarithms of the projected
            # singular values.
            sig = self.sand_projection(sig, p)
            self.F[p] = U @ sig @ V.transpose()
            log_sig_sum = 0.0
            center = ti.Matrix.zero(ti.f32, self.dim, self.dim)
            for i in ti.static(range(self.dim)):
                log_sig_sum += ti.log(sig[i, i])
                center[i, i] = 2.0 * self.mu_0 * ti.log(
                    sig[i, i]) * (1 / sig[i, i])
            # Second pass: needs the complete log_sig_sum from the loop above.
            for i in ti.static(range(self.dim)):
                center[i, i] += self.lambda_0 * log_sig_sum * (1 / sig[i, i])
            stress = U @ center @ V.transpose() @ self.F[p].transpose()

        stress = (-dt * self.p_vol * 4 * self.inv_dx**2) * stress
        affine = stress + self.p_mass * self.C[p]

        # Loop over the grid node neighborhood given by self.stencil_range()
        for offset in ti.static(ti.grouped(self.stencil_range())):
            dpos = (offset.cast(float) - fx) * self.dx
            weight = 1.0
            for d in ti.static(range(self.dim)):
                weight *= w[offset[d]][d]
            self.grid_v[base + offset] += weight * (self.p_mass * self.v[p] +
                                                    affine @ dpos)
            self.grid_m[base + offset] += weight * self.p_mass