Beispiel #1
0
 def foo():
     """Accumulate shifted reads of `a` into `b`.

     For every index pair iterated from `a` and every shift in
     range(stencil_length), a[row + shift, col] and a[row, col + shift]
     are added to b[row, col].
     """
     ti.block_dim(512)
     ti.block_local(a)
     for row, col in a:
         for shift in range(stencil_length):
             # One accumulation per axis direction.
             b[row, col] += a[row + shift, col]
             b[row, col] += a[row, col + shift]
Beispiel #2
0
 def g2p(self, dt: ti.f32):
     """Gather grid velocities onto particles, then move the particles.

     For every entry of self.pid, a weighted sum over the stencil of grid
     nodes around the particle produces the new self.v and self.C values;
     self.x is then advanced by dt * self.v.

     :param dt: time step used for the position update
     """
     ti.block_dim(256)
     ti.block_local(*self.grid_v.entries)
     ti.no_activate(self.particle)
     for I in ti.grouped(self.pid):
         q = self.pid[I]
         cell = ti.floor(self.x[q] * self.inv_dx - 0.5).cast(int)
         for axis in ti.static(range(self.dim)):
             # Hint that each component of cell lies within [I + 0, I + 1).
             cell[axis] = ti.assume_in_range(cell[axis], I[axis], 0, 1)
         frac = self.x[q] * self.inv_dx - cell.cast(float)
         # Three per-axis weight vectors, indexed by stencil offset.
         wts = [
             0.5 * (1.5 - frac)**2,
             0.75 - (frac - 1.0)**2,
             0.5 * (frac - 0.5)**2,
         ]
         vel_sum = ti.Vector.zero(ti.f32, self.dim)
         affine_sum = ti.Matrix.zero(ti.f32, self.dim, self.dim)
         # Visit every grid node of the stencil around the particle.
         for shift in ti.static(ti.grouped(self.stencil_range())):
             rel = shift.cast(float) - frac
             node_v = self.grid_v[cell + shift]
             wgt = 1.0
             for ax in ti.static(range(self.dim)):
                 wgt *= wts[shift[ax]][ax]
             vel_sum += wgt * node_v
             affine_sum += 4 * self.inv_dx * wgt * node_v.outer_product(rel)
         self.v[q] = vel_sum
         self.C[q] = affine_sum
         self.x[q] += dt * self.v[q]  # advection
Beispiel #3
0
    def p2g_naive():
        """Scatter a constant contribution from every entry of x into m3.

        x[k] * N is cast to integer coordinates; each node of the
        extend-by-extend window starting there receives (N * N / M) * 0.01.
        """
        ti.block_dim(256)
        for k in x:
            corner = (x[k] * N).cast(ti.i32)

            for shift in ti.static(ti.grouped(ti.ndrange(extend, extend))):
                m3[corner + shift] += (N * N / M) * 0.01
Beispiel #4
0
    def p2g_naive():
        """Scatter scatter_weight from every entry of x into m3.

        floor(x[k] * N) is cast to integer coordinates; each node of the
        extend-by-extend window starting there receives scatter_weight.
        """
        ti.block_dim(256)
        for k in x:
            corner = ti.floor(x[k] * N).cast(ti.i32)

            for shift in ti.static(ti.grouped(ti.ndrange(extend, extend))):
                m3[corner + shift] += scatter_weight
Beispiel #5
0
def reduce_seri() -> ti.f32:
    """Return the sum of v1[k] * v2[k] for k in range(v1.shape[0]).

    The accumulation loop is nested inside a single-iteration outer loop
    (presumably so that the inner loop is executed serially -- confirm).
    """
    length = v1.shape[0]
    acc = 0.0
    ti.block_dim(32)
    for _ in range(1):
        for k in range(length):
            acc += v1[k] * v2[k]
    return acc
Beispiel #6
0
def reduce_para() -> ti.f32:
    """Return the sum of v1[k] * v2[k] for k in range(v1.shape[0]).

    The accumulation happens directly in the outermost loop of the function.
    """
    length = v1.shape[0]
    acc = 0.0
    # larger block_dim leads to less overhead; default dim = 32
    ti.block_dim(32)
    for k in range(length):
        acc += v1[k] * v2[k]
    return acc
Beispiel #7
0
 def insert():
     """Randomize every entry of x and register its index in pid.

     Each of the two coordinates is drawn as ti.random() * (1 - 2 * bound)
     + bound; the index is then appended to pid's parent at the integer
     coordinates int(x * N).
     """
     ti.block_dim(256)
     for k in x:
         # Draw the two coordinates in order: component 0 first, then 1.
         first = ti.random() * (1 - 2 * bound) + bound
         second = ti.random() * (1 - 2 * bound) + bound
         x[k] = ti.Vector([first, second])
         ti.append(pid.parent(), [int(x[k][0] * N), int(x[k][1] * N)], k)
Beispiel #8
0
 def insert():
     """Append every index of x to pid's parent at its base coordinates.

     The base is floor(x * N) minus grid_offset, converted to int per axis.
     """
     ti.block_dim(256)
     for k in x:
         # insert and p2g must compute the base coordinates in exactly the
         # same way; otherwise floating-point errors may cause a coordinate
         # mismatch.
         cell_x = int(ti.floor(x[k][0] * N) - grid_offset[0])
         cell_y = int(ti.floor(x[k][1] * N) - grid_offset[1])
         cell = ti.Vector([cell_x, cell_y])
         ti.append(pid.parent(), cell, k)
Beispiel #9
0
 def insert():
     """Append every index of x to pid's parent at its base coordinates.

     The base is int(x * N - grid_offset) per axis.
     """
     ti.block_dim(256)
     for k in x:
         # The grid offset is subtracted manually, so the values are always
         # positive: int() is enough and no ti.floor is required.
         cell_x = int(x[k][0] * N - grid_offset[0])
         cell_y = int(x[k][1] * N - grid_offset[1])
         cell = ti.Vector([cell_x, cell_y])
         ti.append(pid.parent(), cell, k)
Beispiel #10
0
def update_Q(rk_step: ti.template()):
    """Update Q on interior cells from the differences of F_x and F_y.

    rk_step is a compile-time value (presumably a Runge-Kutta stage index):
      * 0: Q <- Q + dt * net_flux / h
      * 1: Q <- (Q + Q_old) / 2 + dt * net_flux / h
    where net_flux = F_x[i, j] - F_x[i + 1, j] + F_y[i, j] - F_y[i, j + 1].
    """
    ti.block_dim(256)
    ti.block_local(F_x, F_y)
    for ci, cj in Q:
        if is_interior_cell(ci, cj):
            if ti.static(rk_step == 0):
                net_flux = (F_x[ci, cj] - F_x[ci + 1, cj] + F_y[ci, cj] -
                            F_y[ci, cj + 1])
                Q[ci, cj] = Q[ci, cj] + dt[None] * net_flux / h
            if ti.static(rk_step == 1):
                net_flux = (F_x[ci, cj] - F_x[ci + 1, cj] + F_y[ci, cj] -
                            F_y[ci, cj + 1])
                blended = (Q[ci, cj] + Q_old[ci, cj]) / 2.0
                Q[ci, cj] = blended + dt[None] * net_flux / h
 def build_pid(self, pid: ti.template(), grid_m: ti.template(), offset: ti.template()):
     """
     Append every particle index to `pid` under the block containing it.

     The grid has blocking (e.g. 4x4x4); we wish to put the particles from
     each block into a GPU block, then use shared memory (ti.block_local)
     to accelerate.

     Note: the `offset` argument is not referenced in the body; the value
     subtracted from the base coordinates is read from self.offset.

     :param pid: structure whose parent receives the particle indices
     :param grid_m: field used as the source coordinate frame for
         ti.rescale_index
     :param offset: unused in the body
     :return: nothing
     """
     ti.block_dim(64)
     for q in self.x:
         cell = int(ti.floor(self.x[q] * self.inv_dx - 0.5))
         shifted = cell - ti.Vector(list(self.offset))
         # pid grandparent is `block`
         block_index = ti.rescale_index(grid_m, pid.parent(2), shifted)
         ti.append(pid.parent(), block_index, q)
Beispiel #12
0
    def p2g(use_shared: ti.template(), m: ti.template()):
        """Scatter scatter_weight from every particle listed in pid into m.

        :param use_shared: compile-time flag; when true, ti.cache_shared(m)
            is requested for the loop
        :param m: field receiving the contributions
        """
        ti.block_dim(256)
        if ti.static(use_shared):
            ti.cache_shared(m)
        for bi, bj, slot in pid:
            q = pid[bi, bj, slot]

            raw = ti.floor(x[q] * N).cast(ti.i32)

            # Hint that each coordinate lies within [index + 0, index + 1).
            c0 = ti.assume_in_range(raw[0], bi, 0, 1)
            c1 = ti.assume_in_range(raw[1], bj, 0, 1)

            corner = ti.Vector([c0, c1])

            for shift in ti.static(ti.grouped(ti.ndrange(extend, extend))):
                m[corner + shift] += scatter_weight
Beispiel #13
0
    def p2g(use_shared: ti.template(), m: ti.template()):
        """Scatter scatter_weight from every particle listed in pid into m.

        :param use_shared: compile-time flag; when true, ti.block_local(m)
            is requested for the loop
        :param m: field receiving the contributions
        """
        ti.block_dim(256)
        if ti.static(use_shared):
            ti.block_local(m)
        for I in ti.grouped(pid):
            q = pid[I]

            raw = ti.floor(x[q] * N).cast(ti.i32)
            # Express the pid index in the coordinate frame of m.
            anchor = ti.rescale_index(pid, m, I)
            c0 = ti.assume_in_range(raw[0], anchor[0], 0, 1)
            c1 = ti.assume_in_range(raw[1], anchor[1], 0, 1)

            corner = ti.Vector([c0, c1])

            for shift in ti.static(ti.grouped(ti.ndrange(extend, extend))):
                m[corner + shift] += scatter_weight
Beispiel #14
0
def substep():
    """Run one simulation substep over the particle and grid fields.

    The body consists of four outermost loops, executed in order:
      1. clear F_grid_v and F_grid_m;
      2. for every particle in F_x, accumulate weighted
         (p_mass * F_v + affine @ dpos) and weighted p_mass into the
         stencil of grid nodes around the particle;
      3. for every grid node, divide F_grid_v by F_grid_m where the mass is
         positive, subtract dt * gravity from component 1 and zero the
         entries selected by `cond`;
      4. for every particle, gather F_grid_v back into F_v and F_C, then
         update F_x and F_J.
    """
    # Loop 1: reset the grid accumulators.
    for I in ti.grouped(F_grid_m):
        F_grid_v[I] = ti.zero(F_grid_v[I])
        F_grid_m[I] = 0
    # Loop 2: scatter particle quantities to the grid.
    ti.block_dim(n_grid)
    for p in F_x:
        Xp = F_x[p] / dx  # particle position in grid units
        base = int(Xp - 0.5)  # first node of the stencil
        fx = Xp - base  # position relative to the base node
        # Three per-axis weight vectors, indexed by stencil offset.
        w = [0.5 * (1.5 - fx)**2, 0.75 - (fx - 1)**2, 0.5 * (fx - 0.5)**2]
        stress = -dt * 4 * E * p_vol * (F_J[p] - 1) / dx**2
        affine = ti.Matrix.identity(float, dim) * stress + p_mass * F_C[p]
        for offset in ti.static(ti.grouped(ti.ndrange(*neighbour))):
            dpos = (offset - fx) * dx
            # Node weight = product of the per-axis weights.
            weight = 1.0
            for i in ti.static(range(dim)):
                weight *= w[offset[i]][i]
            F_grid_v[base +
                     offset] += weight * (p_mass * F_v[p] + affine @ dpos)
            F_grid_m[base + offset] += weight * p_mass
    # Loop 3: normalize by mass, apply gravity and the boundary condition.
    for I in ti.grouped(F_grid_m):
        if F_grid_m[I] > 0:
            F_grid_v[I] /= F_grid_m[I]
        F_grid_v[I][1] -= dt * gravity
        # Selects entries within `bound` nodes of the low side with a
        # negative value, or beyond n_grid - bound with a positive value
        # (presumably evaluated per component -- confirm).
        cond = (I < bound) & (F_grid_v[I] < 0) | \
               (I > n_grid - bound) & (F_grid_v[I] > 0)
        F_grid_v[I] = 0 if cond else F_grid_v[I]
    # Loop 4: gather grid values back to the particles.
    ti.block_dim(n_grid)
    for p in F_x:
        Xp = F_x[p] / dx
        base = int(Xp - 0.5)
        fx = Xp - base
        w = [0.5 * (1.5 - fx)**2, 0.75 - (fx - 1)**2, 0.5 * (fx - 0.5)**2]
        new_v = ti.zero(F_v[p])
        new_C = ti.zero(F_C[p])
        for offset in ti.static(ti.grouped(ti.ndrange(*neighbour))):
            dpos = (offset - fx) * dx
            weight = 1.0
            for i in ti.static(range(dim)):
                weight *= w[offset[i]][i]
            g_v = F_grid_v[base + offset]
            new_v += weight * g_v
            new_C += 4 * weight * g_v.outer_product(dpos) / dx**2
        F_v[p] = new_v
        F_x[p] += dt * F_v[p]  # move the particle with its new velocity
        F_J[p] *= 1 + dt * new_C.trace()
        F_C[p] = new_C
Beispiel #15
0
    def stencil_2d(y: ti.template(), x: ti.template()):
        """Apply the stencil_common offsets between x and y.

        With `scatter` set, x[I] is added to y at every offset position;
        otherwise the x values at the offset positions are summed into y[I].
        When `bls` is set, ti.block_local is requested for the field that is
        accessed at the offset positions.
        """
        # reference: tests/python/bls_test_template.py
        if ti.static(bls):
            if ti.static(scatter):
                ti.block_local(y)
            else:
                ti.block_local(x)
        ti.block_dim(64)  # 8*8=64

        for I in ti.grouped(x):
            if ti.static(scatter):
                for shift in ti.static(stencil_common):
                    y[I + ti.Vector(shift)] += x[I]
            else:  # gather
                acc = ti.cast(0.0, dtype)
                for shift in ti.static(stencil_common):
                    acc = acc + x[I + ti.Vector(shift)]
                y[I] = acc
Beispiel #16
0
    def g2p(use_shared: ti.template(), s: ti.template()):
        """Sum m1 over the window around every particle and store it in s.

        :param use_shared: compile-time flag; when true, ti.cache_shared(m1)
            is requested for the loop
        :param s: per-particle field receiving the gathered sum
        """
        ti.block_dim(256)
        if ti.static(use_shared):
            ti.cache_shared(m1)
        for bi, bj, slot in pid:
            q = pid[bi, bj, slot]

            raw = ti.floor(x[q] * N).cast(ti.i32)

            # Hint that each coordinate lies within [index + 0, index + 1).
            c0 = ti.assume_in_range(raw[0], bi, 0, 1)
            c1 = ti.assume_in_range(raw[1], bj, 0, 1)

            corner = ti.Vector([c0, c1])

            gathered = 0.0

            for shift in ti.static(ti.grouped(ti.ndrange(extend, extend))):
                gathered += m1[corner + shift]

            s[q] = gathered
    def multiply(self, x: ti.template(), b: ti.template()):
        """Fill b from x: a mass term plus a particle-stress term.

        Steps, in order: zero b; add mass_matrix * x; call
        computeDvAndGradDv(x); zero scratch_stress; call
        computeStressDifferential for every particle; accumulate
        dt * dt * weight * stress @ dpos into b at the grid nodes around
        every particle.

        :param x: input field, iterated entry by entry
        :param b: output field, overwritten
        """
        for I in b:
            b[I] = ti.zero(b[I])

        # Note the relationship H dx = - df, where H is the stiffness matrix
        # inertia part
        for I in x:
            b[I] += self.mass_matrix[I] * x[I]

        self.computeDvAndGradDv(x)

        # scratch_gradV is now temporarily used for storing gradDV (evaluated at particles)
        # scratch_vp is now temporarily used for storing DV (evaluated at particles)

        for p in self.x:
            self.scratch_stress[p] = ti.zero(self.scratch_stress[p])

        for p in self.x:
            self.computeStressDifferential(p, self.scratch_gradV[p],
                                           self.scratch_stress[p],
                                           self.scratch_vp[p])
            # scratch_stress is now V_p^0 dP (F_p^n)^T (dP is Ap in snow paper)

        ti.block_dim(self.n_grid)
        for p in self.x:
            Xp = self.x[p] * self.inv_dx  # particle position in grid units
            base = int(Xp - 0.5)  # first node of the stencil
            fx = Xp - base  # position relative to the base node
            w = [
                0.5 * (1.5 - fx)**2, 0.75 - (fx - 1)**2, 0.5 * (fx - 0.5)**2
            ]  # Quadratic kernels  [http://mpm.graphics   Eqn. 123, with x=fx, fx-1,fx-2]
            stress = self.scratch_stress[p]
            for offset in ti.static(ti.grouped(ti.ndrange(*self.neighbour))):
                dpos = (offset - fx) * self.dx
                # Node weight = product of the per-axis weights.
                weight = self.real(1)
                for i in ti.static(range(self.dim)):
                    weight *= w[offset[i]][i]

                b[self.idx(base + offset)] += self.dt * self.dt * (
                    weight * stress @ dpos
                )  # fi -= \sum_p (Ap (xi-xp)  - fp )w_ip Dp_inv
Beispiel #18
0
def compute_F():
    """Fill the face flux fields F_x and F_y for every index of Q.

    On interior faces the left/right states wL and wR are built from W with
    mc_lim-limited differences, converted with w_to_u and passed to
    HLLC_flux. On boundary faces the two adjacent Q values are passed to
    HLLC_flux directly. The x and y directions are handled by two
    independent if/elif pairs with the normals (1, 0) and (0, 1).
    """
    ti.block_dim(256)
    ti.block_local(W)
    for i, j in Q:
        if is_interior_x_face(i, j):
            # MUSCL reconstruction of left and right states with HLLC flux
            wL = ti.Vector([0.0, 0.0, 0.0, 0.0])
            wR = ti.Vector([0.0, 0.0, 0.0, 0.0])
            for f in ti.static(range(4)):
                # Ratios of consecutive differences along x, fed to the
                # limiter.
                # NOTE(review): the denominators are zero when neighbouring
                # W values coincide; presumably tolerated by mc_lim --
                # confirm.
                ratio_l = (W[i, j][f] - W[i - 1, j][f]) / (W[i - 1, j][f] -
                                                           W[i - 2, j][f])
                ratio_r = (W[i, j][f] - W[i - 1, j][f]) / (W[i + 1, j][f] -
                                                           W[i, j][f])
                wL[f] = W[i - 1, j][f] + 0.5 * mc_lim(ratio_l) * (
                    W[i - 1, j][f] - W[i - 2, j][f])
                wR[f] = W[i, j][f] - 0.5 * mc_lim(ratio_r) * (W[i + 1, j][f] -
                                                              W[i, j][f])
            F_x[i, j] = HLLC_flux(w_to_u(wL), w_to_u(wR), ti.Vector([1.0,
                                                                     0.0]))

        elif is_boundary_x_face(i, j):
            # Boundary face: use the two adjacent Q values as they are.
            F_x[i, j] = HLLC_flux(Q[i - 1, j], Q[i, j], ti.Vector([1.0, 0.0]))

        if is_interior_y_face(i, j):
            # MUSCL reconstruction of left and right states with HLLC flux
            wL = ti.Vector([0.0, 0.0, 0.0, 0.0])
            wR = ti.Vector([0.0, 0.0, 0.0, 0.0])
            for f in ti.static(range(4)):
                # Same construction as for the x faces, shifted along j.
                ratio_l = (W[i, j][f] - W[i, j - 1][f]) / (W[i, j - 1][f] -
                                                           W[i, j - 2][f])
                ratio_r = (W[i, j][f] - W[i, j - 1][f]) / (W[i, j + 1][f] -
                                                           W[i, j][f])
                wL[f] = W[i, j - 1][f] + 0.5 * mc_lim(ratio_l) * (
                    W[i, j - 1][f] - W[i, j - 2][f])
                wR[f] = W[i, j][f] - 0.5 * mc_lim(ratio_r) * (W[i, j + 1][f] -
                                                              W[i, j][f])
            F_y[i, j] = HLLC_flux(w_to_u(wL), w_to_u(wR), ti.Vector([0.0,
                                                                     1.0]))

        elif is_boundary_y_face(i, j):
            # Boundary face: use the two adjacent Q values as they are.
            F_y[i, j] = HLLC_flux(Q[i, j - 1], Q[i, j], ti.Vector([0.0, 1.0]))
Beispiel #19
0
    def g2p(use_shared: ti.template(), s: ti.template()):
        """Sum m1 over the window around every particle and store it in s.

        :param use_shared: compile-time flag; when true, ti.block_local(m1)
            is requested for the loop
        :param s: per-particle field receiving the gathered sum
        """
        ti.block_dim(256)
        if ti.static(use_shared):
            ti.block_local(m1)
        for I in ti.grouped(pid):
            q = pid[I]

            raw = ti.floor(x[q] * N).cast(ti.i32)

            # Express the pid index in the coordinate frame of m1.
            anchor = ti.rescale_index(pid, m1, I)
            c0 = ti.assume_in_range(raw[0], anchor[0], 0, 1)
            c1 = ti.assume_in_range(raw[1], anchor[1], 0, 1)

            corner = ti.Vector([c0, c1])

            gathered = 0.0

            for shift in ti.static(ti.grouped(ti.ndrange(extend, extend))):
                gathered += m1[corner + shift]

            s[q] = gathered
    def computeResidual(self):
        """Assemble self.residual from gravity, inertia and particle stress.

        Steps, in order: set residual to dt * mass_matrix * gravity;
        subtract mass_matrix * dv; for every particle, rebuild the velocity
        gradient from grid_v + dv, form F from old_F, derive a stress via
        dpsi_dF and add dt * force to the nodes around the particle;
        finally call self.project on the residual.
        """
        for I in self.dv:
            self.residual[I] = self.dt * self.mass_matrix[I] * self.gravity

        for I in self.dv:
            self.residual[I] -= self.mass_matrix[I] * self.dv[I]

        ti.block_dim(self.n_grid)
        for p in self.x:
            Xp = self.x[p] * self.inv_dx  # particle position in grid units
            base = int(Xp - 0.5)  # first node of the stencil
            fx = Xp - base  # position relative to the base node
            # Three per-axis weight vectors, indexed by stencil offset.
            w = [0.5 * (1.5 - fx)**2, 0.75 - (fx - 1)**2, 0.5 * (fx - 0.5)**2]
            new_C = ti.zero(self.C[p])
            # First pass over the stencil: gather the velocity gradient.
            for offset in ti.static(ti.grouped(ti.ndrange(*self.neighbour))):
                dpos = (offset - fx) * self.dx
                weight = ti.cast(1.0, self.real)
                for i in ti.static(range(self.dim)):
                    weight *= w[offset[i]][i]

                g_v = self.grid_v[base + offset] + self.dv[self.idx(base +
                                                                    offset)]
                new_C += 4 * self.inv_dx * weight * g_v.outer_product(dpos)

            # Trial F built from old_F; self.F itself is not written here.
            F = (ti.Matrix.identity(self.real, self.dim) +
                 self.dt * new_C) @ self.old_F[p]
            stress = (-self.p_vol * 4 * self.inv_dx *
                      self.inv_dx) * self.dpsi_dF(F) @ F.transpose()

            # Second pass over the stencil: scatter the resulting force.
            for offset in ti.static(ti.grouped(ti.ndrange(*self.neighbour))):
                dpos = (offset - fx) * self.dx
                weight = ti.cast(1.0, self.real)
                for i in ti.static(range(self.dim)):
                    weight *= w[offset[i]][i]

                force = weight * stress @ dpos
                self.residual[self.idx(base + offset)] += self.dt * force

        self.project(self.residual)
    def updateState(self):
        """Refresh per-particle F and scratch_xp from the grid state.

        For every particle the velocity gradient is gathered from
        grid_v + dv over the surrounding nodes, F is rebuilt from old_F,
        updateIsotropicHelper is invoked with the new F, and scratch_xp is
        set to x + dt * scratch_vp.
        """
        ti.block_dim(self.n_grid)
        for q in self.x:
            grid_pos = self.x[q] * self.inv_dx
            cell = int(grid_pos - 0.5)
            frac = grid_pos - cell
            # Three per-axis weight vectors, indexed by stencil offset.
            wts = [
                0.5 * (1.5 - frac)**2,
                0.75 - (frac - 1)**2,
                0.5 * (frac - 0.5)**2,
            ]
            grad = ti.zero(self.C[q])
            for shift in ti.static(ti.grouped(ti.ndrange(*self.neighbour))):
                rel = (shift - frac) * self.dx
                wgt = ti.cast(1.0, self.real)
                for ax in ti.static(range(self.dim)):
                    wgt *= wts[shift[ax]][ax]

                node = cell + shift
                node_v = self.grid_v[node] + self.dv[self.idx(node)]
                grad += 4 * self.inv_dx * wgt * node_v.outer_product(rel)

            step = ti.Matrix.identity(self.real, self.dim) + self.dt * grad
            self.F[q] = step @ self.old_F[q]
            self.updateIsotropicHelper(q, self.F[q])
            self.scratch_xp[q] = self.x[q] + self.dt * self.scratch_vp[q]
Beispiel #22
0
 def fill():
     """Store each index k in range(n) into val[k]."""
     ti.parallelize(8)
     ti.block_dim(8)
     for k in range(n):
         val[k] = k
Beispiel #23
0
def reduce():
    """Add every element of `a` to the scalar accumulator tot[None]."""
    ti.block_dim(1024)
    for k in a:
        tot[None] += a[k]
Beispiel #24
0
def fill():
    """Store each index of `a` into the corresponding element of `a`."""
    ti.block_dim(128)
    for k in a:
        a[k] = k
Beispiel #25
0
def substep(g_x: float, g_y: float, g_z: float):
    """Run one simulation substep over the particle and grid fields.

    The body consists of four outermost loops, executed in order:
      1. clear grid_v and grid_m;
      2. for every particle with used[p] != 0, update F and Jp according to
         its material, compute a stress and accumulate weighted momentum
         and mass into the surrounding grid nodes;
      3. for every grid node, divide grid_v by grid_m where the mass is
         positive, add dt * (g_x, g_y, g_z) and zero the entries selected
         by `cond`;
      4. for every used particle, gather grid_v back into v and C and
         advance x.

    :param g_x: first component of the acceleration added on the grid
    :param g_y: second component of the acceleration added on the grid
    :param g_z: third component of the acceleration added on the grid
    """
    # Loop 1: reset the grid accumulators.
    for I in ti.grouped(grid_m):
        grid_v[I] = ti.zero(grid_v[I])
        grid_m[I] = 0
    # Loop 2: per-particle state update and scatter to the grid.
    ti.block_dim(n_grid)
    for p in x:
        if used[p] == 0:
            continue  # skip particle slots that are not in use
        Xp = x[p] / dx  # particle position in grid units
        base = int(Xp - 0.5)  # first node of the stencil
        fx = Xp - base  # position relative to the base node
        # Three per-axis weight vectors, indexed by stencil offset.
        w = [0.5 * (1.5 - fx)**2, 0.75 - (fx - 1)**2, 0.5 * (fx - 0.5)**2]

        F[p] = (ti.Matrix.identity(float, 3) +
                dt * C[p]) @ F[p]  # deformation gradient update

        h = ti.exp(
            10 *
            (1.0 -
             Jp[p]))  # Hardening coefficient: snow gets harder when compressed
        if materials[p] == JELLY:  # jelly, make it softer
            h = 0.3
        mu, la = mu_0 * h, lambda_0 * h
        if materials[p] == WATER:  # liquid
            mu = 0.0

        U, sig, V = ti.svd(F[p])
        # J accumulates the product of the (possibly clamped) diagonal
        # entries of sig.
        J = 1.0
        for d in ti.static(range(3)):
            new_sig = sig[d, d]
            if materials[p] == SNOW:  # Snow
                new_sig = min(max(sig[d, d], 1 - 2.5e-2),
                              1 + 4.5e-3)  # Plasticity
            Jp[p] *= sig[d, d] / new_sig
            sig[d, d] = new_sig
            J *= new_sig
        if materials[
                p] == WATER:  # Reset deformation gradient to avoid numerical instability
            new_F = ti.Matrix.identity(float, 3)
            new_F[0, 0] = J
            F[p] = new_F
        elif materials[p] == SNOW:
            F[p] = U @ sig @ V.transpose(
            )  # Reconstruct elastic deformation gradient after plasticity
        stress = 2 * mu * (F[p] - U @ V.transpose()) @ F[p].transpose(
        ) + ti.Matrix.identity(float, 3) * la * J * (J - 1)
        stress = (-dt * p_vol * 4) * stress / dx**2
        affine = stress + p_mass * C[p]

        for offset in ti.static(ti.grouped(ti.ndrange(*neighbour))):
            dpos = (offset - fx) * dx
            # Node weight = product of the per-axis weights.
            weight = 1.0
            for i in ti.static(range(dim)):
                weight *= w[offset[i]][i]
            grid_v[base + offset] += weight * (p_mass * v[p] + affine @ dpos)
            grid_m[base + offset] += weight * p_mass
    # Loop 3: normalize by mass, apply gravity and the boundary condition.
    for I in ti.grouped(grid_m):
        if grid_m[I] > 0:
            grid_v[I] /= grid_m[I]
        grid_v[I] += dt * ti.Vector([g_x, g_y, g_z])
        # NOTE(review): `and` / `or` are applied to vector comparisons here;
        # presumably evaluated per component, and support for this may
        # depend on the Taichi version -- confirm.
        cond = I < bound and grid_v[I] < 0 or I > n_grid - bound and grid_v[
            I] > 0
        grid_v[I] = 0 if cond else grid_v[I]
    # Loop 4: gather grid values back to the particles.
    ti.block_dim(n_grid)
    for p in x:
        if used[p] == 0:
            continue  # skip particle slots that are not in use
        Xp = x[p] / dx
        base = int(Xp - 0.5)
        fx = Xp - base
        w = [0.5 * (1.5 - fx)**2, 0.75 - (fx - 1)**2, 0.5 * (fx - 0.5)**2]
        new_v = ti.zero(v[p])
        new_C = ti.zero(C[p])
        for offset in ti.static(ti.grouped(ti.ndrange(*neighbour))):
            dpos = (offset - fx) * dx
            weight = 1.0
            for i in ti.static(range(dim)):
                weight *= w[offset[i]][i]
            g_v = grid_v[base + offset]
            new_v += weight * g_v
            new_C += 4 * weight * g_v.outer_product(dpos) / dx**2
        v[p] = new_v
        x[p] += dt * v[p]  # move the particle with its new velocity
        C[p] = new_C
    def p2g(self, dt: ti.f32):
        """Update per-particle state and scatter momentum and mass to the grid.

        For every entry of self.pid: update F from C, apply the
        material-specific handling of the singular values (snow clamp,
        water reset, sand projection), compute a stress, then accumulate
        weight * (p_mass * v + affine @ dpos) into grid_v and
        weight * p_mass into grid_m over the stencil around the particle.

        :param dt: time step used in the F update and the stress scaling
        """
        ti.no_activate(self.particle)
        ti.block_dim(256)
        ti.block_local(*self.grid_v.entries)
        ti.block_local(self.grid_m)
        for I in ti.grouped(self.pid):
            p = self.pid[I]
            base = ti.floor(self.x[p] * self.inv_dx - 0.5).cast(int)
            for D in ti.static(range(self.dim)):
                # Hint that each component of base lies within [I + 0, I + 1).
                base[D] = ti.assume_in_range(base[D], I[D], 0, 1)

            fx = self.x[p] * self.inv_dx - base.cast(float)
            # Quadratic kernels  [http://mpm.graphics   Eqn. 123, with x=fx, fx-1,fx-2]
            w = [0.5 * (1.5 - fx)**2, 0.75 - (fx - 1)**2, 0.5 * (fx - 0.5)**2]
            # deformation gradient update
            self.F[p] = (ti.Matrix.identity(ti.f32, self.dim) +
                         dt * self.C[p]) @ self.F[p]
            # Hardening coefficient: snow gets harder when compressed
            h = ti.exp(10 * (1.0 - self.Jp[p]))
            if self.material[
                    p] == self.material_elastic:  # jelly, make it softer
                h = 0.3
            mu, la = self.mu_0 * h, self.lambda_0 * h
            if self.material[p] == self.material_water:  # liquid
                mu = 0.0
            U, sig, V = ti.svd(self.F[p])
            # J accumulates the product of the (possibly clamped) diagonal
            # entries of sig; it stays 1.0 for sand, which skips this loop.
            J = 1.0
            if self.material[p] != self.material_sand:
                for d in ti.static(range(self.dim)):
                    new_sig = sig[d, d]
                    if self.material[p] == self.material_snow:  # Snow
                        new_sig = min(max(sig[d, d], 1 - 2.5e-2),
                                      1 + 4.5e-3)  # Plasticity
                    self.Jp[p] *= sig[d, d] / new_sig
                    sig[d, d] = new_sig
                    J *= new_sig
            if self.material[p] == self.material_water:
                # Reset deformation gradient to avoid numerical instability
                new_F = ti.Matrix.identity(ti.f32, self.dim)
                new_F[0, 0] = J
                self.F[p] = new_F
            elif self.material[p] == self.material_snow:
                # Reconstruct elastic deformation gradient after plasticity
                self.F[p] = U @ sig @ V.transpose()

            stress = ti.Matrix.zero(ti.f32, self.dim, self.dim)

            if self.material[p] != self.material_sand:
                stress = 2 * mu * (
                    self.F[p] - U @ V.transpose()) @ self.F[p].transpose(
                    ) + ti.Matrix.identity(ti.f32, self.dim) * la * J * (J - 1)
            else:
                # Sand: project the singular values, rebuild F and form the
                # stress from the logarithms of the projected values.
                sig = self.sand_projection(sig, p)
                self.F[p] = U @ sig @ V.transpose()
                log_sig_sum = 0.0
                center = ti.Matrix.zero(ti.f32, self.dim, self.dim)
                for i in ti.static(range(self.dim)):
                    log_sig_sum += ti.log(sig[i, i])
                    center[i, i] = 2.0 * self.mu_0 * ti.log(
                        sig[i, i]) * (1 / sig[i, i])
                # Second pass: log_sig_sum is complete only after the loop
                # above has finished.
                for i in ti.static(range(self.dim)):
                    center[i,
                           i] += self.lambda_0 * log_sig_sum * (1 / sig[i, i])
                stress = U @ center @ V.transpose() @ self.F[p].transpose()

            stress = (-dt * self.p_vol * 4 * self.inv_dx**2) * stress
            affine = stress + self.p_mass * self.C[p]

            # Loop over 3x3 grid node neighborhood
            for offset in ti.static(ti.grouped(self.stencil_range())):
                dpos = (offset.cast(float) - fx) * self.dx
                # Node weight = product of the per-axis weights.
                weight = 1.0
                for d in ti.static(range(self.dim)):
                    weight *= w[offset[d]][d]
                self.grid_v[base +
                            offset] += weight * (self.p_mass * self.v[p] +
                                                 affine @ dpos)
                self.grid_m[base + offset] += weight * self.p_mass