def reduce(self, dst):
    """Flag convergence once the RMS of ``dst.psi`` is at most 1e-3.

    The number of entries with ``psi >= 0`` and the sum of ``psi**2`` are
    reduced locally into ``dst.tmp_comp[0]`` and ``dst.tmp_comp[1]``, then
    combined through ``parallel_reduce_array`` and written back into the
    ``tmp_comp`` carray.  ``self.eqn_has_converged`` is set to 1 when
    ``sqrt(tmp_comp[1] / tmp_comp[0]) <= 1e-3``; this method never resets it.

    Parameters
    ----------
    dst
        Particle array exposing ``psi``, a two-slot ``tmp_comp`` and
        ``get_carray``.
    """
    dst.tmp_comp[0] = serial_reduce_array(dst.psi >= 0.0, 'sum')
    dst.tmp_comp[1] = serial_reduce_array(dst.psi**2, 'sum')
    # Kept unconditional: presumably a collective operation that every
    # process has to take part in, so the zero-count guard comes after it.
    dst.get_carray('tmp_comp').set_data(
        parallel_reduce_array(dst.tmp_comp, 'sum'))

    count = dst.tmp_comp[0]
    if count > 0:
        epsilon = sqrt(dst.tmp_comp[1] / count)
        if epsilon <= 1e-3:
            self.eqn_has_converged = 1
    # With a zero count the RMS is undefined; the flag is left untouched
    # rather than dividing by zero.
def reduce(self, dst):
    """Rescale ``dst.h`` from ``dst.h0`` with a density-dependent factor.

    Computes ``g = exp(sum(dst.logrho) / len(dst.x))`` and then sets
    ``dst.h = self.k * (g / dst.rho) ** self.eps * dst.h0`` element-wise.

    NOTE(review): the ``logrho`` sum goes through ``parallel_reduce_array``
    while the divisor is the local ``len(dst.x)`` -- confirm that this is
    the intended normalisation for multi-process runs.
    """
    num_particles = len(dst.x)
    local_log_sum = serial_reduce_array(dst.logrho, 'sum')
    total_log_sum = parallel_reduce_array(local_log_sum, 'sum')

    # exp(mean(log rho)); presumably logrho holds log(rho), which would
    # make this the geometric mean of the density.
    mean_factor = exp(total_log_sum / num_particles)
    scale = self.k * numpy.power(mean_factor / dst.rho, self.eps)
    dst.h[:] = scale * dst.h0
def reduce(self, dst, t, dt):
    """Store the relative deviation of the mean ``compression`` from rho0.

    ``dst.tmp_comp[0]`` receives the number of entries with
    ``compression > 0`` and ``dst.tmp_comp[1]`` the sum of ``compression``;
    both are then combined through ``parallel_reduce_array``.  The mean is
    ``tmp_comp[1] / tmp_comp[0]``, or ``self.rho0`` when the count is zero,
    and ``self.compression`` becomes ``|mean - rho0| / rho0``.

    ``t`` and ``dt`` are accepted but not used.
    """
    dst.tmp_comp[0] = serial_reduce_array(dst.compression > 0.0, 'sum')
    dst.tmp_comp[1] = serial_reduce_array(dst.compression, 'sum')
    dst.tmp_comp[:] = parallel_reduce_array(dst.tmp_comp, 'sum')

    count = dst.tmp_comp[0]
    # Fall back to the reference value so the deviation is zero when
    # nothing contributed.
    mean_value = dst.tmp_comp[1] / count if count > 0 else self.rho0
    self.compression = fabs(mean_value - self.rho0) / self.rho0
def reduce(self, dst):
    """Store the mean positive ``compression`` scaled by ``1 / rho0``.

    ``dst.tmp_comp[0]`` receives the number of entries with
    ``compression > 0`` and ``dst.tmp_comp[1]`` the sum of ``compression``;
    both are combined through ``parallel_reduce_array`` and written back
    with ``set_data``.  ``self.compression`` is then
    ``tmp_comp[1] / tmp_comp[0] / rho0``, or 0.0 when the count is zero.
    """
    dst.tmp_comp[0] = serial_reduce_array(dst.array.compression > 0.0, 'sum')
    dst.tmp_comp[1] = serial_reduce_array(dst.array.compression, 'sum')
    dst.tmp_comp.set_data(parallel_reduce_array(dst.tmp_comp, 'sum'))

    num_positive = dst.tmp_comp[0]
    if num_positive > 0:
        self.compression = dst.tmp_comp[1] / num_positive / self.rho0
    else:
        self.compression = 0.0
def reduce(self, d_rho, d_h, d_h0, dst):
    """Rescale every ``d_h[k]`` from ``d_h0[k]`` with a density factor.

    Computes ``g = exp(sum(logrho) / n)`` with ``n = len(dst.x)`` and then,
    for each index ``k`` below ``n``, sets
    ``d_h[k] = self.k * pow(g / d_rho[k], self.eps) * d_h0[k]``.

    NOTE(review): the ``logrho`` sum goes through ``parallel_reduce_array``
    while ``n`` is the local ``len(dst.x)`` -- confirm that this is the
    intended normalisation for multi-process runs.
    """
    n = declare('int')
    k = declare('int')
    n = len(dst.x)

    local_log_sum = serial_reduce_array(dst.array.logrho, 'sum')
    g = exp(parallel_reduce_array(local_log_sum, 'sum') / n)

    for k in range(n):
        d_h[k] = self.k * pow(g / d_rho[k], self.eps) * d_h0[k]
def reduce(self, dst):
    """Turn the per-body sums in ``dst.mi`` into rigid-body quantities.

    ``dst.mi`` holds 16 consecutive slots per body.  As read by this method
    (after the parallel reduction) they are:

    - 0: total mass ``m``
    - 1-3: values divided by ``m`` to obtain the centre of mass
    - 4-6: diagonal second moments (xx, yy, zz) before the shift to the
      centre of mass
    - 7-9: off-diagonal terms (xy, xz, yz) before that shift
    - 10-12: total force
    - 13-15: total torque before the shift to the centre of mass

    For every body the method writes ``total_mass``, ``cm``, ``force``,
    ``ac``, ``torque`` and ``omega_dot`` on ``dst`` and overwrites slots 0-8
    of ``dst.mi`` with the 3x3 inertia tensor about the centre of mass, in
    row-major order.  Slots 9-15 are left as they were.
    """
    # Reduce the temporary mi values in parallel across processors.
    dst.mi.set_data(parallel_reduce_array(dst.mi))

    # Set the reduced values.
    nbody = declare('int')
    i = declare('int')
    base_mi = declare('int')
    base = declare('int')
    nbody = dst.num_body.data[0]
    for i in range(nbody):
        # 16 slots per body in mi, 3 components per body in vector arrays.
        base_mi = i * 16
        base = i * 3
        m = dst.mi.data[base_mi + 0]
        dst.total_mass.data[i] = m
        cx = dst.mi.data[base_mi + 1] / m
        cy = dst.mi.data[base_mi + 2] / m
        cz = dst.mi.data[base_mi + 3] / m
        dst.cm.data[base + 0] = cx
        dst.cm.data[base + 1] = cy
        dst.cm.data[base + 2] = cz

        # The actual moment of inertia about center of mass from parallel
        # axes theorem.  All six values are read before any slot is
        # overwritten below, because slots 4-8 are reused for the output.
        ixx = dst.mi.data[base_mi + 4] - (cy * cy + cz * cz) * m
        iyy = dst.mi.data[base_mi + 5] - (cx * cx + cz * cz) * m
        izz = dst.mi.data[base_mi + 6] - (cx * cx + cy * cy) * m
        ixy = dst.mi.data[base_mi + 7] + cx * cy * m
        ixz = dst.mi.data[base_mi + 8] + cx * cz * m
        iyz = dst.mi.data[base_mi + 9] + cy * cz * m

        # Store the symmetric tensor row by row in slots 0-8.
        dst.mi.data[base_mi + 0] = ixx
        dst.mi.data[base_mi + 1] = ixy
        dst.mi.data[base_mi + 2] = ixz
        dst.mi.data[base_mi + 3] = ixy
        dst.mi.data[base_mi + 4] = iyy
        dst.mi.data[base_mi + 5] = iyz
        dst.mi.data[base_mi + 6] = ixz
        dst.mi.data[base_mi + 7] = iyz
        dst.mi.data[base_mi + 8] = izz

        fx = dst.mi.data[base_mi + 10]
        fy = dst.mi.data[base_mi + 11]
        fz = dst.mi.data[base_mi + 12]
        dst.force.data[base + 0] = fx
        dst.force.data[base + 1] = fy
        dst.force.data[base + 2] = fz

        # Acceleration of CM.
        dst.ac.data[base + 0] = fx / m
        dst.ac.data[base + 1] = fy / m
        dst.ac.data[base + 2] = fz / m

        # Find torque about the Center of Mass and not origin: subtract
        # cm x F from the accumulated torque.
        tx = dst.mi.data[base_mi + 13]
        ty = dst.mi.data[base_mi + 14]
        tz = dst.mi.data[base_mi + 15]
        tx -= cy * fz - cz * fy
        ty -= -cx * fz + cz * fx
        tz -= cx * fy - cy * fx
        dst.torque.data[base + 0] = tx
        dst.torque.data[base + 1] = ty
        dst.torque.data[base + 2] = tz

        wx = dst.omega.data[base + 0]
        wy = dst.omega.data[base + 1]
        wz = dst.omega.data[base + 2]
        # Find omega_dot from: omega_dot = inv(I) (\tau - w x (Iw))
        # This was done using the sympy code above.
        tmp0 = iyz**2
        tmp1 = ixy**2
        tmp2 = ixz**2
        tmp3 = ixx * iyy
        tmp4 = ixy * ixz
        # tmp5 equals -1 / det(I).
        tmp5 = 1. / (ixx * tmp0 + iyy * tmp2 - 2 * iyz * tmp4 + izz * tmp1 -
                     izz * tmp3)
        tmp6 = ixy * izz - ixz * iyz
        # tmp8, tmp11, tmp7 are the x, y, z components of I w.
        tmp7 = ixz * wx + iyz * wy + izz * wz
        tmp8 = ixx * wx + ixy * wy + ixz * wz
        # tmp13, tmp9, tmp12 are the x, y, z components of tau - w x (I w).
        tmp9 = tmp7 * wx - tmp8 * wz + ty
        tmp10 = ixy * iyz - ixz * iyy
        tmp11 = ixy * wx + iyy * wy + iyz * wz
        tmp12 = -tmp11 * wx + tmp8 * wy + tz
        tmp13 = tmp11 * wz - tmp7 * wy + tx
        tmp14 = ixx * iyz - tmp4
        dst.omega_dot.data[base + 0] = tmp5 * (
            -tmp10 * tmp12 - tmp13 * (iyy * izz - tmp0) + tmp6 * tmp9)
        dst.omega_dot.data[base + 1] = tmp5 * (
            tmp12 * tmp14 + tmp13 * tmp6 - tmp9 * (ixx * izz - tmp2))
        dst.omega_dot.data[base + 2] = tmp5 * (
            -tmp10 * tmp13 - tmp12 * (-tmp1 + tmp3) + tmp14 * tmp9)
def reduce(self, dst):
    """Write the summed ``dst.m`` into ``dst.total_mass[0]``.

    The local sum is passed through ``parallel_reduce_array`` with
    ``op='sum'`` before it is stored.
    """
    local_mass = serial_reduce_array(dst.m, op='sum')
    dst.total_mass[0] = parallel_reduce_array(local_mass, op='sum')
def py_initialize(self, dst, t, dt):
    """Store the largest velocity magnitude in ``dst.vmax``.

    The magnitude ``sqrt(u**2 + v**2 + w**2)`` is evaluated element-wise,
    its local maximum goes into ``dst.vmax[0]``, and ``dst.vmax`` is then
    replaced by the result of ``parallel_reduce_array`` with ``'max'``.

    ``t`` and ``dt`` are accepted but not used.
    """
    # Imported locally so the array-aware sqrt is used here regardless of
    # what ``sqrt`` means at module level.
    import numpy as np

    speed = np.sqrt(dst.u**2 + dst.v**2 + dst.w**2)
    dst.vmax[0] = serial_reduce_array(speed, 'max')
    dst.vmax[:] = parallel_reduce_array(dst.vmax, 'max')
def reduce(self, dst, t, dt):
    """Accumulate per-body sums and derive the rigid-body quantities.

    Works in three stages:

    1. For each body id ``i`` in ``range(dst.num_body[0])``, select the
       particles with ``dst.body_id == i`` and fill the 16 slots
       ``dst.mi[16 * i : 16 * i + 16]`` with: total mass (0), mass-weighted
       position sums (1-3), second moments about the origin (4-6 diagonal,
       7-9 negated products), total force (10-12) and total torque about
       the origin (13-15).
    2. Pass ``dst.mi`` through ``parallel_reduce_array``.
    3. For each body, write ``total_mass``, ``cm``, ``force``, ``ac``,
       ``torque`` and ``omega_dot`` on ``dst`` and overwrite slots 0-8 of
       its ``mi`` block with the 3x3 inertia tensor about the centre of
       mass, in row-major order.

    When ``dst.gpu`` is truthy the inputs are pulled before stage 1 and the
    outputs pushed after stage 3.  ``t`` and ``dt`` are accepted but not
    used.
    """
    # FIXME: this will be slow in opencl
    nbody = declare('int')
    i = declare('int')
    base_mi = declare('int')
    base = declare('int')
    nbody = dst.num_body[0]
    if dst.gpu:
        dst.gpu.pull('omega', 'x', 'y', 'z', 'fx', 'fy', 'fz')

    d_mi = declare('object')
    m = declare('object')
    x = declare('object')
    y = declare('object')
    z = declare('object')
    fx = declare('object')
    fy = declare('object')
    fz = declare('object')
    d_mi = dst.mi
    cond = declare('object')
    for i in range(nbody):
        # Mask selecting the particles that belong to body i.
        cond = dst.body_id == i
        base = i * 16
        m = dst.m[cond]
        x = dst.x[cond]
        y = dst.y[cond]
        z = dst.z[cond]
        # Find the total_mass, center of mass and second moments.
        d_mi[base + 0] = numpy.sum(m)
        d_mi[base + 1] = numpy.sum(m * x)
        d_mi[base + 2] = numpy.sum(m * y)
        d_mi[base + 3] = numpy.sum(m * z)
        # Only the six independent entries of the symmetric second-moment
        # tensor are accumulated: three diagonal terms, then three negated
        # products.
        d_mi[base + 4] = numpy.sum(m * (y * y + z * z))
        d_mi[base + 5] = numpy.sum(m * (x * x + z * z))
        d_mi[base + 6] = numpy.sum(m * (x * x + y * y))

        d_mi[base + 7] = -numpy.sum(m * x * y)
        d_mi[base + 8] = -numpy.sum(m * x * z)
        d_mi[base + 9] = -numpy.sum(m * y * z)

        # the total force and torque
        fx = dst.fx[cond]
        fy = dst.fy[cond]
        fz = dst.fz[cond]
        d_mi[base + 10] = numpy.sum(fx)
        d_mi[base + 11] = numpy.sum(fy)
        d_mi[base + 12] = numpy.sum(fz)

        # Calculate the torque and reduce it (r x f about the origin).
        d_mi[base + 13] = numpy.sum(y * fz - z * fy)
        d_mi[base + 14] = numpy.sum(z * fx - x * fz)
        d_mi[base + 15] = numpy.sum(x * fy - y * fx)

    # Reduce the temporary mi values in parallel across processors.
    d_mi[:] = parallel_reduce_array(dst.mi)

    # Set the reduced values.
    for i in range(nbody):
        # 16 slots per body in mi, 3 components per body in vector arrays.
        base_mi = i * 16
        base = i * 3
        # From here on m, fx, fy, fz are per-body scalars, not the masked
        # arrays used in the loop above.
        m = d_mi[base_mi + 0]
        dst.total_mass[i] = m
        cx = d_mi[base_mi + 1] / m
        cy = d_mi[base_mi + 2] / m
        cz = d_mi[base_mi + 3] / m
        dst.cm[base + 0] = cx
        dst.cm[base + 1] = cy
        dst.cm[base + 2] = cz

        # The actual moment of inertia about center of mass from parallel
        # axes theorem.  All six values are read before any slot is
        # overwritten below, because slots 4-8 are reused for the output.
        ixx = d_mi[base_mi + 4] - (cy * cy + cz * cz) * m
        iyy = d_mi[base_mi + 5] - (cx * cx + cz * cz) * m
        izz = d_mi[base_mi + 6] - (cx * cx + cy * cy) * m
        ixy = d_mi[base_mi + 7] + cx * cy * m
        ixz = d_mi[base_mi + 8] + cx * cz * m
        iyz = d_mi[base_mi + 9] + cy * cz * m

        # Store the symmetric tensor row by row in slots 0-8.
        d_mi[base_mi + 0] = ixx
        d_mi[base_mi + 1] = ixy
        d_mi[base_mi + 2] = ixz
        d_mi[base_mi + 3] = ixy
        d_mi[base_mi + 4] = iyy
        d_mi[base_mi + 5] = iyz
        d_mi[base_mi + 6] = ixz
        d_mi[base_mi + 7] = iyz
        d_mi[base_mi + 8] = izz

        fx = d_mi[base_mi + 10]
        fy = d_mi[base_mi + 11]
        fz = d_mi[base_mi + 12]
        dst.force[base + 0] = fx
        dst.force[base + 1] = fy
        dst.force[base + 2] = fz

        # Acceleration of CM.
        dst.ac[base + 0] = fx / m
        dst.ac[base + 1] = fy / m
        dst.ac[base + 2] = fz / m

        # Find torque about the Center of Mass and not origin: subtract
        # cm x F from the accumulated torque.
        tx = d_mi[base_mi + 13]
        ty = d_mi[base_mi + 14]
        tz = d_mi[base_mi + 15]
        tx -= cy * fz - cz * fy
        ty -= -cx * fz + cz * fx
        tz -= cx * fy - cy * fx
        dst.torque[base + 0] = tx
        dst.torque[base + 1] = ty
        dst.torque[base + 2] = tz

        wx = dst.omega[base + 0]
        wy = dst.omega[base + 1]
        wz = dst.omega[base + 2]
        # Find omega_dot from: omega_dot = inv(I) (\tau - w x (Iw))
        # This was done using the sympy code above.
        tmp0 = iyz**2
        tmp1 = ixy**2
        tmp2 = ixz**2
        tmp3 = ixx * iyy
        tmp4 = ixy * ixz
        # tmp5 equals -1 / det(I).
        tmp5 = 1. / (ixx * tmp0 + iyy * tmp2 - 2 * iyz * tmp4 + izz * tmp1 -
                     izz * tmp3)
        tmp6 = ixy * izz - ixz * iyz
        # tmp8, tmp11, tmp7 are the x, y, z components of I w.
        tmp7 = ixz * wx + iyz * wy + izz * wz
        tmp8 = ixx * wx + ixy * wy + ixz * wz
        # tmp13, tmp9, tmp12 are the x, y, z components of tau - w x (I w).
        tmp9 = tmp7 * wx - tmp8 * wz + ty
        tmp10 = ixy * iyz - ixz * iyy
        tmp11 = ixy * wx + iyy * wy + iyz * wz
        tmp12 = -tmp11 * wx + tmp8 * wy + tz
        tmp13 = tmp11 * wz - tmp7 * wy + tx
        tmp14 = ixx * iyz - tmp4
        dst.omega_dot[base + 0] = tmp5 * (
            -tmp10 * tmp12 - tmp13 * (iyy * izz - tmp0) + tmp6 * tmp9)
        dst.omega_dot[base + 1] = tmp5 * (
            tmp12 * tmp14 + tmp13 * tmp6 - tmp9 * (ixx * izz - tmp2))
        dst.omega_dot[base + 2] = tmp5 * (
            -tmp10 * tmp13 - tmp12 * (-tmp1 + tmp3) + tmp14 * tmp9)
    if dst.gpu:
        dst.gpu.push(
            'total_mass', 'mi', 'cm', 'force', 'ac', 'torque', 'omega_dot')