def randomFloat():
    return random.uniform(1, 1e3)

def generateTestParticle():
    return Particle(random.randint(1, 10000), randomFloat(), randomFloat(), randomFloat(),
                    randomFloat(), randomFloat(), randomFloat(), randomFloat(),
                    acc=[randomFloat(), randomFloat(), randomFloat()],
                    rho=randomFloat(), pre=randomFloat(),
                    temp=[randomFloat(), randomFloat(), randomFloat()])

def generateParticles(n):
    return [generateTestParticle() for i in xrange(n)]

# Test 1 - increment all particle property values by 1
particles_test_data = generateParticles(32)
gpu_particles = ParticleGPUInterface(particles_test_data)
gpu_particles.run_cuda_function("increment_particle_properties")
updated_particle = gpu_particles.getResultsFromDevice()[0]

np.testing.assert_almost_equal(updated_particle.id, particles_test_data[0].id + 1, required_decimal_accuray)
np.testing.assert_almost_equal(updated_particle.mass, particles_test_data[0].mass + 1, required_decimal_accuray)
np.testing.assert_almost_equal(updated_particle.pos, particles_test_data[0].pos + 1, required_decimal_accuray)
np.testing.assert_almost_equal(updated_particle.vel, particles_test_data[0].vel + 1, required_decimal_accuray)
np.testing.assert_almost_equal(updated_particle.acc, particles_test_data[0].acc + 1, required_decimal_accuray)
np.testing.assert_almost_equal(updated_particle.pressure, particles_test_data[0].pressure + 1, required_decimal_accuray)
np.testing.assert_almost_equal(updated_particle.rho, particles_test_data[0].rho + 1, required_decimal_accuray)
np.testing.assert_almost_equal(updated_particle.temp, particles_test_data[0].temp + 1, required_decimal_accuray)

# Test 2 - Same as test 1, but apply +1 to all properties on multiple particles. Tests thread access index
particles_test_data = generateParticles(32)
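
# Illustrative helper (a sketch, not part of the original tests): a CPU-side "oracle"
# that computes the values the increment_particle_properties kernel is expected to
# produce, so multi-particle runs like Test 2 can be checked field by field without
# repeating the assertion block above. It assumes Particle exposes the attributes
# asserted in Test 1 (id, mass, pos, vel, acc, pressure, rho, temp) and that the
# vector fields support scalar addition (e.g. NumPy arrays).
def expected_incremented(particles):
    # Build the expected post-kernel values without mutating the originals.
    return [dict(id=p.id + 1, mass=p.mass + 1, pos=p.pos + 1, vel=p.vel + 1,
                 acc=p.acc + 1, pressure=p.pressure + 1, rho=p.rho + 1,
                 temp=p.temp + 1) for p in particles]
# Each returned dict can then be compared against the corresponding particle from
# getResultsFromDevice() with np.testing.assert_almost_equal.
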
def sim(particles, bound, kernel, maxiter, pnum, smooth, t_norm, x_norm, interval, savefile, timestep, mode, verbosity):
    t = 0.0  # elapsed time

    if kernel == "gaussian":
        CHOOSE_KERNEL_CONST = 1
    else:
        CHOOSE_KERNEL_CONST = 0

    if mode == "parallel":
        import pycuda.driver as cuda
        import pycuda.autoinit
        import numpy
        from pycuda.compiler import SourceModule
        from gpu_interface import ParticleGPUInterface

    '''
    So we can do this one of two ways.
    1) Keep only one copy of the system in memory. This is what is implemented here.
       This does not require data duplication, but necessitates two iterations through the list.
    2) Keep two lists. One for t = t_N, one for t = t_(N+1). This way we can remove a for-loop,
       but it requires more memory.
    Luckily, it is likely these problems only affect the serial algorithm.
    '''

    # output-100.csv = prefix + interval + file extension
    ary = savefile.split(".")  # only split savefile once ([0]=prefix, [1]=extension)
    save = 0

    if mode == "parallel":
        # init gpu interface, pass particles
        gpu_particles = ParticleGPUInterface(particles)

    print "[+] Saved @ iterations: ",
    while t < (maxiter * timestep):
        # print "t={}".format(t)
        if (save * interval) == t:
            # print "saving file"
            fname = "%s-%d.%s" % (ary[0], int(t), ary[1])
            save += 1  # bump save counter
            string = "\b%d..." % int(t)  # '\b' prints a backspace character to remove previous space
            print string,
            if mode == "parallel":
                particles = gpu_particles.getResultsFromDevice()
            saveParticles(particles, fname, verbosity)

        # main simulation loop
        if mode == "parallel":
            gpu_particles.run_cuda_function('run_simulation_loops',
                (numpy.int32(timestep), numpy.float32(smooth), numpy.int32(CHOOSE_KERNEL_CONST)))
            # Transfer the results back to CPU
            # Just for testing, this should not be done here
        else:
            # first sim loop (could use a better name, but I have no idea what this loop is doing)
            for p in particles:
                # preemptively start the Velocity Verlet computation (first half of velocity update part)
                p.vel += (timestep / 2.0) * p.acc
                p.temp = p.acc
                p.acc = 0.0
                p.rho = 0.0
                p.pressure = 0.0

                # get density
                for q in particles:
                    p.rho += q.mass * find_kernel(CHOOSE_KERNEL_CONST, p.pos - q.pos, smooth)
                    # while we're iterating, add contribution from gravity
                    if p.id != q.id:
                        p.acc += Newtonian_gravity(p, q)

                # normalize density
                p.rho = p.rho / len(particles)
                p.pressure = pressure(p)

            # second sim loop
            for p in particles:
                # acceleration from pressure gradient
                for q in particles:
                    if p.id != q.id:
                        p.acc -= ((q.mass
                                   * ((p.pressure / (p.rho ** 2)) + (q.pressure / (q.rho ** 2)))
                                   * del_kernel(CHOOSE_KERNEL_CONST, p.pos - q.pos, smooth))
                                  * (1 / (np.linalg.norm(p.pos - q.pos)))
                                  * (p.pos - q.pos))

                # finish velocity update
                p.vel += (timestep / 2.0) * p.acc
                '''
                Velocity Verlet integration:
                Works only assuming force is velocity-independent
                http://en.wikipedia.org/wiki/Verlet_integration#Velocity_Verlet
                '''

            # iterate AGAIN to do final position updates
            # save particles list to temporary holder - ensures we have consistent indexing throughout for loop
            # tempp = particles

            # third sim loop
            for p in particles:
                # perform position update
                p.pos += timestep * (p.vel + (timestep / 2.0) * p.temp)
                # if np.linalg.norm(p.pos) > bound:
                #     print "Particle %d position: %f out of range at iteration %d" % (p.id, np.linalg.norm(p.pos), int(t))
                #     tempp.remove(p)

            # particles = tempp

        t += timestep  # advance time

    if mode == "parallel":
        particles = gpu_particles.getResultsFromDevice()

    # Always save the last interval
    print "\b%d\n" % int(t)
    fname = "%s-%d.%s" % (ary[0], int(t), ary[1])
    saveParticles(particles, fname, verbosity)
    # returns the last t-value, which is useful for displaying total iterations
    # Also returns the final updated particles
    return (t, particles)
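
# Illustrative sketch (not part of the original module): a minimal, self-contained
# velocity Verlet step on a 1-D harmonic oscillator, in the standard kick-drift-kick
# form referenced by the Velocity Verlet comment above. The serial branch of sim()
# interleaves the same half-kick / recompute-force / half-kick idea across its three
# particle loops. Names here (verlet_demo, k, m) are hypothetical.
def verlet_demo(steps=1000, dt=0.01, k=1.0, m=1.0):
    x, v = 1.0, 0.0        # initial position and velocity
    a = -k * x / m         # a(t) from the current force
    for _ in xrange(steps):
        v += 0.5 * dt * a  # half kick with the old acceleration
        x += dt * v        # drift
        a = -k * x / m     # recompute acceleration at the new position
        v += 0.5 * dt * a  # second half kick with the new acceleration
    return x, v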