Example #1
File: cuda_tests.py  Project: CLdelisle/SPH
def generateTestParticle():
    return Particle(random.randint(1, 10000),
                    randomFloat(), randomFloat(), randomFloat(),
                    randomFloat(), randomFloat(), randomFloat(),
                    randomFloat(),
                    acc=[randomFloat(),
                         randomFloat(),
                         randomFloat()],
                    rho=randomFloat(),
                    pre=randomFloat(),
                    temp=[randomFloat(),
                          randomFloat(),
                          randomFloat()])


def generateParticles(n):
    return [generateTestParticle() for i in xrange(n)]


# Test 1 - increment all particle property values by 1
particles_test_data = generateParticles(32)
gpu_particles = ParticleGPUInterface(particles_test_data)
gpu_particles.run_cuda_function("increment_particle_properties")
updated_particle = gpu_particles.getResultsFromDevice()[0]

np.testing.assert_almost_equal(updated_particle.id,
                               particles_test_data[0].id + 1,
                               required_decimal_accuray)
np.testing.assert_almost_equal(updated_particle.mass,
                               particles_test_data[0].mass + 1,
                               required_decimal_accuray)
np.testing.assert_almost_equal(updated_particle.pos,
                               particles_test_data[0].pos + 1,
                               required_decimal_accuray)
np.testing.assert_almost_equal(updated_particle.vel,
                               particles_test_data[0].vel + 1,
                               required_decimal_accuray)
Example #2
File: cuda_tests.py  Project: ttnghia/SPH-2
required_decimal_accuray = 2

def randomFloat():
	return random.uniform(1, 1e3)

def generateTestParticle():
	return Particle(random.randint(1, 10000), randomFloat(), randomFloat(), randomFloat(), randomFloat(), randomFloat(), randomFloat(), randomFloat(),
		acc=[randomFloat(), randomFloat(), randomFloat()], rho=randomFloat(), pre=randomFloat(), temp=[randomFloat(), randomFloat(), randomFloat()])

def generateParticles(n):
	return [generateTestParticle() for i in xrange(n)]


# Test 1 - increment all particle property values by 1
particles_test_data = generateParticles(32)
gpu_particles = ParticleGPUInterface(particles_test_data)
gpu_particles.run_cuda_function("increment_particle_properties")
updated_particle = gpu_particles.getResultsFromDevice()[0]


np.testing.assert_almost_equal(updated_particle.id, particles_test_data[0].id + 1, required_decimal_accuray)
np.testing.assert_almost_equal(updated_particle.mass, particles_test_data[0].mass + 1, required_decimal_accuray)
np.testing.assert_almost_equal(updated_particle.pos, particles_test_data[0].pos + 1, required_decimal_accuray)
np.testing.assert_almost_equal(updated_particle.vel, particles_test_data[0].vel + 1, required_decimal_accuray)
np.testing.assert_almost_equal(updated_particle.acc, particles_test_data[0].acc + 1, required_decimal_accuray)
np.testing.assert_almost_equal(updated_particle.pressure, particles_test_data[0].pressure + 1, required_decimal_accuray)
np.testing.assert_almost_equal(updated_particle.rho, particles_test_data[0].rho + 1, required_decimal_accuray)
np.testing.assert_almost_equal(updated_particle.temp, particles_test_data[0].temp + 1, required_decimal_accuray)


# Test 2 - Same as test 1, but apply +1 to all properties on multiple particles. Tests thread access index
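
The "thread access index" the Test 2 comment refers to is the usual one-thread-per-particle mapping; once the particle count can exceed a single block, the launch must size the grid to cover every particle. A minimal sketch with illustrative numbers (not the project's actual launch code):

n = 1000                            # e.g. a larger particle count
threads_per_block = 128
blocks = (n + threads_per_block - 1) // threads_per_block  # ceil division -> 8 blocks here
# kernel side, the matching access-index guard would be:
#   int idx = blockIdx.x * blockDim.x + threadIdx.x;
#   if (idx >= n) return;           // surplus threads in the last block do nothing
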
Example #3
def sim(particles, bound, kernel, maxiter, pnum, smooth, t_norm, x_norm, interval, savefile, timestep, mode, verbosity):
	t = 0.0	 # elapsed time
	if(kernel == "gaussian"):
		CHOOSE_KERNEL_CONST = 1
	else:
		CHOOSE_KERNEL_CONST = 0

	if mode == "parallel":
		import pycuda.driver as cuda
		import pycuda.autoinit
		import numpy
		from pycuda.compiler import SourceModule

		from gpu_interface import ParticleGPUInterface
		'''
		So we can do this one of two ways.
		1) Keep only one copy of the system in memory.
		   This is what is implemented here. This does not require
		   data duplication, but necessitates two iterations through
		   the list.
		2) Keep two lists. One for t = t_N, one for t = t_(N+1).
		   This way we can remove a for-loop, but requires more memory
		   (sketched in the comment below).
		Luckily, it is likely these problems only affect the serial algorithm.
		'''
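		# Illustrative sketch of option 2 only -- not what this function does. Double-buffer
		# the particle list so reads for t_N never alias writes for t_(N+1), e.g.:
		#   current, nxt = particles, [copy.deepcopy(p) for p in particles]
		#   ...update nxt while reading current, then swap: current, nxt = nxt, current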
	# e.g. output-100.csv = prefix + current t value + file extension
	ary = savefile.rsplit(".", 1)  # split savefile once from the right: [0]=prefix, [1]=extension
	save = 0

	if mode == "parallel":
		# init gpu interface, pass particles
		gpu_particles = ParticleGPUInterface(particles)

	print "[+] Saved @ iterations: ",
	while(t < (maxiter*timestep)):
#			print "t={}".format(t)
			if (save*interval) == t:
#					print "saving file"
					fname = "%s-%d.%s" % (ary[0], int(t), ary[1])
					save += 1  # bump save counter
					string = "\b%d..." % int(t)	 # '\b' prints a backspace character to remove previous space
					print string,
					if mode == "parallel":
						particles = gpu_particles.getResultsFromDevice()
					saveParticles(particles, fname, verbosity)

			# main simulation loop
			if mode == "parallel":
				gpu_particles.run_cuda_function('run_simulation_loops', (numpy.int32(timestep), numpy.float32(smooth), numpy.int32(CHOOSE_KERNEL_CONST)))
				# Transfer the results back to CPU
				# Just for testing, this should not be done here

			else:
				# first sim loop: half-step velocity update, then density, gravity and pressure for each particle
				for p in particles:
						# preemptively start the Velocity Verlet computation (first half of velocity update part)
						p.vel += (timestep/2.0) * p.acc
						p.temp = p.acc
						p.acc = 0.0
						p.rho = 0.0
						p.pressure = 0.0
						#get density
						for q in particles:
							p.rho += q.mass * find_kernel(CHOOSE_KERNEL_CONST, p.pos - q.pos, smooth)
							# while we're iterating, add contribution from gravity
							if(p.id != q.id):
								p.acc += Newtonian_gravity(p,q)

						# normalize density
						p.rho = p.rho / len(particles)
						p.pressure = pressure(p)
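						# For reference, the q-loop above forms the SPH density estimate
						#   rho_i = sum_j m_j * W(r_i - r_j, h);
						# the extra division by len(particles) is this project's own normalization choice.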


				# second sim loop
				for p in particles:
					# acceleration from pressure gradient
					for q in particles:
						if p.id != q.id:
							p.acc -= ( q.mass
								* ((p.pressure / (p.rho ** 2)) + (q.pressure / (q.rho ** 2)))
								* del_kernel(CHOOSE_KERNEL_CONST, p.pos - q.pos, smooth)
								* (1 / np.linalg.norm(p.pos - q.pos))
								* (p.pos - q.pos) )
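							# For reference, this is the symmetric SPH pressure-gradient term
							#   a_i -= sum_j m_j * (P_i/rho_i**2 + P_j/rho_j**2) * grad_W(r_i - r_j, h)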
					# finish velocity update
					p.vel += (timestep/2.0) * p.acc
				'''
				Velocity Verlet integration: Works only assuming force is velocity-independent
				http://en.wikipedia.org/wiki/Verlet_integration#Velocity_Verlet
				'''
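				# Standard velocity Verlet, for reference (matches the link above):
				#   v(t + dt/2) = v(t) + (dt/2) * a(t)
				#   x(t + dt)   = x(t) + dt * v(t + dt/2)
				#   a(t + dt)   = F(x(t + dt)) / m
				#   v(t + dt)   = v(t + dt/2) + (dt/2) * a(t + dt)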
				# iterate AGAIN to do final position updates
				# save particles list to temporary holder - ensures we have consistent indexing throughout for loop
				#	tempp = particles

				# third sim loop
				for p in particles:
					# perform position update
					p.pos += timestep * (p.vel + (timestep/2.0)*p.temp)
			#				if np.linalg.norm(p.pos) > bound:
			#						print "Particle %d position: %f out of range at iteration %d" % (p.id, np.linalg.norm(p.pos), int(t))
			#						tempp.remove(p)
			#		particles = tempp
			t += timestep  # advance time

	if mode == "parallel":
		particles = gpu_particles.getResultsFromDevice()

	# Always save the last interval
	print "\b%d\n" % int(t)
	fname = "%s-%d.%s" % (ary[0], int(t), ary[1])
	saveParticles(particles, fname, verbosity)
	# returns the last t-value, which is useful for displaying total iterations
	# Also returns the final updated particles
	return (t, particles)