forked from healther/pynbody
-
Notifications
You must be signed in to change notification settings - Fork 0
/
nbody.py
100 lines (73 loc) · 3.72 KB
/
nbody.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
from __future__ import print_function, division
import numpy as np
import pycuda.autoinit
import pycuda.driver as drv
from pycuda import gpuarray
from pycuda.compiler import SourceModule
advance_kernel = """
__global__ void advance(const double * __restrict__ r_old, const double * __restrict__ v_old, const double * __restrict__ mass, double * __restrict__ r_new, double * __restrict__ v_new, const int number_of_particles){
const int tx = threadIdx.x + blockDim.x * blockIdx.x;
if(tx>=n) return;
const int particle_id = tx;
const double particle_x = r_old[particle_id];
const double particle_y = r_old[particle_id + number_of_particles];
const double particle_z = r_old[particle_id + 2*number_of_particles];
const double particle_vx = r_old[particle_id];
const double particle_vy = r_old[particle_id + number_of_particles];
const double particle_vz = r_old[particle_id + 2*number_of_particles];
const double particle_mass = mass[particle_id];
double distance = 0.;
double acceleration_vx = 0.;
double acceleration_vy = 0.;
double acceleration_vz = 0.;
for(int i=0; i<number_of_particles; ++i){
if(i != particle_id){
distance = sprt(
(particle_x-r_old[i])*(particle_x-r_old[i])
+
(particle_y-r_old[i + number_of_particles])*(particle_x-r_old[i + number_of_particles])
+
(particle_z-r_old[i + 2*number_of_particles])*(particle_x-r_old[i + 2*number_of_particles])
);
acceleration_x += mass[i] * (r_old[i] - particle_x) / distance**3;
acceleration_y += mass[i] * (r_old[i + number_of_particles] - particle_x) / distance**3;
acceleration_z += mass[i] * (r_old[i + 2*number_of_particles] - particle_x) / distance**3;
}
r_new[particle_id] = particle_vx * timestep;
r_new[particle_id + number_of_particles] = particle_vy * timestep;
r_new[particle_id + 2 * number_of_particles] = particle_vz * timestep;
v_new[particle_id] = acceleration_x * timestep;
v_new[particle_id + number_of_particles] = acceleration_y * timestep;
v_new[particle_id + 2 * number_of_particles] = acceleration_z * timestep;
}
}
"""
def integrate(stepsize = .01, stores = 5, steps=10000, number_of_particles=2**10):
gpu_r, gpu_v, gpu_mass = create_particles(number_of_particles)
number_of_particles = np.int32(number_of_particles)
gpu_rs, gpu_vs = [gpu_r], [gpu_v]
for i in xrange(stores-1):
gpu_rs.append(gpuarray.empty_like(gpu_r))
gpu_vs.append(gpuarray.empty_like(gpu_v))
advance = SourceModule(advance_kernel).get_function("advance")
advance.prepare([np.intp, np.intp, np.intp, np.intp, np.intp, np.int32])
block_size = (32,0,0)
grid_size = (int(number_of_particles/32), 0, 0)
advance.prepared_call(block_size, grid_size ,gpu_r[0], gpu_v[0], gpu_mass, gpu_r[1], gpu_v[1], number_of_particles)
old, new = 1, 2
for i in xrange(steps):
r = rs_gpu[old].get_async()
v = vs_gpu[old].get_async()
advance.prepared_call_async(block_size, grid_size ,gpu_rs[old], gpu_vs[old], gpu_mass, gpu_rs[new], gpu_vs[new], number_of_particles)
np.write("step{i:4}_r".format(i*stepsize)+".dat", r)
np.write("step{i:4}_v".format(i*stepsize)+".dat", r)
old, new = new, (new+1)%stores
def create_particles(n):
numberofparticles = n
r = np.random.rand(3*n)
v = np.random.rand(3*n)
mass = np.random.rand(n)
gpu_r = gpuarray.to_gpu(r)
gpu_v = gpuarray.to_gpu(v)
gpu_mass = gpuarray.to_gpu(mass)
return gpu_r, gpu_v, gpu_mass