-
Notifications
You must be signed in to change notification settings - Fork 0
/
main_test.py
96 lines (79 loc) · 2.92 KB
/
main_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
from pyfft.cuda import Plan
import numpy as np
import matplotlib.pyplot as plt
import pycuda.driver as cuda
from pycuda.tools import make_default_context
import pycuda.gpuarray as gpuarray
from pycuda.compiler import SourceModule
def nearest_2power(n):
return np.power(2.,(np.ceil(np.log2(n))))
@profile
def main(context, stream, plan1, N1, N2, g_buf1, g_buf2):
#N1 = # ffts applied
#N2 = dim of ffts
x = np.linspace(0, 2 * np.pi, N2)
y = np.sin(2 * x)
ys = y
ys = ys.reshape(1,N2)
y = np.concatenate((y,np.zeros(nearest_2power(N2)-N2)))
y = y.reshape(1,nearest_2power(N2))
for i in xrange(N1-1): #append N1-1 sines
yi = np.sin(2 * (i+2) * x)
yis = yi
yis = yis.reshape(1,N2)
ys = np.concatenate(((ys),(yis)),0)
yi = np.concatenate((yi,np.zeros(nearest_2power(N2)-N2)))
yi = yi.reshape(1,nearest_2power(N2))
y = np.concatenate(((y),(yi)),0)
y = y.transpose()
yim= np.zeros(y.shape)
y = np.array(y,np.float64)
yw = y.transpose()
yimw = yim.transpose()
aw = np.fft.fft(ys,int(nearest_2power(N2)),1)
bw = np.real(np.fft.ifft(aw,int(nearest_2power(N2)),1))
aw0 = np.fft.fft(y,int(nearest_2power(N2)),0)
bw0 = np.real(np.fft.ifft(aw0,int(nearest_2power(N2)),0))
gpu_testmat = gpuarray.to_gpu(y)
gpu_testmatim = gpuarray.to_gpu(yim)
plan1.execute(gpu_testmat, gpu_testmatim, batch=N1)
gfft = gpu_testmat.get() #get fft result
plan1.execute(gpu_testmat, gpu_testmatim, inverse=True, batch=N1)
gifft = np.real(gpu_testmat.get()) #get ifft result
cuda.memcpy_htod(g_buf1, y)
cuda.memset_d32(g_buf2, 0, yim.size*2) # double all zero bits should be zero again. This function only works for 32 bit, so we need twice as many
plan1.execute(g_buf1, g_buf2, batch=N1)
grfft=np.empty_like(y)
cuda.memcpy_dtoh(grfft, g_buf1) #fft result
plan1.execute(g_buf1, g_buf2, inverse=True, batch=N1)
grifft=np.empty_like(y)
cuda.memcpy_dtoh(grifft, g_buf1) #ifft result
if Plot:
np.set_printoptions(threshold=np.nan)
#plot cuda fft results
f, axarr = plt.subplots(5, sharex=False)
axarr[0].plot(y)
axarr[1].plot(gfft)
axarr[2].plot(gifft)
axarr[3].plot(grfft)
axarr[4].plot(grifft)
plt.show()
raise SystemExit
#set dimensions
N1 = 50
N2 = 128
N = 1000
Plot = False
#init cuda
cuda.init()
context = make_default_context()
stream = cuda.Stream()
plan1 = Plan(int(nearest_2power(N2)), dtype=np.float64, context=context, stream=stream, fast_math=False)
test = gpuarray.to_gpu(np.array([1, 0]))
g_buf1 = cuda.mem_alloc(int(8*N1*nearest_2power(N2)))
g_buf2 = cuda.mem_alloc(int(8*N1*nearest_2power(N2)))
#do operation N times for profiling purposes
for i in xrange(N):
main(context=context, stream=stream, plan1=plan1, N1=N1, N2=N2, g_buf1=g_buf1, g_buf2=g_buf2)
#destroy cuda context
context.pop()