# Timed GPU pipeline: forward 2D FFT of two images, the ccmult kernel on the
# two spectra, then an inverse FFT. After every stage the device buffer is
# copied back to the host so the intermediate result can be inspected.


def _gpu_elapsed(launch):
    """Run ``launch()`` bracketed by the ``start``/``stop`` events.

    Returns whatever ``stop.time_since(start)`` reports once ``stop`` has been
    synchronized (milliseconds according to the PyCUDA Event docs -- confirm).
    """
    start.record()
    launch()
    stop.record()
    stop.synchronize()
    return stop.time_since(start)


def _forward_ffts():
    """Queue the forward 2D FFT of both input images."""
    plan.execute(gpuimage1, gpuresult1, tran16, trandata)  # 2D FFT on image1
    plan.execute(gpuimage2, gpuresult2, tran16, trandata)  # 2D FFT on image2


# Stage 1: both forward transforms are timed together.
exec_time = _gpu_elapsed(_forward_ffts)

# Transfer the results back to the host to test them for correctness.
cuda.memcpy_dtoh(view1, gpuresult1)
cuda.memcpy_dtoh(view2, gpuresult2)
displayResults(view1.real, title="FFT 1")
displayResults(view2.real, title="FFT 2")

# Stage 2: ccmult kernel over the two spectra, launched with 16x16 thread
# blocks on the grid stored in trandata. Its output is read back from
# gpuresult1 below.
ccmult_time = _gpu_elapsed(
    lambda: ccmult(
        gpuresult1,
        gpuresult2,
        np.int32(nx),
        np.int32(ny),
        block=(16, 16, 1),
        grid=trandata.grid,
    )
)
cuda.memcpy_dtoh(ccview, gpuresult1)
displayResults(ccview.real, title="CCMULT Result")

# Stage 3: inverse FFT from gpuresult1 into gpuresult2.
ifft_time = _gpu_elapsed(
    lambda: plan.execute(gpuresult1, gpuresult2, tran16, trandata, reverse=True)
)
cuda.memcpy_dtoh(ifftview, gpuresult2)
# Timed GPU pipeline: forward 2D FFT of two images, the ccmult kernel on the
# two spectra, then an inverse FFT. Each stage is bracketed by the
# start/stop events, and the relevant device buffer is copied back to the
# host afterwards so the intermediate result can be inspected.

# --- Stage 1: forward FFTs (timed together) ---------------------------------
start.record()
plan.execute(gpuimage1, gpuresult1, tran16, trandata)  # execute 2D FFT on image1
plan.execute(gpuimage2, gpuresult2, tran16, trandata)  # execute 2D FFT on image2
stop.record()
stop.synchronize()  # wait for the GPU to reach `stop` before reading the timer
exec_time = stop.time_since(start)

# Transfer back the results to test for correctness.
cuda.memcpy_dtoh(view1, gpuresult1)
cuda.memcpy_dtoh(view2, gpuresult2)
displayResults(view1.real, title="FFT 1")
displayResults(view2.real, title="FFT 2")

# --- Stage 2: ccmult kernel ---------------------------------------------------
# One 16x16 thread block per 16x16 tile. Grid dimensions must be integers:
# `nx / 16` is a float under Python 3 true division, so use explicit floor
# division on plain ints (same truncating result the Python 2 `/` gave).
# NOTE(review): any remainder of nx or ny modulo 16 is not covered by the
# grid -- this assumes both are multiples of 16; confirm against the caller.
ccmult_grid = (int(nx) // 16, int(ny) // 16)

start.record()
ccmult(
    gpuresult1,
    gpuresult2,
    np.int32(nx),
    np.int32(ny),
    block=(16, 16, 1),
    grid=ccmult_grid,
)
stop.record()
stop.synchronize()
ccmult_time = stop.time_since(start)

# The kernel's output is read back from gpuresult1.
cuda.memcpy_dtoh(ccview, gpuresult1)
displayResults(ccview.real, title="CCMULT Result")

# --- Stage 3: inverse FFT (gpuresult1 -> gpuresult2) -------------------------
start.record()
plan.execute(gpuresult1, gpuresult2, tran16, trandata, reverse=True)  # inverse FFT
stop.record()
stop.synchronize()
ifft_time = stop.time_since(start)

cuda.memcpy_dtoh(ifftview, gpuresult2)