def symbolise(self, last_result=None):
    """Optimise the per-row wave parameters with differential evolution.

    Each of the ``self.height`` rows is described by three parameters, laid
    out consecutively in the flat parameter vector: wavelength, offset and
    amplitude.  The search is run by ``scipy.optimize.differential_evolution``
    with ``self.evaluation_function_cuda`` as the objective and
    ``self.step_callback`` as the per-iteration callback.

    Side effects: (re)creates ``self._working_results``, ``self._factors``,
    ``self._d_working_results`` and ``self.compute_samples_configured``, and
    prints the optimiser result.

    Args:
        last_result: Not used by this method; kept so existing callers that
            pass it keep working.

    Raises:
        ValueError: If ``self.depth`` is not 3.  The parameter layout below is
            fixed at three values per row, so any other depth would leave
            part of the bounds array uninitialised or index past its end.
    """
    self.show_visualisation()

    params_per_row = 3
    if self.depth != params_per_row:
        raise ValueError(
            "symbolise expects depth == %d (wavelength, offset, amplitude); "
            "got %r" % (params_per_row, self.depth)
        )

    row_bounds = np.array(
        [
            [0.001, 1],  # wavelength as a fraction of the window width
            [-1, 2],     # offset where window is 0,1
            [-1, 1],     # amplitude. Can be negative
        ],
        dtype=np.float64,
    )
    # One (wavelength, offset, amplitude) triple per row, in row order.
    bounds = np.tile(row_bounds, (self.height, 1))

    self._working_results = np.zeros(shape=(self.width, self.height),
                                     dtype=np.float64)

    # cuda init
    self._factors = np.zeros(shape=(self.height * self.depth,),
                             dtype=np.float64)
    self._d_working_results = cuda.to_device(self._working_results)

    # Floor division keeps the grid dimension an integer on Python 3 as well
    # as Python 2 (where ``/`` on ints already floored).
    # NOTE(review): a width that is not a multiple of 128 loses its remainder
    # here, and the second grid dimension is height - 1 rather than height --
    # confirm both against the kernel.
    grid_dim = self.width // 128, self.height - 1
    block_dim = 128, 1, 1
    self.compute_samples_configured = compute_sample_gpu.configure(grid_dim,
                                                                   block_dim)

    with cuda.pinned(self._factors, self._working_results):
        output = scipy.optimize.differential_evolution(
            self.evaluation_function_cuda,
            bounds=bounds,
            strategy='best2bin',
            maxiter=20,
            recombination=0.9,
            mutation=(0.0001, 0.3),
            tol=0.01,
            init='latinhypercube',
            popsize=15,
            disp=True,
            callback=self.step_callback,
        )
        # Evaluate the winning parameter vector once more while the host
        # buffers are still page-locked.
        self.evaluation_function_cuda(output.x)

    print(output)
def driver(pricer, pinned=False):
    """Run a Monte Carlo pricer once and print its results and throughput.

    The simulation settings (``NumPath``, ``NumStep``, ``StockPrice``,
    ``StrikePrice``, ``Maturity``, ``InterestRate``, ``Volatility``) are read
    from module-level names.

    Args:
        pricer: Callable invoked as
            ``pricer(paths, DT, InterestRate, Volatility)``.  The last column
            of ``paths`` is read back afterwards, so the callable is expected
            to fill the array in place.
        pinned: When true, ``paths`` is wrapped in ``cuda.pinned`` for the
            duration of the pricer call.

    If ``--plot`` is present in ``sys.argv``, up to the first 100 paths are
    plotted with matplotlib.
    """
    paths = np.zeros((NumPath, NumStep + 1), order='F')
    paths[:, 0] = StockPrice
    DT = Maturity / NumStep

    def timed_run():
        # Only the pricer call itself is timed.
        start = timer()
        pricer(paths, DT, InterestRate, Volatility)
        return timer() - start

    if pinned:
        from numbapro import cuda
        with cuda.pinned(paths):
            elapsed = timed_run()
    else:
        elapsed = timed_run()

    ST = paths[:, -1]
    PaidOff = np.maximum(ST - StrikePrice, 0)
    optionprice = np.mean(PaidOff) * exp(-InterestRate * Maturity)

    # Every print below takes a single pre-formatted string, so the output is
    # the same whether ``print`` is the Python 2 statement or the Python 3
    # function.
    fmt = '%20s: %s'
    print('Result')
    result_rows = [
        ('stock price', np.mean(ST)),
        ('standard error', np.std(ST) / sqrt(NumPath)),
        ('paid off', np.mean(PaidOff)),
        ('option price', optionprice),
        ('Paths ', NumPath),
        ('NumStep ', NumStep),
    ]
    for row in result_rows:
        print(fmt % row)

    print('Performance')
    NumCompute = NumPath * NumStep
    print(fmt % ('Mstep/second', '%.2f' % (NumCompute / elapsed / 1e6)))
    print(fmt % ('time elapsed', '%.3fs' % elapsed))

    if '--plot' in sys.argv:
        from matplotlib import pyplot
        pathct = min(NumPath, 100)
        for path in paths[:pathct]:
            pyplot.plot(path)
        print('Plotting %d/%d paths' % (pathct, NumPath))
        pyplot.show()
def driver(pricer, pinned=False):
    """Run a Monte Carlo pricer once and print its results and throughput.

    The simulation settings (``NumPath``, ``NumStep``, ``StockPrice``,
    ``StrikePrice``, ``Maturity``, ``InterestRate``, ``Volatility``) are read
    from module-level names.

    Args:
        pricer: Callable invoked as
            ``pricer(paths, DT, InterestRate, Volatility)``.  The last column
            of ``paths`` is read back afterwards, so the callable is expected
            to fill the array in place.
        pinned: When true, ``paths`` is wrapped in ``cuda.pinned`` for the
            duration of the pricer call.

    If ``--plot`` is present in ``sys.argv``, up to the first 100 paths are
    plotted with matplotlib.
    """
    paths = np.zeros((NumPath, NumStep + 1), order='F')
    paths[:, 0] = StockPrice
    DT = Maturity / NumStep

    def timed_run():
        # Only the pricer call itself is timed.
        start = timer()
        pricer(paths, DT, InterestRate, Volatility)
        return timer() - start

    if pinned:
        from numbapro import cuda
        with cuda.pinned(paths):
            elapsed = timed_run()
    else:
        elapsed = timed_run()

    ST = paths[:, -1]
    PaidOff = np.maximum(ST - StrikePrice, 0)
    optionprice = np.mean(PaidOff) * exp(-InterestRate * Maturity)

    # Every print below takes a single pre-formatted string, so the output is
    # the same whether ``print`` is the Python 2 statement or the Python 3
    # function.
    fmt = '%20s: %s'
    print('Result')
    result_rows = [
        ('stock price', np.mean(ST)),
        ('standard error', np.std(ST) / sqrt(NumPath)),
        ('paid off', np.mean(PaidOff)),
        ('option price', optionprice),
    ]
    for row in result_rows:
        print(fmt % row)

    print('Performance')
    NumCompute = NumPath * NumStep
    print(fmt % ('Mstep/second', '%.2f' % (NumCompute / elapsed / 1e6)))
    print(fmt % ('time elapsed', '%.3fs' % elapsed))

    if '--plot' in sys.argv:
        from matplotlib import pyplot
        pathct = min(NumPath, 100)
        for path in paths[:pathct]:
            pyplot.plot(path)
        print('Plotting %d/%d paths' % (pathct, NumPath))
        pyplot.show()
# NOTE(review): `ts`, `src`, `dst` and `copy_kernel` are bound before this
# fragment begins; the start of the "regular" timing window is not visible
# here.

# Regular transfer: copy src to the device, run copy_kernel into a device
# buffer shaped like dst, and copy the result back into dst. No stream is
# passed to any of these calls.
d_src = cuda.to_device(src)
d_dst = cuda.device_array_like(dst)
copy_kernel(d_src, out=d_dst)
d_dst.copy_to_host(dst)
te = timer()
print 'regular', te - ts
# Drop the device-array references before the second measurement.
del d_src, d_dst
# The round trip should reproduce the source data.
assert np.allclose(dst, src)

# Pinned (pagelocked) memory transfer: the same round trip, with src and dst
# wrapped in cuda.pinned and every call issued on one explicit stream.
with cuda.pinned(src, dst):
    ts = timer()
    stream = cuda.stream()  # use stream to trigger async memory transfer
    d_src = cuda.to_device(src, stream=stream)
    d_dst = cuda.device_array_like(dst, stream=stream)
    copy_kernel(d_src, out=d_dst, stream=stream)
    d_dst.copy_to_host(dst, stream=stream)
    # Wait for the work queued on the stream before stopping the timer.
    stream.synchronize()
    te = timer()
    print 'pinned', te - ts
# Same check as for the regular transfer.
assert np.allclose(dst, src)
def main():
    """Convolve an image with a 5x5 filter on the CPU and on the GPU, then plot both.

    The image is read from the path given as the first command-line argument
    (flattened to greyscale); when no argument is supplied, ``misc.lena()`` is
    used instead.  The CPU result comes from ``fftconvolve(..., mode="same")``.
    The GPU result is computed with cuFFT: forward-transform the image and the
    zero-padded filter, multiply them in place, and inverse-transform.  Both
    timings are printed and the two results are shown side by side.
    """
    # Build Filter
    laplacian = np.array(
        [
            [-4, -1, 0, -1, -4],
            [-1, 2, 3, 2, -1],
            [0, 3, 4, 3, 0],
            [-1, 2, 3, 2, -1],
            [-4, -1, 0, -1, -4],
        ],
        dtype=np.float32,
    )

    # Build Image
    try:
        image = ndimage.imread(sys.argv[1], flatten=True).astype(np.float32)
    except IndexError:
        image = misc.lena().astype(np.float32)
    print("Image size: %s" % (image.shape,))

    # Filter embedded in the top-left corner of an image-sized zero array.
    response = np.zeros_like(image)
    response[:5, :5] = laplacian

    # CPU
    cpu_start = timer()
    cvimage_cpu = fftconvolve(image, laplacian, mode="same")
    cpu_stop = timer()
    print("CPU: %.2fs" % (cpu_stop - cpu_start))

    # GPU
    block_dim = 32, 8
    grid_dim = best_grid_size(image.shape[::-1], block_dim)
    print("kernel config: %s x %s" % (grid_dim, block_dim))

    # Create a throwaway plan first so that the one-off cuFFT start-up cost,
    # which is significant for small inputs, stays out of the timed region.
    cufft.FFTPlan(shape=image.shape, itype=np.complex64, otype=np.complex64)

    # Start GPU timer
    gpu_start = timer()
    image_complex = image.astype(np.complex64)
    response_complex = response.astype(np.complex64)

    image_stream = cuda.stream()
    response_stream = cuda.stream()

    image_plan = cufft.FFTPlan(shape=image.shape, itype=np.complex64,
                               otype=np.complex64, stream=image_stream)
    response_plan = cufft.FFTPlan(shape=image.shape, itype=np.complex64,
                                  otype=np.complex64, stream=response_stream)

    # pagelock memory
    with cuda.pinned(image_complex, response_complex):
        # Using two streams lets the transfer of response_complex overlap
        # with the forward FFT on image_complex.
        d_image_complex = cuda.to_device(image_complex, stream=image_stream)
        d_response_complex = cuda.to_device(response_complex,
                                            stream=response_stream)

        image_plan.forward(d_image_complex, out=d_image_complex)
        response_plan.forward(d_response_complex, out=d_response_complex)

        # The filter spectrum must be complete before it is multiplied in.
        response_stream.synchronize()

        mult_inplace[grid_dim, block_dim, image_stream](d_image_complex,
                                                        d_response_complex)
        image_plan.inverse(d_image_complex, out=d_image_complex)

        # implicitly synchronizes the streams
        cvimage_gpu = (d_image_complex.copy_to_host().real
                       / np.prod(image.shape))

    gpu_stop = timer()
    print("GPU: %.2fs" % (gpu_stop - gpu_start))

    # Plot the results
    panels = (("CPU", cvimage_cpu), ("GPU", cvimage_gpu))
    for position, (label, picture) in enumerate(panels, 1):
        plt.subplot(1, 2, position)
        plt.title(label)
        plt.imshow(picture, cmap=plt.cm.gray)
        plt.axis("off")

    plt.show()