def shutdown():
    """Destroy all cached cuDNN handles and mark the module uninitialized.

    Returns immediately when the recorded ``_pid`` differs from the current
    process id, which this module treats as "not initialized". Otherwise
    every value stored in ``_handles`` is passed to ``libcudnn.cudnnDestroy``,
    ``_handles`` is rebound to a fresh empty dict and ``_pid`` is set to
    ``None``.
    """
    global _handles, _pid
    if _pid != os.getpid():
        # not initialized
        return
    # ``dict.values()`` is available on both Python 2 and Python 3, whereas
    # ``itervalues()`` only exists on Python 2.
    for handle in _handles.values():
        libcudnn.cudnnDestroy(handle)
    _handles = {}
    _pid = None  # mark as uninitialized
libcudnn.cudnnConvolutionForward(cudnn_context, alpha, X_desc, X_data, filters_desc, filters_data, conv_desc, algo, ws_data, ws_size.value, beta, Y_desc, Y_data) start_bench() for i in range(10): libcudnn.cudnnConvolutionForward(cudnn_context, alpha, X_desc, X_data, filters_desc, filters_data, conv_desc, algo, ws_data, ws_size.value, beta, Y_desc, Y_data) ms = end_bench() ws_ptr = None libcudnn.cudnnDestroyTensorDescriptor(X_desc) libcudnn.cudnnDestroyTensorDescriptor(Y_desc) libcudnn.cudnnDestroyFilterDescriptor(filters_desc) libcudnn.cudnnDestroyConvolutionDescriptor(conv_desc) return ms / 10 # for kw in range(1, 11): # for kh in range(1, 11): # ms = benchmark_conv(kw, kh) # print("%dx%d : %fms" % (kw, kh, ms)) for bsz in range(1, 32): ms = benchmark_conv(11, 11, bsz) print("%d : %.2fms => %f img/sec" % (bsz, ms, bsz/ms)) # Clean up libcudnn.cudnnDestroy(cudnn_context)
# NOTE(review): the original nesting is not recoverable from this view, so the
# statements below are laid out sequentially at one level -- confirm against
# the enclosing code before merging.
maxU = parU[0:1,0:1]

# Largest absolute difference between each cu* array and the transposed nl*
# array; .get()[0,0] pulls the single reduced value out of the result.
maxo = ng.max(abs(cuO - nlO.T), partial=parO, out=maxO).get()[0,0]
maxb = ng.max(abs(cuB - nlB.T), partial=parB, out=maxB).get()[0,0]
maxu = ng.max(abs(cuU - nlU.T), partial=parU, out=maxU).get()[0,0]

# Mean absolute value of each cu* array, used below to scale the max error.
# The max* views are reused as output buffers; the max values were already
# read out above.
meano = ng.mean(abs(cuO), partial=parO, out=maxO).get()[0,0]
meanb = ng.mean(abs(cuB), partial=parB, out=maxB).get()[0,0]
meanu = ng.mean(abs(cuU), partial=parU, out=maxU).get()[0,0]

# Single-argument print(...) emits the same text on Python 2 and is also
# valid on Python 3.
print(" maxerr mean pct")
print("fprop: %7.5f %6.2f %5.3f" % (maxo, meano, 100*maxo/meano))
print("bprop: %7.5f %6.2f %5.3f" % (maxb, meanb, 100*maxb/meanb))
print("updat: %7.5f %6.2f %5.3f" % (maxu, meanu, 100*maxu/meanu))

# free up memory from this layer before proceeding
cuB = cuU = cuO = None
nlB = nlU = nlO = None
parO = parB = parU = maxO = maxB = maxU = None

# Destroy every cuDNN descriptor, then the cuDNN handle itself.
libcudnn.cudnnDestroyTensorDescriptor(I_desc)
libcudnn.cudnnDestroyTensorDescriptor(O_desc)
libcudnn.cudnnDestroyFilterDescriptor(F_desc)
libcudnn.cudnnDestroyTensorDescriptor(E_desc)
libcudnn.cudnnDestroyTensorDescriptor(B_desc)
libcudnn.cudnnDestroyFilterDescriptor(U_desc)
libcudnn.cudnnDestroyConvolutionDescriptor(C_desc)
libcudnn.cudnnDestroy(cudnn)
# Wrap the device addresses of the input, filter and output arrays in
# ctypes void pointers so they can be passed to the cuDNN bindings.
X_data, filters_data, Y_data = (
    ctypes.c_void_p(int(array.gpudata)) for array in (X, filters, Y)
)

# Perform convolution
algo = libcudnn.cudnnGetConvolutionForwardAlgorithm(
    cudnn_context,
    X_desc,
    filters_desc,
    conv_desc,
    Y_desc,
    convolution_fwd_pref,
    0,
)
print("Cudnn algorithm = %d" % algo.value)

ws_size = libcudnn.cudnnGetConvolutionForwardWorkspaceSize(
    cudnn_context,
    X_desc,
    filters_desc,
    conv_desc,
    Y_desc,
    algo,
)

# Allocate a workspace only when cuDNN reports a non-zero size; otherwise
# pass address 0.
if ws_size.value > 0:
    ws_ptr = drv.mem_alloc(ws_size.value)
else:
    ws_ptr = 0
ws_data = ctypes.c_void_p(int(ws_ptr))

# Time one forward convolution.
start_bench()
libcudnn.cudnnConvolutionForward(
    cudnn_context,
    alpha,
    X_desc,
    X_data,
    filters_desc,
    filters_data,
    conv_desc,
    algo,
    ws_data,
    ws_size.value,
    beta,
    Y_desc,
    Y_data,
)
end_bench("fprop")

# Drop the reference to the workspace allocation now that the call is done
# (presumably this lets the driver wrapper release it -- confirm).
ws_ptr = None

# Clean up
for destroy_descriptor, descriptor in (
    (libcudnn.cudnnDestroyTensorDescriptor, X_desc),
    (libcudnn.cudnnDestroyTensorDescriptor, Y_desc),
    (libcudnn.cudnnDestroyFilterDescriptor, filters_desc),
    (libcudnn.cudnnDestroyConvolutionDescriptor, conv_desc),
):
    destroy_descriptor(descriptor)
libcudnn.cudnnDestroy(cudnn_context)
# NOTE(review): the original nesting is not recoverable from this view, so the
# statements below are laid out sequentially at one level -- confirm against
# the enclosing code before merging.
maxB = parB[0:1, 0:1]
maxU = parU[0:1, 0:1]

# Largest absolute difference between each cu* array and the transposed nl*
# array; .get()[0, 0] pulls the single reduced value out of the result.
maxo = ng.max(abs(cuO - nlO.T), partial=parO, out=maxO).get()[0, 0]
maxb = ng.max(abs(cuB - nlB.T), partial=parB, out=maxB).get()[0, 0]
maxu = ng.max(abs(cuU - nlU.T), partial=parU, out=maxU).get()[0, 0]

# Mean absolute value of each cu* array, used below to scale the max error.
# The max* views are reused as output buffers; the max values were already
# read out above.
meano = ng.mean(abs(cuO), partial=parO, out=maxO).get()[0, 0]
meanb = ng.mean(abs(cuB), partial=parB, out=maxB).get()[0, 0]
meanu = ng.mean(abs(cuU), partial=parU, out=maxU).get()[0, 0]

# Single-argument print(...) emits the same text on Python 2 and is also
# valid on Python 3.
print(" maxerr mean pct")
print("fprop: %7.5f %6.2f %5.3f" % (maxo, meano, 100 * maxo / meano))
print("bprop: %7.5f %6.2f %5.3f" % (maxb, meanb, 100 * maxb / meanb))
print("updat: %7.5f %6.2f %5.3f" % (maxu, meanu, 100 * maxu / meanu))

# free up memory from this layer before proceeding
cuB = cuU = cuO = None
nlB = nlU = nlO = None
parO = parB = parU = maxO = maxB = maxU = None

# Destroy every cuDNN descriptor, then the cuDNN handle itself.
libcudnn.cudnnDestroyTensorDescriptor(I_desc)
libcudnn.cudnnDestroyTensorDescriptor(O_desc)
libcudnn.cudnnDestroyFilterDescriptor(F_desc)
libcudnn.cudnnDestroyTensorDescriptor(E_desc)
libcudnn.cudnnDestroyTensorDescriptor(B_desc)
libcudnn.cudnnDestroyFilterDescriptor(U_desc)
libcudnn.cudnnDestroyConvolutionDescriptor(C_desc)
libcudnn.cudnnDestroy(cudnn)