def jz(self, state):
    """Refill the ``yp`` PBOs from the current textures and run the jz kernel.

    The four ``yp_*_tilde`` buffers are unregistered from CUDA, refilled
    through ``_pack_texture_into_PBO`` and registered again; the value of
    ``self._process_jz()`` is then returned.

    ``state`` is accepted for interface compatibility but is not used: no
    refresh or render is triggered here.
    """
    assert self.yp_tilde_pbo is not None

    bytes_per_float = 4  # 32-bit precision
    pixel_count = self.height * self.width

    # CUDA must let go of the buffers before they are repacked.
    for cuda_handle in (
        self.pycuda_yp_tilde_pbo,
        self.pycuda_yp_fx_tilde_pbo,
        self.pycuda_yp_fy_tilde_pbo,
        self.pycuda_yp_m_tilde_pbo,
    ):
        cuda_handle.unregister()

    # (PBO id, texture id, size in bytes, GL component type)
    # NOTE(review): j() and j_multi() pack the mask texture as GL_RGBA with
    # four bytes per pixel; here it is packed with one byte per pixel and
    # the helper's default image format -- confirm this is intentional.
    pack_jobs = (
        (self.yp_tilde_pbo, self.texid, pixel_count, GL_UNSIGNED_BYTE),
        (self.yp_fx_tilde_pbo, self.tex_fx_id,
         pixel_count * bytes_per_float, GL_FLOAT),
        (self.yp_fy_tilde_pbo, self.tex_fy_id,
         pixel_count * bytes_per_float, GL_FLOAT),
        (self.yp_m_tilde_pbo, self.tex_m_id, pixel_count, GL_UNSIGNED_BYTE),
    )
    for pbo, texture, nbytes, component_type in pack_jobs:
        self._pack_texture_into_PBO(pbo, texture, nbytes, component_type)

    # Hand the refilled buffers back to CUDA.
    self.pycuda_yp_tilde_pbo = cuda_gl.BufferObject(long(self.yp_tilde_pbo))
    self.pycuda_yp_fx_tilde_pbo = cuda_gl.BufferObject(
        long(self.yp_fx_tilde_pbo))
    self.pycuda_yp_fy_tilde_pbo = cuda_gl.BufferObject(
        long(self.yp_fy_tilde_pbo))
    self.pycuda_yp_m_tilde_pbo = cuda_gl.BufferObject(
        long(self.yp_m_tilde_pbo))

    # Perturbed image data is now CUDA accessible: run the kernel.
    return self._process_jz()
def create_PBOs(w, h):
    """Create the source and destination buffers and register them with CUDA.

    Both buffers are allocated on ``GL_ARRAY_BUFFER`` with
    ``GL_DYNAMIC_DRAW`` usage and initialised from a zero-filled
    ``(w * h, 4)`` uint8 array. The GL ids are stored in the globals
    ``source_pbo`` / ``dest_pbo`` and the CUDA handles in
    ``pycuda_source_pbo`` / ``pycuda_dest_pbo``.
    """
    global source_pbo, dest_pbo, pycuda_source_pbo, pycuda_dest_pbo

    blank_texels = numpy.zeros((w * h, 4), numpy.uint8)

    def allocate_zeroed_buffer():
        # Generate one buffer, fill it with zeros, and leave nothing bound.
        buffer_id = glGenBuffers(1)
        glBindBuffer(GL_ARRAY_BUFFER, buffer_id)
        glBufferData(GL_ARRAY_BUFFER, blank_texels, GL_DYNAMIC_DRAW)
        glBindBuffer(GL_ARRAY_BUFFER, 0)
        return buffer_id

    source_pbo = allocate_zeroed_buffer()
    pycuda_source_pbo = cuda_gl.BufferObject(long(source_pbo))

    dest_pbo = allocate_zeroed_buffer()
    pycuda_dest_pbo = cuda_gl.BufferObject(long(dest_pbo))
def process_image():
    """Read the framebuffer into the source PBO, run CUDA, upload the result.

    Steps, in order: unregister the source PBO from CUDA, read the current
    framebuffer into it with ``glReadPixels``, register it with CUDA again,
    call ``process(width, height)``, and finally copy the destination PBO
    into ``output_texture`` with ``glTexSubImage2D``.
    """
    global pycuda_source_pbo, source_pbo, current_size, dest_pbo

    width, height = current_size
    assert source_pbo is not None

    # CUDA has to release the buffer before OpenGL writes into it.
    pycuda_source_pbo.unregister()

    # Bind the source PBO as the pixel-pack target so that glReadPixels
    # stores into it; the last argument is then an offset into that buffer.
    glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, long(source_pbo))

    buffer_start = ctypes.c_void_p(0)

    # BGRA is used for optimal performance.
    glReadPixels(
        0,                 # x origin
        0,                 # y origin
        width,             # width of the region
        height,            # height of the region
        GL_BGRA,           # pixel format
        GL_UNSIGNED_BYTE,  # component type
        buffer_start)

    pycuda_source_pbo = cuda_gl.BufferObject(long(source_pbo))

    # Run the CUDA kernel.
    process(width, height)

    # Upload the processed image from the destination PBO into the texture
    # that is drawn to the screen.
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, long(dest_pbo))
    glBindTexture(GL_TEXTURE_2D, output_texture)
    glTexSubImage2D(
        GL_TEXTURE_2D, 0, 0, 0,
        width, height,
        GL_BGRA, GL_UNSIGNED_BYTE,
        buffer_start)
def create_PBOs(w, h):
    """Create the destination PBO for a ``w`` x ``h`` image and register it.

    The buffer is sized at four bytes per texel. Its GL id is stored in the
    global ``dest_pbo`` and its CUDA handle in ``pycuda_dest_pbo``. The
    buffer is left bound to ``GL_PIXEL_UNPACK_BUFFER`` on return.
    """
    global dest_pbo, pycuda_dest_pbo

    bytes_per_texel = 4
    texel_count = w * h

    dest_pbo = glGenBuffers(1)
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, dest_pbo)
    # No initial data is supplied (None); only the storage size is given.
    glBufferData(
        GL_PIXEL_UNPACK_BUFFER,
        texel_count * bytes_per_texel,
        None,
        GL_DYNAMIC_COPY)

    pycuda_dest_pbo = cuda_gl.BufferObject(long(dest_pbo))
def _initializePBO(self, data):
    """Create a GL buffer initialised from ``data`` and register it with CUDA.

    The buffer is filled through ``GL_ARRAY_BUFFER`` with
    ``GL_DYNAMIC_DRAW`` usage and unbound again before registration.

    Returns:
        A ``(pbo, pycuda_pbo)`` tuple: the GL buffer id and the matching
        ``cuda_gl.BufferObject``.
    """
    buffer_id = glGenBuffers(1)

    glBindBuffer(GL_ARRAY_BUFFER, buffer_id)
    glBufferData(GL_ARRAY_BUFFER, data, GL_DYNAMIC_DRAW)
    glBindBuffer(GL_ARRAY_BUFFER, 0)

    cuda_handle = cuda_gl.BufferObject(long(buffer_id))
    return (buffer_id, cuda_handle)
def j_multi(self, state, deltaX, ee, label):
    """Render two perturbed states into the yp / ypp PBOs and reduce on CUDA.

    The rows of ``state.X`` selected by ``ee[:, 0]`` are perturbed by
    ``deltaX``, the state is refreshed and rendered, and the resulting
    textures are packed into the ``yp_*_tilde`` buffers. The same is then
    done for ``ee[:, 1]`` into the ``ypp_*_tilde`` buffers.

    Args:
        state: object exposing ``X``, ``refresh(label, hess=True)`` and
            ``render()``.
        deltaX: perturbation added to column 0 of the selected rows.
        ee: 2-D index array; column 0 and column 1 select the rows
            perturbed in the first and second pass respectively.
        label: forwarded to ``state.refresh``.

    Returns:
        Whatever ``self._process_j_multi()`` returns.
    """
    assert self.yp_tilde_pbo is not None

    floatsize = 4
    rgbsize = 4  # 32-bit
    bytesize = self.height * self.width

    def render_perturbed(rows):
        # Perturb, refresh and render, then undo the perturbation. The undo
        # sits in ``finally`` so a failing refresh/render cannot leave
        # ``state.X`` permanently shifted.
        state.X[rows, 0] = np.squeeze(state.X[rows, 0] + deltaX)
        try:
            state.refresh(label, hess=True)
            state.render()
        finally:
            state.X[rows, 0] = np.squeeze(state.X[rows, 0] - deltaX)

    def pack_textures(image_pbo, fx_pbo, fy_pbo, mask_pbo):
        # Image: one byte per pixel; flow components: one float per pixel;
        # mask: RGBA, four bytes per pixel.
        self._pack_texture_into_PBO(image_pbo, self.texid, bytesize,
                                    GL_UNSIGNED_BYTE)
        self._pack_texture_into_PBO(fx_pbo, self.tex_fx_id,
                                    bytesize * floatsize, GL_FLOAT)
        self._pack_texture_into_PBO(fy_pbo, self.tex_fy_id,
                                    bytesize * floatsize, GL_FLOAT)
        self._pack_texture_into_PBO(mask_pbo, self.tex_m_id,
                                    bytesize * rgbsize, GL_UNSIGNED_BYTE,
                                    imageformat=GL_RGBA)

    # ---- First point: perturb and render.
    render_perturbed(ee[:, 0])

    # Release the yp buffers from CUDA, refill them, register them again.
    self.pycuda_yp_tilde_pbo.unregister()
    self.pycuda_yp_fx_tilde_pbo.unregister()
    self.pycuda_yp_fy_tilde_pbo.unregister()
    self.pycuda_yp_m_tilde_pbo.unregister()

    pack_textures(self.yp_tilde_pbo, self.yp_fx_tilde_pbo,
                  self.yp_fy_tilde_pbo, self.yp_m_tilde_pbo)

    self.pycuda_yp_tilde_pbo = cuda_gl.BufferObject(long(self.yp_tilde_pbo))
    self.pycuda_yp_fx_tilde_pbo = cuda_gl.BufferObject(
        long(self.yp_fx_tilde_pbo))
    self.pycuda_yp_fy_tilde_pbo = cuda_gl.BufferObject(
        long(self.yp_fy_tilde_pbo))
    self.pycuda_yp_m_tilde_pbo = cuda_gl.BufferObject(
        long(self.yp_m_tilde_pbo))

    # ---- Second point: perturb and render.
    render_perturbed(ee[:, 1])

    # Same unregister / pack / register cycle for the ypp buffers.
    self.pycuda_ypp_tilde_pbo.unregister()
    self.pycuda_ypp_fx_tilde_pbo.unregister()
    self.pycuda_ypp_fy_tilde_pbo.unregister()
    self.pycuda_ypp_m_tilde_pbo.unregister()

    logging.debug('---------- Pack texture into PBO')
    pack_textures(self.ypp_tilde_pbo, self.ypp_fx_tilde_pbo,
                  self.ypp_fy_tilde_pbo, self.ypp_m_tilde_pbo)

    self.pycuda_ypp_tilde_pbo = cuda_gl.BufferObject(
        long(self.ypp_tilde_pbo))
    self.pycuda_ypp_fx_tilde_pbo = cuda_gl.BufferObject(
        long(self.ypp_fx_tilde_pbo))
    self.pycuda_ypp_fy_tilde_pbo = cuda_gl.BufferObject(
        long(self.ypp_fy_tilde_pbo))
    self.pycuda_ypp_m_tilde_pbo = cuda_gl.BufferObject(
        long(self.ypp_m_tilde_pbo))

    # Send to CUDA.
    logging.debug('---------- Perform reduction with CUDA')
    return self._process_j_multi()
def j(self, state, deltaX, i, j):
    """Render two perturbed states into the yp / ypp PBOs and run the j kernel.

    ``state.X[i, 0]`` is shifted by ``deltaX``, the state is refreshed and
    rendered, and the textures are packed into the ``yp_*_tilde`` buffers.
    The same is then done for ``state.X[j, 0]`` into the ``ypp_*_tilde``
    buffers.

    Args:
        state: object exposing ``X``, ``refresh()`` and ``render()``.
        deltaX: amount added to the selected entry before rendering and
            subtracted again afterwards.
        i: row of ``state.X`` perturbed in the first pass.
        j: row of ``state.X`` perturbed in the second pass.

    Returns:
        Whatever ``self._process_j()`` returns.
    """
    assert self.yp_tilde_pbo is not None

    floatsize = 4
    rgbsize = 4  # 32-bit
    bytesize = self.height * self.width

    def render_perturbed(row):
        # Perturb, refresh and render, then undo the perturbation. The undo
        # sits in ``finally`` so a failing refresh/render cannot leave
        # ``state.X`` permanently shifted.
        state.X[row, 0] += deltaX
        try:
            state.refresh()
            state.render()
        finally:
            state.X[row, 0] -= deltaX

    def pack_textures(image_pbo, fx_pbo, fy_pbo, mask_pbo):
        # Image: one byte per pixel; flow components: one float per pixel;
        # mask: RGBA, four bytes per pixel.
        self._pack_texture_into_PBO(image_pbo, self.texid, bytesize,
                                    GL_UNSIGNED_BYTE)
        self._pack_texture_into_PBO(fx_pbo, self.tex_fx_id,
                                    bytesize * floatsize, GL_FLOAT)
        self._pack_texture_into_PBO(fy_pbo, self.tex_fy_id,
                                    bytesize * floatsize, GL_FLOAT)
        self._pack_texture_into_PBO(mask_pbo, self.tex_m_id,
                                    bytesize * rgbsize, GL_UNSIGNED_BYTE,
                                    imageformat=GL_RGBA)

    # ---- First perturbation.
    render_perturbed(i)

    # Release the yp buffers from CUDA, refill them, register them again.
    self.pycuda_yp_tilde_pbo.unregister()
    self.pycuda_yp_fx_tilde_pbo.unregister()
    self.pycuda_yp_fy_tilde_pbo.unregister()
    self.pycuda_yp_m_tilde_pbo.unregister()

    pack_textures(self.yp_tilde_pbo, self.yp_fx_tilde_pbo,
                  self.yp_fy_tilde_pbo, self.yp_m_tilde_pbo)

    self.pycuda_yp_tilde_pbo = cuda_gl.BufferObject(long(self.yp_tilde_pbo))
    self.pycuda_yp_fx_tilde_pbo = cuda_gl.BufferObject(
        long(self.yp_fx_tilde_pbo))
    self.pycuda_yp_fy_tilde_pbo = cuda_gl.BufferObject(
        long(self.yp_fy_tilde_pbo))
    self.pycuda_yp_m_tilde_pbo = cuda_gl.BufferObject(
        long(self.yp_m_tilde_pbo))

    # ---- Second perturbation.
    render_perturbed(j)

    # Same unregister / pack / register cycle for the ypp buffers.
    self.pycuda_ypp_tilde_pbo.unregister()
    self.pycuda_ypp_fx_tilde_pbo.unregister()
    self.pycuda_ypp_fy_tilde_pbo.unregister()
    self.pycuda_ypp_m_tilde_pbo.unregister()

    pack_textures(self.ypp_tilde_pbo, self.ypp_fx_tilde_pbo,
                  self.ypp_fy_tilde_pbo, self.ypp_m_tilde_pbo)

    self.pycuda_ypp_tilde_pbo = cuda_gl.BufferObject(
        long(self.ypp_tilde_pbo))
    self.pycuda_ypp_fx_tilde_pbo = cuda_gl.BufferObject(
        long(self.ypp_fx_tilde_pbo))
    self.pycuda_ypp_fy_tilde_pbo = cuda_gl.BufferObject(
        long(self.ypp_fy_tilde_pbo))
    self.pycuda_ypp_m_tilde_pbo = cuda_gl.BufferObject(
        long(self.ypp_m_tilde_pbo))

    # Send to CUDA.
    return self._process_j()
def initData(fn=None):
    """Set up the CUDA array, the pixel buffer object and the display texture.

    Reads the globals ``pixels``, ``imWidth``, ``imHeight`` and ``texid``
    and assigns the globals ``array``, ``pbo_buffer`` and
    ``cuda_pbo_resource``. ``fn`` is accepted but not used.
    """
    global pixels, array, pbo_buffer, cuda_pbo_resource, imWidth, imHeight, texid

    # CUDA array built from the host pixels in C (row-major) order rather
    # than Fortran order; the host array is zeroed afterwards.
    array = cuda_driver.matrix_to_array(pixels, "C")
    pixels.fill(0)

    # One byte per pixel.
    image_bytes = imWidth * imHeight

    # Allocate the PBO and check that the requested size was honoured.
    pbo_buffer = glGenBuffers(1)
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo_buffer)
    glBufferData(GL_PIXEL_UNPACK_BUFFER, image_bytes, pixels, GL_STREAM_DRAW)
    allocated_bytes = glGetBufferParameteriv(
        GL_PIXEL_UNPACK_BUFFER, GL_BUFFER_SIZE)
    assert (allocated_bytes == image_bytes)
    glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0)

    # Register the GL buffer with CUDA; which API is used depends on the
    # ``ver2011`` flag.
    gl_buffer_id = int(pbo_buffer)
    if ver2011:
        cuda_pbo_resource = pycuda.gl.RegisteredBuffer(
            gl_buffer_id, cuda_gl.graphics_map_flags.WRITE_DISCARD)
    else:
        cuda_pbo_resource = cuda_gl.BufferObject(gl_buffer_id)

    # Allocate a luminance texture of the image size, without initial data.
    glGenTextures(1, texid)
    glBindTexture(GL_TEXTURE_2D, texid)
    glTexImage2D(
        GL_TEXTURE_2D, 0, GL_LUMINANCE,
        imWidth, imHeight, 0,
        GL_LUMINANCE, GL_UNSIGNED_BYTE, None)
    glBindTexture(GL_TEXTURE_2D, 0)

    # 1-byte row alignment for both unpacking and packing.
    glPixelStorei(GL_UNPACK_ALIGNMENT, 1)
    glPixelStorei(GL_PACK_ALIGNMENT, 1)
def initjacobian(self, y_im_flip, y_flow, y_m_flip, test=False):
    """Load the observed frame and the rendered frame into CUDA-visible PBOs.

    For each of image, x-flow, y-flow and mask, the rendered texture is
    packed into the matching ``y_*_tilde`` buffer and the observed data
    (flipped vertically) is uploaded from host memory into the matching
    ``y_*`` buffer. All eight buffers are unregistered from CUDA first and
    registered again at the end.

    Args:
        y_im_flip: observed image; flipped with ``np.flipud`` before upload.
            Uploaded as ``width * height`` bytes (assumes one byte per
            pixel -- TODO confirm dtype with caller).
        y_flow: observed flow; channel 0 of the last axis is the x
            component and channel 1 the y component. Each is uploaded as
            ``width * height * 4`` bytes (assumes 32-bit floats -- TODO
            confirm dtype with caller).
        y_m_flip: observed mask; flipped with ``np.flipud`` before upload.
        test: when true, return ``self._process_initjac_test(TEST_IMAGE)``.

    Returns:
        The test result when ``test`` is true, otherwise ``None``.
    """
    # Single pre-formatted argument: prints the same text under the Python 2
    # print statement and the print function.
    print('initjac: y_tilde_pbo %s' % (self.y_tilde_pbo,))

    # Flip the observed data vertically before uploading it.
    y_im = np.flipud(y_im_flip)
    y_m = np.flipud(y_m_flip)
    yfx = np.flipud(y_flow[:, :, 0])
    # The y component is channel 1 (channel 0 is the x component).
    yfy = np.flipud(y_flow[:, :, 1])
    flow = np.dstack((yfx, yfy))

    # Tell CUDA we are going to write into these buffers.
    self.pycuda_y_tilde_pbo.unregister()
    self.pycuda_y_fx_tilde_pbo.unregister()
    self.pycuda_y_fy_tilde_pbo.unregister()
    self.pycuda_y_m_tilde_pbo.unregister()
    self.pycuda_y_im_pbo.unregister()
    self.pycuda_y_fx_pbo.unregister()
    self.pycuda_y_fy_pbo.unregister()
    self.pycuda_y_m_pbo.unregister()

    bytesize = self.height * self.width
    floatsize = 4  # 32-bit

    def upload_from_host(pbo, nbytes, host_data):
        # Copy ``host_data`` into ``pbo``, then unbind the pack buffer and
        # disable 2-D texturing.
        glBindBufferARB(GL_PIXEL_PACK_BUFFER_ARB, long(pbo))
        glBufferData(GL_PIXEL_PACK_BUFFER_ARB, nbytes, host_data,
                     GL_STREAM_DRAW)
        glBindBuffer(GL_PIXEL_PACK_BUFFER_ARB, 0)
        glDisable(GL_TEXTURE_2D)

    # ---- y_im: rendered image, then observed image.
    self._pack_texture_into_PBO(self.y_tilde_pbo, self.texid, bytesize,
                                GL_UNSIGNED_BYTE)
    upload_from_host(self.y_im_pbo, bytesize, y_im)

    # ---- y_fx: rendered x-flow, then observed x-flow.
    self._pack_texture_into_PBO(self.y_fx_tilde_pbo, self.tex_fx_id,
                                bytesize * floatsize, GL_FLOAT)
    upload_from_host(self.y_fx_pbo, bytesize * floatsize, flow[:, :, 0])

    # ---- y_fy: rendered y-flow, then observed y-flow.
    self._pack_texture_into_PBO(self.y_fy_tilde_pbo, self.tex_fy_id,
                                bytesize * floatsize, GL_FLOAT)
    upload_from_host(self.y_fy_pbo, bytesize * floatsize, flow[:, :, 1])

    # ---- y_m: rendered mask, then observed mask.
    self._pack_texture_into_PBO(self.y_m_tilde_pbo, self.tex_m_id, bytesize,
                                GL_UNSIGNED_BYTE)
    upload_from_host(self.y_m_pbo, bytesize, y_m)

    # Register everything with CUDA again.
    self.pycuda_y_tilde_pbo = cuda_gl.BufferObject(long(self.y_tilde_pbo))
    self.pycuda_y_fx_tilde_pbo = cuda_gl.BufferObject(
        long(self.y_fx_tilde_pbo))
    self.pycuda_y_fy_tilde_pbo = cuda_gl.BufferObject(
        long(self.y_fy_tilde_pbo))
    self.pycuda_y_m_tilde_pbo = cuda_gl.BufferObject(
        long(self.y_m_tilde_pbo))
    self.pycuda_y_im_pbo = cuda_gl.BufferObject(long(self.y_im_pbo))
    self.pycuda_y_fx_pbo = cuda_gl.BufferObject(long(self.y_fx_pbo))
    self.pycuda_y_fy_pbo = cuda_gl.BufferObject(long(self.y_fy_pbo))
    self.pycuda_y_m_pbo = cuda_gl.BufferObject(long(self.y_m_pbo))

    # Everything is in CUDA-accessible memory; optionally verify the upload.
    if test:
        return self._process_initjac_test(TEST_IMAGE)
    return None