def load_data_gl(self, pos_vbo, col_vbo, vel):
    """Share the position/colour VBOs with OpenCL and upload the velocities.

    Stores the two VBOs, their host-side ``data`` arrays and ``vel`` on the
    instance, wraps each VBO in a ``cl.GLBuffer`` and creates three plain
    OpenCL buffers initialised from host memory.

    Args:
        pos_vbo: vertex buffer object holding positions (must expose
            ``data``, ``bind()`` and ``buffers``).
        col_vbo: vertex buffer object holding colours (same interface).
        vel: host array copied into the velocity buffers.
    """
    flags = cl.mem_flags
    host_copy_ro = flags.READ_ONLY | flags.COPY_HOST_PTR

    self.pos_vbo, self.col_vbo = pos_vbo, col_vbo
    self.pos, self.col = pos_vbo.data, col_vbo.data
    self.vel = vel

    # Each VBO is bound before its GL buffer name is handed to OpenCL.
    self.pos_vbo.bind()
    pos_name = int(self.pos_vbo.buffers[0])
    self.pos_cl = cl.GLBuffer(self.ctx, flags.READ_WRITE, pos_name)

    self.col_vbo.bind()
    col_name = int(self.col_vbo.buffers[0])
    self.col_cl = cl.GLBuffer(self.ctx, flags.READ_WRITE, col_name)

    # Plain OpenCL buffers: current velocity plus copies of the initial
    # positions and velocities.
    self.vel_cl = cl.Buffer(self.ctx, host_copy_ro, hostbuf=vel)
    self.pos_gen_cl = cl.Buffer(self.ctx, host_copy_ro, hostbuf=self.pos)
    self.vel_gen_cl = cl.Buffer(self.ctx, host_copy_ro, hostbuf=self.vel)
    self.queue.finish()

    # GL-backed objects shared with OpenCL.
    self.gl_objects = [self.pos_cl, self.col_cl]
def setup(self):
    """Create the OpenCL context, queue, program and particle buffers.

    When ``self.opengl`` is true the context is created with the GL sharing
    properties and the two position buffers are ``cl.GLBuffer`` objects over
    newly created VBOs; otherwise they are plain OpenCL buffers copied from
    ``self.np_particle_position``.
    """
    self.platform = cl.get_platforms()[0]

    properties = [(cl.context_properties.PLATFORM, self.platform)]
    if self.opengl:
        properties = properties + get_gl_sharing_context_properties()
    self.context = cl.Context(properties=properties)

    self.queue = cl.CommandQueue(self.context)
    build_options = '-cl-single-precision-constant -cl-opt-disable'
    self.program = cl.Program(self.context, self.kernel_src).build(
        build_options)

    def host_copy(array):
        # OpenCL buffer initialised with a copy of the given host array.
        return cl.Buffer(self.context, mf.COPY_HOST_PTR, hostbuf=array)

    self.cl_particle_velocity_a = host_copy(self.np_particle_velocity)
    self.cl_particle_velocity_b = host_copy(self.np_particle_velocity)

    if self.opengl:
        vbo_args = dict(data=self.np_particle_position,
                        usage=gl.GL_DYNAMIC_DRAW,
                        target=gl.GL_ARRAY_BUFFER)
        self.gl_particle_position_a = vbo.VBO(**vbo_args)
        self.gl_particle_position_a.bind()
        self.gl_particle_position_b = vbo.VBO(**vbo_args)
        self.gl_particle_position_b.bind()

        self.cl_particle_position_a = cl.GLBuffer(
            self.context, mf.READ_WRITE, int(self.gl_particle_position_a))
        self.cl_particle_position_b = cl.GLBuffer(
            self.context, mf.READ_WRITE, int(self.gl_particle_position_b))
    else:
        self.cl_particle_position_a = host_copy(self.np_particle_position)
        self.cl_particle_position_b = host_copy(self.np_particle_position)

    self.cl_last_collide = host_copy(self.np_last_collide)
    self.cl_particle_velocity_norms = host_copy(
        self.np_particle_velocity_norms)
def loadData(self, pos_vbo, col_vbo, vel, acc):
    """Share the VBOs with OpenCL and create the simulation buffers.

    Stores the VBOs, their host ``data`` arrays and ``vel`` on the instance,
    wraps each VBO in a ``cl.GLBuffer``, uploads ``vel`` and ``acc`` and
    allocates three further buffers shaped like ``vel``. Finally records the
    GL objects list and the global/local work sizes, and prints the global
    size.

    Args:
        pos_vbo: position VBO exposing ``data``, ``bind()`` and ``buffers``.
        col_vbo: colour VBO with the same interface.
        vel: host array of velocities.
        acc: host array of accelerations.
    """
    import pyopencl as cl
    mf = cl.mem_flags

    self.pos_vbo = pos_vbo
    self.col_vbo = col_vbo

    self.pos = pos_vbo.data
    self.col = col_vbo.data
    self.vel = vel

    # Setup vertex buffer objects and share them with OpenCL as GLBuffers
    self.pos_vbo.bind()
    self.pos_cl = cl.GLBuffer(self.ctx, mf.READ_WRITE,
                              int(self.pos_vbo.buffers[0]))
    self.col_vbo.bind()
    self.col_cl = cl.GLBuffer(self.ctx, mf.READ_WRITE,
                              int(self.col_vbo.buffers[0]))

    # Pure OpenCL arrays
    self.vel_cl = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR,
                            hostbuf=vel)
    self.acc_cl = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR,
                            hostbuf=acc)
    # The next three only borrow the shape of `vel`; its values are not used.
    self.steer_cl = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR,
                              hostbuf=vel)
    self.avg_pos_cl = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR,
                                hostbuf=vel)
    self.avg_vel_cl = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR,
                                hostbuf=vel)
    self.queue.finish()

    # Set up the list of GL objects to share with OpenCL
    self.gl_objects = [self.pos_cl, self.col_cl]

    self.global_size = (self.num,)
    self.local_size = None

    # Function-call form prints the same text on Python 2 and Python 3.
    print(self.global_size)
def loadData(self, pos_vbo, col_vbo, time, props):
    """Share the VBOs with OpenCL and upload time, props and params.

    Args:
        pos_vbo: position VBO exposing ``data``, ``bind()`` and ``buffers``.
        col_vbo: colour VBO with the same interface.
        time: host array copied into ``self.time_cl``.
        props: host array copied into ``self.props_cl``.

    ``self.params`` must already be set; it is copied into
    ``self.params_cl``.
    """
    import pyopencl as cl
    flags = cl.mem_flags
    host_copy_ro = flags.READ_ONLY | flags.COPY_HOST_PTR

    self.pos_vbo, self.col_vbo = pos_vbo, col_vbo
    self.pos, self.col = pos_vbo.data, col_vbo.data
    self.time = time
    self.props = props

    # Wrap the GL vertex buffers so OpenCL can read and write them.
    self.pos_vbo.bind()
    pos_name = int(self.pos_vbo.buffers[0])
    self.pos_cl = cl.GLBuffer(self.ctx, flags.READ_WRITE, pos_name)

    self.col_vbo.bind()
    col_name = int(self.col_vbo.buffers[0])
    self.col_cl = cl.GLBuffer(self.ctx, flags.READ_WRITE, col_name)

    # Plain OpenCL buffers initialised from host memory.
    self.time_cl = cl.Buffer(self.ctx, host_copy_ro, hostbuf=self.time)
    self.props_cl = cl.Buffer(self.ctx, host_copy_ro, hostbuf=self.props)
    self.params_cl = cl.Buffer(self.ctx, host_copy_ro, hostbuf=self.params)
    self.queue.finish()

    # GL-backed objects shared with OpenCL.
    self.gl_objects = [self.pos_cl, self.col_cl]
def cl_init(self):
    """Wrap the body VBOs as OpenCL GL buffers, then generate the bodies.

    Creates read/write ``cl.GLBuffer`` objects for the position, velocity
    and colour VBOs and finishes by calling ``self._generate_bodies()``.
    """
    # The unused `t0 = time.time()` timestamp from the original was dropped.
    self.body_positions_cl_buffer = cl.GLBuffer(
        cl_context, mf.READ_WRITE, int(self.body_positions_vbo))
    self.body_velocities_cl_buffer = cl.GLBuffer(
        cl_context, mf.READ_WRITE, int(self.body_velocities_vbo))
    self.body_colors_cl_buffer = cl.GLBuffer(
        cl_context, mf.READ_WRITE, int(self.body_colors_vbo))

    self._generate_bodies()
def loadData(self):
    """Create the OpenCL buffers for ``A`` and the two VBOs.

    ``self.A`` is copied into a read/write OpenCL buffer; the position and
    colour VBOs are bound and wrapped as ``cl.GLBuffer`` objects.

    Returns:
        ``self``, so calls can be chained.
    """
    flags = cl.mem_flags
    read_write = flags.READ_WRITE

    self.A_cl = cl.Buffer(self.ctx, read_write | flags.COPY_HOST_PTR,
                          hostbuf=self.A)

    # Bind each VBO before sharing its buffer with OpenCL.
    self.pos_vbo.bind()
    self.X_cl = cl.GLBuffer(self.ctx, read_write, self.pos_vbo.buffer)

    self.col_vbo.bind()
    self.I_cl = cl.GLBuffer(self.ctx, read_write, self.col_vbo.buffer)

    self.queue.finish()
    return self
def __init_cl_stuff(self):
    """Create the GL-sharing OpenCL context, buffers, kernel and queue.

    Raises:
        ParticleSystemException: if the kernel source fails to build with a
            ``cl.RuntimeError``.
    """
    # Figure out platform and device
    platform = cl.get_platforms()[0]

    if sys.platform == "darwin":
        self.context = cl.Context(
            properties=get_gl_sharing_context_properties(), devices=[])
    else:
        sharing_properties = (
            [(cl.context_properties.PLATFORM, platform)]
            + get_gl_sharing_context_properties())
        # Some OSs prefer clCreateContextFromType, some prefer
        # clCreateContext. Try both.
        try:
            self.context = cl.Context(properties=sharing_properties)
        except Exception:
            # Only ordinary errors trigger the fallback; KeyboardInterrupt
            # and SystemExit are no longer swallowed.
            self.context = cl.Context(
                properties=sharing_properties,
                devices=[platform.get_devices()[0]])

    device = self.context.devices[0]
    print(Style.BRIGHT + 'DEVICE: \t' + Style.RESET_ALL, device)
    print(Style.BRIGHT + 'VERSION: \t' + Style.RESET_ALL,
          device.get_info(cl.device_info.VERSION))

    # Make OpenCL buffers, one for each OpenGL VBO
    self.position_buffer = cl.GLBuffer(
        self.context, cl.mem_flags.READ_WRITE, int(self.position_vbo))
    self.color_buffer = cl.GLBuffer(
        self.context, cl.mem_flags.READ_WRITE, int(self.color_vbo))
    self.lifetime_buffer = cl.GLBuffer(
        self.context, cl.mem_flags.READ_WRITE, int(self.lifetime_vbo))
    self.gl_buffers = (self.position_buffer, self.color_buffer,
                       self.lifetime_buffer)

    # Make other OpenCL buffers
    self.seed_buffer = cl.Buffer(
        self.context, cl.mem_flags.READ_WRITE,
        self.n_particles * 8)  # buffer of ulong
    self.velocity_buffer = cl.Buffer(
        self.context, cl.mem_flags.READ_WRITE,
        self.n_particles * 4 * 4)  # buffer of float4

    # Compile kernel program
    try:
        program_src = file_to_string(KERNEL_FILENAME)
        self.kernel = cl.Program(self.context, program_src).build()
    except cl.RuntimeError as e:
        raise ParticleSystemException(
            'Error compiling ' + KERNEL_FILENAME + '\n' + str(e))

    # Will need a command queue to run kernel
    self.queue = cl.CommandQueue(self.context)
def __init__(self, gl_positions, gl_colors, velocities, dt=0.001):
    """Set up the OpenCL side of the simulation.

    Args:
        gl_positions: OpenGL array buffer with the positions; its ``_id``
            and ``data`` attributes are read.
        gl_colors: OpenGL array buffer with the colours; its ``_id`` is read.
        velocities: numpy array of velocities, uploaded to OpenCL.
        dt: time step, stored as a ``numpy.float32``.
    """
    flags = cl.mem_flags
    host_copy_ro = flags.READ_ONLY | flags.COPY_HOST_PTR

    # Pick the first available platform and create a context on its first
    # device. The GL sharing properties make the context share state with
    # the active OpenGL context.
    platform = cl.get_platforms()[0]
    context_properties = (
        [(cl.context_properties.PLATFORM, platform)]
        + get_gl_sharing_context_properties())
    self.ctx = cl.Context(properties=context_properties,
                          devices=[platform.get_devices()[0]])

    # Command queue for serializing OpenCL commands, then the kernel build.
    self.queue = cl.CommandQueue(self.ctx)
    self.program = cl.Program(self.ctx, kernel_code).build()

    # Keep the constructor arguments for later use.
    self.gl_positions = gl_positions
    self.gl_colors = gl_colors
    self.velocities = velocities

    # dt is later passed to a kernel as a 32-bit float, so wrap it now.
    self.dt = numpy.float32(dt)

    # Positions and colours live in OpenGL buffers; wrap them as GLBuffers.
    # The raw `_id` is needed here only because another OpenGL-related
    # library has to be handed the buffer name.
    self.cl_positions = cl.GLBuffer(self.ctx, flags.READ_WRITE,
                                    self.gl_positions._id)
    self.cl_colors = cl.GLBuffer(self.ctx, flags.READ_WRITE,
                                 self.gl_colors._id)

    # Upload the velocities together with copies of the initial positions
    # and velocities into new OpenCL buffers.
    self.cl_velocities = cl.Buffer(self.ctx, host_copy_ro,
                                   hostbuf=velocities)
    self.cl_initial_positions = cl.Buffer(self.ctx, host_copy_ro,
                                          hostbuf=self.gl_positions.data)
    self.cl_initial_velocities = cl.Buffer(self.ctx, host_copy_ro,
                                           hostbuf=self.velocities)
def create(self):
    """Activate the VBO, initialise OpenCL and build the kernel program."""
    # The GL buffer is activated before the CL context is created.
    self.vbo.activate()

    context, command_queue = clinit()
    self.ctx = context
    self.queue = command_queue

    # Interop object giving OpenCL access to the GL vertex buffer.
    vbo_handle = int(self.vbo.handle)
    self.glclbuf = cl.GLBuffer(self.ctx, cl.mem_flags.READ_WRITE, vbo_handle)

    self.program = cl.Program(self.ctx, kernel).build()
    self.queue.finish()
def initialize():
    """Create a GL-sharing OpenCL context and fill a VBO with a sine curve.

    Builds the context properties by hand for the current platform, creates
    and configures a vertex buffer of ``n_vertices`` 2D float vertices, and
    runs the ``generate_sin`` kernel once on the shared buffer.
    """
    import sys

    plats = cl.get_platforms()
    ctx_props = cl.context_properties

    props = [(ctx_props.PLATFORM, plats[0]),
             (ctx_props.GL_CONTEXT_KHR, platform.GetCurrentContext())]

    # sys.platform is "linux2" on Python 2 but "linux" on Python 3.3+, so
    # match on the prefix.
    if sys.platform.startswith("linux"):
        props.append((ctx_props.GLX_DISPLAY_KHR,
                      GLX.glXGetCurrentDisplay()))
    elif sys.platform == "win32":
        props.append((ctx_props.WGL_HDC_KHR, WGL.wglGetCurrentDC()))
    ctx = cl.Context(properties=props)

    glClearColor(1, 1, 1, 1)
    glColor(0, 0, 1)

    # Allocate an uninitialised vertex buffer: 2 floats of 4 bytes per vertex.
    vbo = glGenBuffers(1)
    glBindBuffer(GL_ARRAY_BUFFER, vbo)
    rawGlBufferData(GL_ARRAY_BUFFER, n_vertices * 2 * 4, None, GL_STATIC_DRAW)
    glEnableClientState(GL_VERTEX_ARRAY)
    glVertexPointer(2, GL_FLOAT, 0, None)

    coords_dev = cl.GLBuffer(ctx, cl.mem_flags.READ_WRITE, int(vbo))
    prog = cl.Program(ctx, src).build()
    queue = cl.CommandQueue(ctx)

    # The GL buffer is acquired for the kernel run and released afterwards.
    cl.enqueue_acquire_gl_objects(queue, [coords_dev])
    prog.generate_sin(queue, (n_vertices, ), None, coords_dev)
    cl.enqueue_release_gl_objects(queue, [coords_dev])

    queue.finish()
    glFlush()
def loadData(self, pos_vbo, col_vbo, vel):
    """Share the VBOs with OpenCL and upload the velocity buffers.

    Stores the VBOs, their host ``data`` arrays and ``vel`` on the instance,
    wraps each VBO in a ``cl.GLBuffer`` and creates three read/write OpenCL
    buffers initialised from host memory.

    Args:
        pos_vbo: position VBO exposing ``data``, ``bind()`` and either
            ``buffer`` or ``buffers``.
        col_vbo: colour VBO with the same interface.
        vel: host array of velocities.
    """
    import pyopencl as cl
    mf = cl.mem_flags

    def gl_buffer_name(vertex_buffer):
        # For some there is no single buffer but an array of buffers
        # https://github.com/enjalot/adventures_in_opencl/commit/61bfd373478767249fe8a3aa77e7e36b22d453c4
        try:
            return int(vertex_buffer.buffer)
        except AttributeError:
            return int(vertex_buffer.buffers[0])

    self.pos_vbo = pos_vbo
    self.col_vbo = col_vbo

    self.pos = pos_vbo.data
    self.col = col_vbo.data
    self.vel = vel

    # Setup vertex buffer objects and share them with OpenCL as GLBuffers.
    # Each VBO is bound *before* its buffer name is read: the original bound
    # col_vbo only after reading it, and resolved both names in one try.
    self.pos_vbo.bind()
    self.pos_cl = cl.GLBuffer(self.ctx, mf.READ_WRITE,
                              gl_buffer_name(self.pos_vbo))
    self.col_vbo.bind()
    self.col_cl = cl.GLBuffer(self.ctx, mf.READ_WRITE,
                              gl_buffer_name(self.col_vbo))

    # Pure OpenCL arrays
    self.vel_cl = cl.Buffer(self.ctx, mf.READ_WRITE | mf.COPY_HOST_PTR,
                            hostbuf=vel)
    self.pos_gen_cl = cl.Buffer(self.ctx, mf.READ_WRITE | mf.COPY_HOST_PTR,
                                hostbuf=self.pos)
    self.vel_gen_cl = cl.Buffer(self.ctx, mf.READ_WRITE | mf.COPY_HOST_PTR,
                                hostbuf=self.vel)
    self.queue.finish()

    # Set up the list of GL objects to share with OpenCL
    self.gl_objects = [self.pos_cl, self.col_cl]
def get_cl_array(self, queue):
    """Return the pyopencl array allocated to the VBO.

    The array is created the first time it is requested and reused
    afterwards. Note that CL/GL interoperability must be enabled.

    Args:
        queue: command queue passed to ``cl.array.Array``.

    Returns:
        The ``cl.array.Array`` whose storage is the GL buffer identified by
        ``self.gl_vbo_id``.
    """
    if self._cl_buffer is None:
        def allocator(nbytes):
            # The requested size is ignored: storage is the existing VBO.
            # Recording the buffer makes the guard above work as a cache;
            # the original never assigned `_cl_buffer`, so every call
            # wrapped the VBO again.
            # NOTE(review): `ctx` is not defined in this block - presumably
            # a module-level context; confirm it matches `queue`'s context.
            self._cl_buffer = cl.GLBuffer(ctx, cl.mem_flags.READ_WRITE,
                                          int(self.gl_vbo_id))
            return self._cl_buffer

        self._cl_array = cl.array.Array(queue, self.shape, self.dtype,
                                        allocator=allocator)
    return self._cl_array
def initial_buffers():
    """Create the host-initialised CL buffer, a VBO and a GL-shared buffer.

    Returns:
        Tuple ``(cl_buffer, gl_buffer, cl_gl_buffer)``: a read-only OpenCL
        buffer copied from the initial data, a PyOpenGL VBO holding the same
        data, and a ``cl.GLBuffer`` over a separately allocated raw GL buffer.
    """
    # Every point starts at (0, 1).
    data = numpy.ndarray((num_points, 2), dtype=numpy.float32)
    data[:, :] = [0., 1.]

    readonly_copy = cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR
    cl_buffer = cl.Buffer(context, readonly_copy, hostbuf=data)
    gl_buffer = vbo.VBO(data=data, usage=GL_DYNAMIC_DRAW,
                        target=GL_ARRAY_BUFFER)

    # Raw GL buffer: generate a name, bind it, and allocate storage for
    # num_points vertices of 2 floats (4 bytes each).
    buffer_name = glGenBuffers(1)
    glBindBuffer(GL_ARRAY_BUFFER, buffer_name)
    byte_count = num_points * 2 * 4
    rawGlBufferData(GL_ARRAY_BUFFER, byte_count, None, GL_DYNAMIC_DRAW)

    # Enable the vertex array and describe its layout
    # (components per vertex, type, stride, pointer).
    glEnableClientState(GL_VERTEX_ARRAY)
    glVertexPointer(2, GL_FLOAT, 0, None)

    cl_gl_buffer = cl.GLBuffer(context, cl.mem_flags.READ_WRITE,
                               int(buffer_name))
    return (cl_buffer, gl_buffer, cl_gl_buffer)
def initialize_buffers(self):
    """Create the GL vertex buffer and program, then set up OpenCL.

    Builds the gloo vertex buffer from ``self.data``, attaches it to a new
    gloo program, initialises the OpenCL context/queue, wraps the vertex
    buffer for OpenCL access and compiles the OpenCL kernel.
    """
    vertex_buffer = gloo.VertexBuffer(data=self.data)
    self.glbuf = vertex_buffer
    self.prog = gloo.Program(vertex, fragment)
    self.prog["position"] = self.glbuf
    self.glbuf.activate()

    # WARNING: it seems that on some systems, the CL initialization
    # NEEDS to occur AFTER the activation of the GL object.
    context, command_queue = clinit()
    self.ctx = context
    self.queue = command_queue

    # Interop object giving OpenCL access to the GL VBO.
    gl_handle = int(self.glbuf.handle)
    self.glclbuf = cl.GLBuffer(self.ctx, cl.mem_flags.READ_WRITE, gl_handle)

    # Build the OpenCL program, then wait for the queue to drain.
    self.program = cl.Program(self.ctx, clkernel).build()
    self.queue.finish()
def initialize_buffers(self):
    """Create the GL VBO and OpenCL buffers, then compile the kernel.

    Allocates a zero-filled VBO with the shape of ``self.data``, initialises
    the OpenCL context/queue, uploads ``self.data`` into a read-only OpenCL
    buffer, wraps the VBO for OpenCL access and builds the program.
    """
    # Zero-filled OpenGL VBO with the same shape as the data.
    zeros = np.zeros(self.data.shape)
    self.glbuf = glvbo.VBO(data=zeros, usage=gl.GL_DYNAMIC_DRAW,
                           target=gl.GL_ARRAY_BUFFER)
    self.glbuf.bind()

    # Initialise the CL context and queue.
    context, command_queue = clinit()
    self.ctx = context
    self.queue = command_queue

    flags = cl.mem_flags
    # Pure read-only OpenCL buffer holding a copy of the data.
    self.clbuf = cl.Buffer(self.ctx, flags.READ_ONLY | flags.COPY_HOST_PTR,
                           hostbuf=self.data)
    # Interop object giving OpenCL access to the GL VBO.
    self.glclbuf = cl.GLBuffer(self.ctx, flags.READ_WRITE,
                               int(self.glbuf.buffers[0]))

    # Build the OpenCL program, then wait for the queue to drain.
    self.program = cl.Program(self.ctx, clkernel).build()
    self.queue.finish()
def __init__(self, lattice, grid):
    """Create the particle arrays and their GL/CL buffers from a grid.

    Args:
        lattice: object providing ``context``, ``queue`` and
            ``memory.float_type``.
        grid: 2D array of particle start positions with 2 or 3 columns;
            one row per particle.

    Each particle is stored as 4 components: x, y, z (z is 0 for a
    2-column grid) and a random sample in the fourth slot.
    """
    self.lattice = lattice
    self.context = self.lattice.context
    self.queue = self.lattice.queue
    self.float_type = self.lattice.memory.float_type
    self.count = len(grid)

    self.np_particles = numpy.ndarray(shape=(self.count, 4),
                                      dtype=self.float_type)
    self.np_init_particles = numpy.ndarray(shape=(self.count, 4),
                                           dtype=self.float_type)

    dimensions = len(grid[0, :])
    if dimensions == 2:
        self.np_particles[:, 0:2] = grid
        self.np_particles[:, 2] = 0
        self.np_particles[:, 3] = numpy.random.sample(self.count)
        # Same array object as np_particles, not a copy.
        self.np_init_particles = self.np_particles
    elif dimensions == 3:
        self.np_particles[:, 0:3] = grid
        self.np_particles[:, 3] = numpy.random.sample(self.count)
        self.np_init_particles = self.np_particles

    self.gl_particles = vbo.VBO(data=self.np_particles,
                                usage=gl.GL_DYNAMIC_DRAW,
                                target=gl.GL_ARRAY_BUFFER)
    self.gl_particles.bind()
    self.cl_gl_particles = cl.GLBuffer(self.context, mf.READ_WRITE,
                                       int(self.gl_particles))

    # Size the device buffer from the host array itself. The original
    # hard-coded float32, which is too small when float_type is wider.
    self.cl_init_particles = cl.Buffer(self.context, mf.READ_ONLY,
                                       size=self.np_init_particles.nbytes)
    cl.enqueue_copy(self.queue, self.cl_init_particles,
                    self.np_init_particles).wait()

    self.build_kernel()
def initialize():
    """Create a GL-sharing OpenCL context and fill a VBO with a sine curve.

    Creates the context (with a fallback creation path on non-macOS
    systems), allocates a vertex buffer of ``n_vertices`` 2D float vertices
    and runs the ``generate_sin`` kernel once on the shared buffer.
    """
    platform = cl.get_platforms()[0]

    from pyopencl.tools import get_gl_sharing_context_properties
    import sys

    if sys.platform == "darwin":
        ctx = cl.Context(properties=get_gl_sharing_context_properties(),
                         devices=[])
    else:
        sharing_properties = (
            [(cl.context_properties.PLATFORM, platform)]
            + get_gl_sharing_context_properties())
        # Some OSs prefer clCreateContextFromType, some prefer
        # clCreateContext. Try both.
        try:
            ctx = cl.Context(properties=sharing_properties)
        except Exception:
            # Only ordinary errors trigger the fallback; KeyboardInterrupt
            # and SystemExit are no longer swallowed.
            ctx = cl.Context(properties=sharing_properties,
                             devices=[platform.get_devices()[0]])

    glClearColor(1, 1, 1, 1)
    glColor(0, 0, 1)

    # Allocate an uninitialised vertex buffer: 2 floats of 4 bytes per vertex.
    vbo = glGenBuffers(1)
    glBindBuffer(GL_ARRAY_BUFFER, vbo)
    rawGlBufferData(GL_ARRAY_BUFFER, n_vertices * 2 * 4, None, GL_STATIC_DRAW)
    glEnableClientState(GL_VERTEX_ARRAY)
    glVertexPointer(2, GL_FLOAT, 0, None)

    coords_dev = cl.GLBuffer(ctx, cl.mem_flags.READ_WRITE, int(vbo))
    prog = cl.Program(ctx, src).build()
    queue = cl.CommandQueue(ctx)

    # The GL buffer is acquired for the kernel run and released afterwards.
    cl.enqueue_acquire_gl_objects(queue, [coords_dev])
    prog.generate_sin(queue, (n_vertices, ), None, coords_dev)
    cl.enqueue_release_gl_objects(queue, [coords_dev])

    queue.finish()
    glFlush()
# Create an OpenCL context that shares state with the current GL context.
platform = cl.get_platforms()[0]
context = cl.Context(properties=[(cl.context_properties.PLATFORM, platform)]
                     + get_gl_sharing_context_properties())
queue = cl.CommandQueue(context)

# Buffers initialised from host arrays.
cl_life = cl.Buffer(context, mf.COPY_HOST_PTR, hostbuf=np_life)
cl_velocity = cl.Buffer(context, mf.COPY_HOST_PTR, hostbuf=np_velocity)
cl_zmel = cl.Buffer(context, mf.COPY_HOST_PTR, hostbuf=np_zmel)

# Read-only input data and start state.
cl_datax = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR,
                     hostbuf=np_datax)
cl_datay = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR,
                     hostbuf=np_datay)
cl_dataz = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR,
                     hostbuf=np_dataz)
cl_start_position = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR,
                              hostbuf=np_position)
cl_start_velocity = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR,
                              hostbuf=np_velocity)

# The VBO exposes its GL name either as a list `buffers` or a single
# `buffer`, depending on the installed version.
if hasattr(gl_position, 'buffers'):
    cl_gl_position = cl.GLBuffer(context, mf.READ_WRITE,
                                 int(gl_position.buffers[0]))
    cl_gl_color = cl.GLBuffer(context, mf.READ_WRITE,
                              int(gl_color.buffers[0]))
elif hasattr(gl_position, 'buffer'):
    cl_gl_position = cl.GLBuffer(context, mf.READ_WRITE,
                                 int(gl_position.buffer))
    cl_gl_color = cl.GLBuffer(context, mf.READ_WRITE, int(gl_color.buffer))
else:
    # Function-call form prints the same text on Python 2 and Python 3.
    print("Can not find a proper buffer object in pyopencl install. Exiting...")
    sys.exit()

# Read the kernel source; the context manager closes the file, which the
# original left open.
with open("cl_funcs_heliosphere.cl", 'r') as f:
    fstr = f.read()
program = cl.Program(context, fstr).build()

glutMainLoop()
# Register the resize callback and pick the drawing colours.
glutReshapeFunc(on_reshape)
glClearColor(1, 1, 1, 1)  # white background
glColor(0, 0, 0)  # black foreground

# OpenCL context that shares state with the current GL context.
platform = cl.get_platforms()[0]
context = cl.Context(
    properties=[(cl.context_properties.PLATFORM, platform)]
    + get_gl_sharing_context_properties())

# Vertex buffer: generate a name, bind it to a target, and allocate
# uninitialised storage for num_points vertices of 2 floats (4 bytes each).
vertex_buffer = glGenBuffers(1)
glBindBuffer(GL_ARRAY_BUFFER, vertex_buffer)
rawGlBufferData(GL_ARRAY_BUFFER, num_points * 2 * 4, None, GL_DYNAMIC_DRAW)

# Enable the vertex array for rendering and describe its layout
# (components per vertex, type, stride, pointer).
glEnableClientState(GL_VERTEX_ARRAY)
glVertexPointer(2, GL_FLOAT, 0, None)

# Share the vertex buffer with OpenCL and build the kernel program.
cl_buffer = cl.GLBuffer(context, cl.mem_flags.READ_WRITE, int(vertex_buffer))
program = cl.Program(context, kernel).build()
queue = cl.CommandQueue(context)

# Run the kernel once; the GL buffer is acquired for the run and released
# afterwards.
cl.enqueue_acquire_gl_objects(queue, [cl_buffer])
program.generate_sin(queue, (num_points, ), None, cl_buffer)
cl.enqueue_release_gl_objects(queue, [cl_buffer])

queue.finish()
glFlush()

glutMainLoop()
def loadData(self):#, pos_vbo, col_vbo):
    """Allocate every GL-shared and pure OpenCL buffer used by the simulation.

    Creates the position/colour VBOs and their ``cl.GLBuffer`` wrappers,
    the per-particle OpenCL arrays, the sort and grid-cell index arrays,
    the parameter struct buffers and two debug arrays, then records the GL
    objects list in ``self.gl_objects``.
    """
    import pyopencl as cl
    mf = cl.mem_flags

    #placeholder array used to fill cl buffers
    #could just specify size but might want some initial values later
    # numpy.ndarray() leaves the contents uninitialised.
    tmp = numpy.ndarray((self.sph.max_num, 4), dtype=numpy.float32)

    self.pos_vbo = glutil.VBO(tmp)
    self.col_vbo = glutil.VBO(tmp)

    #Setup vertex buffer objects and share them with OpenCL as GLBuffers
    self.pos_vbo.bind()
    self.position_u = cl.GLBuffer(self.ctx, mf.READ_WRITE, int(self.pos_vbo.vbo_id))
    self.col_vbo.bind()
    self.color_u = cl.GLBuffer(self.ctx, mf.READ_WRITE, int(self.col_vbo.vbo_id))

    #pure OpenCL arrays
    # NOTE(review): all of these are created READ_ONLY; confirm no kernel
    # writes to them.
    self.velocity_u = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=tmp)
    self.velocity_s = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=tmp)
    self.veleval_u = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=tmp)
    self.veleval_s = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=tmp)
    self.position_s = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=tmp)
    self.color_s = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=tmp)

    # One float per particle for the density.
    tmp_dens = numpy.ndarray((self.sph.max_num,), dtype=numpy.float32)
    self.density_s = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=tmp_dens)
    self.force_s = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=tmp)
    self.xsph_s = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=tmp)

    # sys.maxint exists only on Python 2.
    # NOTE(review): multiplying a uint32 array by sys.maxint may promote the
    # result to a wider dtype - confirm the element size the kernels expect.
    import sys
    tmp_uint = numpy.ones((self.sph.max_num,), dtype=numpy.uint32)
    tmp_uint = tmp_uint * sys.maxint
    self.sort_indices = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=tmp_uint)
    self.sort_hashes = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=tmp_uint)

    # ones + (-1): every entry of tmp_grid starts at 0.
    tmp_grid = numpy.ones((self.sph.domain.nb_cells+1, ), dtype=numpy.int32)
    tmp_grid += -1
    #grid size
    self.ci_start= cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=tmp_grid)
    self.ci_end = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=tmp_grid)

    #make struct buffers
    # Each struct buffer is sized with len() of the packed struct.
    # NOTE(review): cl.enqueue_write_buffer is a legacy pyopencl call; newer
    # releases use cl.enqueue_copy - confirm the targeted pyopencl version.
    self.sphp_struct = self.sph.make_struct(self.num)
    self.sphp = cl.Buffer(self.ctx, mf.READ_ONLY, len(self.sphp_struct))
    cl.enqueue_write_buffer(self.queue, self.sphp, self.sphp_struct).wait()

    # Only the first write waits explicitly; the next two are covered by
    # the queue.finish() below.
    self.gp_struct = self.sph.domain.make_struct(1.0)
    self.gp = cl.Buffer(self.ctx, mf.READ_ONLY, len(self.gp_struct))
    cl.enqueue_write_buffer(self.queue, self.gp, self.gp_struct)

    self.gp_struct_scaled = self.sph.domain.make_struct(self.sph.sim_scale)
    self.gp_scaled = cl.Buffer(self.ctx, mf.READ_ONLY, len(self.gp_struct_scaled))
    cl.enqueue_write_buffer(self.queue, self.gp_scaled, self.gp_struct_scaled)

    #debug arrays
    tmp_int = numpy.ndarray((self.sph.max_num, 4), dtype=numpy.int32)
    self.clf_debug = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=tmp)
    self.cli_debug = cl.Buffer(self.ctx, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=tmp_int)

    self.queue.finish()

    # set up the list of GL objects to share with opencl
    self.gl_objects = [self.position_u, self.color_u]
gl_color) = initial_buffers(num_particles) platform = cl.get_platforms()[0] context = cl.Context(properties=[(cl.context_properties.PLATFORM, platform)] + get_gl_sharing_context_properties()) queue = cl.CommandQueue(context) cl_velocity = cl.Buffer(context, mf.COPY_HOST_PTR, hostbuf=np_velocity) cl_start_position = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=np_position) cl_start_velocity = cl.Buffer(context, mf.READ_ONLY | mf.COPY_HOST_PTR, hostbuf=np_velocity) cl_gl_position = cl.GLBuffer(context, mf.READ_WRITE, int(gl_position)) cl_gl_color = cl.GLBuffer(context, mf.READ_WRITE, int(gl_color)) kernel = """__kernel void particle_fountain(__global float4* position, __global float4* color, __global float4* velocity, __global float4* start_position, __global float4* start_velocity, float time_step) { unsigned int i = get_global_id(0); float4 p = position[i]; float4 v = velocity[i]; float life = velocity[i].w; life -= time_step; if (life <= 0.f)
def loadData(self, pos_vbo, col_vbo):
    """Share the VBOs with OpenCL and create the depth/RGB/matrix buffers.

    Args:
        pos_vbo: position VBO exposing ``data``, ``bind()`` and ``buffers``.
        col_vbo: colour VBO with the same interface.

    Sets ``self.imsize`` and ``self.num`` to 640 * 480 and uploads
    placeholder depth and RGB arrays of that size plus two 4x4 float32
    matrices.
    """
    flags = cl.mem_flags
    host_copy_ro = flags.READ_ONLY | flags.COPY_HOST_PTR

    self.pos_vbo, self.col_vbo = pos_vbo, col_vbo
    self.pos, self.col = pos_vbo.data, col_vbo.data

    # Wrap the GL vertex buffers so OpenCL can read and write them.
    self.pos_vbo.bind()
    pos_name = int(self.pos_vbo.buffers[0])
    self.pos_cl = cl.GLBuffer(self.ctx, flags.READ_WRITE, pos_name)
    self.col_vbo.bind()
    col_name = int(self.col_vbo.buffers[0])
    self.col_cl = cl.GLBuffer(self.ctx, flags.READ_WRITE, col_name)
    self.queue.finish()

    self.imsize = 640 * 480
    self.num = self.imsize

    # Uninitialised placeholders: one float per pixel for depth, three
    # bytes per pixel for RGB.
    depth_placeholder = numpy.empty((self.imsize, 1), dtype=numpy.float32)
    rgb_placeholder = numpy.empty((self.imsize, 3), dtype=numpy.dtype('b'))

    # Temporary values from a calibrated Kinect (librgbd calibration from
    # Nicolas Burrus), laid out row by row.
    ptd = numpy.array([
        [485.377991, 7.568644, 0.013969, 0.000000],
        [11.347664, -474.452148, 0.024067, 0.000000],
        [-312.743378, -279.984619, -0.999613, 0.000000],
        [-8.489457, 2.428294, 0.009412, 1.000000],
    ], dtype=numpy.float32)
    iptd = numpy.array([
        [0.001845, -0.000000, -0.000000, 0.000000],
        [0.000000, -0.001848, -0.000000, 0.000000],
        [-0.575108, 0.489076, -1.000000, 0.000000],
        [0.000000, -0.000000, -0.000000, 1.000000],
    ], dtype=numpy.float32)

    self.depth_cl = cl.Buffer(self.ctx, host_copy_ro,
                              hostbuf=depth_placeholder)
    self.rgb_cl = cl.Buffer(self.ctx, host_copy_ro, hostbuf=rgb_placeholder)
    self.pt_cl = cl.Buffer(self.ctx, host_copy_ro, hostbuf=ptd)
    self.ipt_cl = cl.Buffer(self.ctx, host_copy_ro, hostbuf=iptd)
    self.queue.finish()

    # GL-backed objects shared with OpenCL.
    self.gl_objects = [self.pos_cl, self.col_cl]
def initialize_buffers(self):
    """Render and build the OpenCL kernel, then allocate CL and GL buffers.

    Renders the kernel template with the module-level simulation constants,
    writes the rendered source to ``rendered-kernel.cl``, builds it, and
    sets up the kernels' scalar argument types. Then allocates the
    GPU arrays, the parameter buffers, the two VBOs and their OpenCL interop
    wrappers.
    """
    # Load the kernel template and fill in the simulation constants.
    with open("src/macrospin_gpu/templates/costm-amp-dur-gl.cl") as source:
        kernel_template = Template(source.read())

    render_args = dict(
        alpha=damping,
        dt=dt,
        sqrtDt=sqrtDt,
        nuSqrtDt=nu * sqrtDt,
        nu=nu,
        nu2=nu * nu,
        nxx=Nxx,
        nyy=Nyy,
        nzz=Nzz,
        hx=hExtX / Ms,
        hy=hExtY / Ms,
        hz=hExtZ / Ms,
        stt_op=stt_op_pre,
        stt_ip=stt_ip_pre,
        lambda2_plus1_op=lambda_op**2 + 1.0,
        lambda2_plus1_ip=lambda_ip**2 + 1.0,
        lambda2_minus1_op=lambda_op**2 - 1.0,
        lambda2_minus1_ip=lambda_ip**2 - 1.0,
        rise_time=65.0e-12,
        fall_time=100.0e-12,
        pause_before=pause_before,
        pause_after=pause_after,
    )
    kernel = kernel_template.render(**render_args)

    # Keep a copy of the rendered source on disk.
    with open('rendered-kernel.cl', 'w') as rendered:
        rendered.write(kernel)

    # Build the program and declare which kernel arguments are scalars.
    self.ctx, self.queue = clinit()
    self.prg = cl.Program(self.ctx, kernel).build()

    self.evolve = self.prg.evolve
    self.evolve.set_scalar_arg_dtypes([None, None, None, None, np.float32])

    self.reduce_m = self.prg.reduce_m
    self.reduce_m.set_scalar_arg_dtypes([None, None, np.int32])

    self.normalize_m = self.prg.normalize_m

    self.update_m_of_t = self.prg.update_m_of_t
    self.update_m_of_t.set_scalar_arg_dtypes(
        [None, None, None, np.int32, np.int32, np.int32])

    # Wait for the queue to drain.
    self.queue.finish()

    # Random number generator
    self.rg = ran.RanluxGenerator(self.queue)

    # Data dimensions
    self.realizations = 1  # averages over realizations of the noise process
    self.current_steps = 256
    self.duration_steps = 8
    diagram_points = self.current_steps * self.duration_steps
    self.N = diagram_points * self.realizations
    self.time_points = 64  # how many points to store as a function of time
    trace_points = diagram_points * self.time_points

    # Current state
    self.current_iter = 0
    self.current_time = 0.0
    self.current_timepoint = 0

    # GPU-bound arrays
    self.m = cl.array.zeros(self.queue, self.N, cl.array.vec.float4)
    self.dW = cl.array.zeros(self.queue, self.N, cl.array.vec.float4)
    self.phase_diagram = cl.array.zeros(self.queue, diagram_points,
                                        np.float32)

    # Host arrays holding the phase diagram parameters and the traces
    self.durations_np = np.linspace(
        min_duration, max_duration, self.duration_steps).astype(np.float32)
    self.currents_np = np.linspace(
        min_current, max_current, self.current_steps).astype(np.float32)
    self.m_of_t_np = np.ndarray((trace_points, 4), dtype=np.float32)
    self.colors_np = np.ndarray((trace_points, 4), dtype=np.float32)

    readonly_copy = cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR
    self.durations = cl.Buffer(self.ctx, readonly_copy,
                               hostbuf=self.durations_np)
    self.currents = cl.Buffer(self.ctx, readonly_copy,
                              hostbuf=self.currents_np)

    # Initial magnetization state: every entry is (1, 0, 0, 0).
    initial_m = np.zeros(self.N, dtype=cl.array.vec.float4)
    initial_m[:] = (1, 0, 0, 0)
    cl.enqueue_copy(self.queue, self.m.data, initial_m)

    # White particles
    self.colors_np[:, :] = [1., 1., 1., 1.]
    self.colbuf = vbo.VBO(data=self.colors_np, usage=gl.GL_DYNAMIC_DRAW,
                          target=gl.GL_ARRAY_BUFFER)
    self.colbuf.bind()

    # For the glMultiDraw command we need an array of offsets and draw
    # lengths.
    self.start_indices = np.arange(0, trace_points, self.time_points,
                                   dtype=np.int32)
    self.draw_lengths = self.time_points * np.ones(diagram_points,
                                                   dtype=np.int32)

    # VBO for the magnetization traces, bound after creation.
    self.glbuf = vbo.VBO(data=self.m_of_t_np, usage=gl.GL_DYNAMIC_DRAW,
                         target=gl.GL_ARRAY_BUFFER)
    self.glbuf.bind()

    # Interop objects giving OpenCL access to the two GL VBOs.
    self.glclbuf = cl.GLBuffer(self.ctx, cl.mem_flags.READ_WRITE,
                               int(self.glbuf.buffers[0]))
    self.colclbuf = cl.GLBuffer(self.ctx, cl.mem_flags.READ_WRITE,
                                int(self.colbuf.buffers[0]))