import chroma.gpu.tools as tools

# NOTE(review): `api` and `cl` are used below but are not imported in the
# visible part of this file. They must already be in scope (presumably
# chroma.api and pyopencl) -- confirm before relying on this module.


class workQueue(object):
    """Record the work-size limits of the device behind a GPU context.

    On construction the first device of ``context`` is queried once and the
    results are stored as plain attributes:

    * ``device``            -- the device object itself
    * ``shared_mem_size``   -- local/shared memory size reported by the device
    * ``work_group_size``   -- maximum work-group (thread block) size
    * ``work_item_sizes``   -- maximum work-item sizes; the OpenCL branch
      stores what MAX_WORK_ITEM_SIZES returns, the other branch stores only
      ``max_block_dim_x``
    * ``work_item_dims``    -- number of work-item dimensions
    * ``max_compute_units`` -- compute-unit / multiprocessor count
    """

    def __init__(self, context):
        """Query ``context`` for its device limits.

        ``context`` is an OpenCL context when ``api.is_gpu_api_opencl()`` is
        true; otherwise it must provide ``get_device()`` (presumably a PyCUDA
        context -- confirm).
        """
        # we get important information about work queues here
        self.context = context
        if api.is_gpu_api_opencl():
            self.device = context.get_info(cl.context_info.DEVICES)[0]
            dev_info = cl.device_info
            self.shared_mem_size = self.device.get_info(dev_info.LOCAL_MEM_SIZE)
            self.work_group_size = self.device.get_info(dev_info.MAX_WORK_GROUP_SIZE)
            self.work_item_sizes = self.device.get_info(dev_info.MAX_WORK_ITEM_SIZES)
            self.work_item_dims = self.device.get_info(dev_info.MAX_WORK_ITEM_DIMENSIONS)
            self.max_compute_units = self.device.get_info(dev_info.MAX_COMPUTE_UNITS)
        else:
            self.device = context.get_device()
            self.shared_mem_size = self.device.max_shared_memory_per_block
            self.work_group_size = self.device.max_threads_per_block
            self.work_item_sizes = self.device.max_block_dim_x
            # Fixed: this was previously stored under the misspelled name
            # `work_item_dimes`, leaving `work_item_dims` undefined here.
            self.work_item_dims = 3
            # Keep the old misspelling as an alias for any existing reader.
            self.work_item_dimes = self.work_item_dims
            self.max_compute_units = self.device.multiprocessor_count

    def print_dev_info(self):
        """Print the device and its recorded limits on one line."""
        # %-formatting yields the same space-separated output as a Python 2
        # print statement and also parses under Python 3.
        # Fixed: work_group_size used to be printed twice; the second slot
        # now shows work_item_sizes.
        print("%s %s %s %s %s" % (
            self.device,
            self.shared_mem_size,
            self.work_group_size,
            self.work_item_sizes,
            self.max_compute_units,
        ))


if __name__ == "__main__":
    # Testing.
    # Local import: `os` is not imported in the visible part of this file.
    import os

    os.environ['PYOPENCL_CTX'] = '0:1'
    context = tools.get_context()
    w = workQueue(context)
# Set-up for exercising the float3 helpers in linalg_test.cl: select an
# OpenCL context, build the module and allocate the host-side input arrays.
import os
import sys

# Kept ahead of the pyopencl import, as in the original ordering; presumably
# read when the context is created -- confirm.
os.environ["PYOPENCL_CTX"] = '1'

import numpy as np
import pyopencl as cl
import pyopencl.array as clarray
import chroma.gpu.tools as tools

float3 = clarray.vec.float3
print("%s %s" % ("float3 type: ", float3))

# Context, command queue and the first device of that context.
ctx = tools.get_context()
queue = cl.CommandQueue(ctx)
dev = ctx.get_info(cl.context_info.DEVICES)[0]
print('device %s' % dev.get_info(cl.device_info.NAME))

mod = tools.get_module('linalg_test.cl', ctx, include_source_directory=False)

# Launch geometry: a single block of 256 work items.
size = {'block': (256,), 'grid': (1,)}
_block_len = size['block'][0]

# Plain (N, 3) float32 host arrays plus one random float32 scalar.
a_np = np.zeros((_block_len, 3), dtype=np.float32)
b_np = np.zeros((_block_len, 3), dtype=np.float32)
c_np = np.float32(np.random.random_sample())

mf = cl.mem_flags

# Same length, but using the float3 vector dtype.
a_vec_np = np.zeros(_block_len, dtype=float3)
b_vec_np = np.zeros(_block_len, dtype=float3)
d_vec_np = np.zeros(_block_len, dtype=float3)
#c_vec_np = np.float32(np.random.random_sample())

#float3add = mod.get_function('float3add')
#float3addequal = mod.get_function('float3addequal')
#float3sub = mod.get_function('float3sub')