def setup_opencl(data, cube_size):
    """Run the ``score_matrix_to_rms`` OpenCL kernel and return its scores.

    Args:
        data: mapping that provides the ``'in_r'``, ``'in_g'``, ``'in_b'``
            and ``'out_r'`` arrays; each one is uploaded to a device buffer.
        cube_size: passed to the kernel as its last argument; the kernel is
            enqueued with a global size of ``cube_size ** 3`` and the score
            buffer holds that many floats.

    Returns:
        The score array read back from the device, shaped like the
        zero-filled ``array.array('f')`` used to create the score buffer.
    """
    import pycl

    blocking = True

    with timeify("Making context, loading kernel"):
        context = pycl.clCreateContext(devices=pycl.clGetDeviceIDs())
        queue = pycl.clCreateCommandQueue(context)
        program = pycl.clCreateProgramWithSource(context, SOURCE).build()
        kernel = program['score_matrix_to_rms']
        # five buffers followed by two integers
        kernel.argtypes = (pycl.cl_mem,) * 5 + (pycl.cl_int,) * 2

    cell_count = cube_size ** 3

    def upload(values):
        # Copy a Python array to the device; returns (buffer, event).
        return pycl.buffer_from_pyarray(queue, values, blocking=blocking)

    with timeify("Creating buffers"):
        r_buf, r_ready = upload(data['in_r'])
        g_buf, g_ready = upload(data['in_g'])
        b_buf, b_ready = upload(data['in_b'])
        # Uploaded (and waited on) even though the kernel call below does
        # not receive it.
        out_r_buf, out_ready = upload(data['out_r'])
        scores = array.array('f', [0] * cell_count)
        score_buf, score_ready = upload(scores)

    with timeify("Run kernel r"):
        # NOTE(review): the fourth argument is the *input* red buffer; the
        # variant passing out_r_buf was commented out upstream -- confirm
        # which one is intended.
        # in_r_buf, in_g_buf, in_b_buf, out_r_buf, score_buf,
        pending = kernel(
            r_buf, g_buf, b_buf, r_buf, score_buf,
            len(data['in_r']), cube_size,
            wait_for=[r_ready, g_ready, b_ready, out_ready, score_ready])
        run_evt = pending.on(queue, cell_count)

    with timeify("Retrive data"):
        score_from_gpu, _ = pycl.buffer_to_pyarray(
            queue, score_buf, wait_for=run_evt, like=scores)

    return score_from_gpu
def __init__(self):
    """Pick an OpenCL device and build a reusable context and queue.

    The last device returned by ``cl.clGetDeviceIDs()`` is stored as
    ``self.device``; a context for that single device and a command queue
    on that context are stored as ``self.context`` and ``self.queue``.
    """
    available = cl.clGetDeviceIDs()
    self.device = available[-1]
    self.context = cl.clCreateContext([self.device])
    self.queue = cl.clCreateCommandQueue(self.context)
def test_simple_cache(self):
    """Two lookups for the same device list must compare equal."""
    import pycl as cl
    from ctree.ocl import get_context_and_queue_from_devices

    last_device = cl.clGetDeviceIDs()[-1]
    first_lookup = get_context_and_queue_from_devices([last_device])
    second_lookup = get_context_and_queue_from_devices([last_device])

    self.assertEqual(first_lookup, second_lookup)
def get_gpu():
    """Return the OpenCL device selected by the ``[opencl]`` config section.

    If neither the ``gpu`` (device name) nor the ``gpu_id`` option is set to
    a truthy value, the first GPU-type device is returned.  Otherwise all
    devices are scanned and the first one whose ``name`` equals the
    configured name, or whose ``value`` equals the configured id, is
    returned.

    Returns:
        The matching device, or ``None`` when nothing matches or when
        ``pycl.DeviceNotFoundError`` / ``KeyError`` is raised on the way.
    """
    try:
        config = ctree.CONFIG
        name = (config.get("opencl", "gpu")
                if config.has_option("opencl", "gpu") else None)
        gpu_id = (config.get("opencl", "gpu_id")
                  if config.has_option("opencl", "gpu_id") else None)

        if gpu_id or name:
            for candidate in pycl.clGetDeviceIDs():
                # NOTE(review): gpu_id comes straight from the config
                # object; if that is a string it may never equal
                # candidate.value -- confirm the intended comparison.
                if candidate.name == name or candidate.value == gpu_id:
                    return candidate
            return None

        gpus = pycl.clGetDeviceIDs(device_type=pycl.CL_DEVICE_TYPE_GPU)
        return gpus[0]
    except (pycl.DeviceNotFoundError, KeyError):
        return None
def __init__(self):
    """Prepare the OpenCL context/queue and per-instance placeholders.

    The device at index ``self.desired_ocl_device`` (``-1``, i.e. the last
    one reported) supplies the context, the queue and
    ``self.max_work_group_size``.
    """
    # TODO: Need dependency injection to control ocl device selection
    self.desired_ocl_device = -1

    all_devices = cl.clGetDeviceIDs()
    context_and_queue = get_context_and_queue_from_devices(
        [all_devices[self.desired_ocl_device]])
    self.context, self.queue = context_and_queue
    chosen = all_devices[self.desired_ocl_device]
    self.max_work_group_size = chosen.max_work_group_size

    # Attributes used later; declared here so that they are all
    # initialized in __init__ (PEP 8).
    self.kernel = None
    self.output = None
    self._c_function = None
def setup_opencl(data, cube_size):
    """Upload the colour arrays, run ``score_matrix_to_rms``, fetch scores.

    Args:
        data: mapping with the arrays ``'in_r'``, ``'in_g'``, ``'in_b'`` and
            ``'out_r'``; all four are copied to device buffers.
        cube_size: handed to the kernel as its final argument; the kernel
            runs with a global size of ``cube_size ** 3``, which is also the
            length of the float score array.

    Returns:
        The score array as read back from the device.
    """
    import pycl

    blocking = True

    with timeify("Making context, loading kernel"):
        all_devices = pycl.clGetDeviceIDs()
        ctx = pycl.clCreateContext(devices=all_devices)
        queue = pycl.clCreateCommandQueue(ctx)
        built = pycl.clCreateProgramWithSource(ctx, SOURCE).build()
        score_kernel = built['score_matrix_to_rms']
        score_kernel.argtypes = (
            pycl.cl_mem, pycl.cl_mem, pycl.cl_mem, pycl.cl_mem, pycl.cl_mem,
            pycl.cl_int, pycl.cl_int,
        )

    total = cube_size ** 3

    with timeify("Creating buffers"):
        # key -> (device buffer, upload event), uploaded in this order
        uploaded = {}
        for key in ('in_r', 'in_g', 'in_b', 'out_r'):
            uploaded[key] = pycl.buffer_from_pyarray(
                queue, data[key], blocking=blocking)
        score = array.array('f', (0 for _ in range(total)))
        uploaded['score'] = pycl.buffer_from_pyarray(
            queue, score, blocking=blocking)

    red_buf, red_evt = uploaded['in_r']
    green_buf, green_evt = uploaded['in_g']
    blue_buf, blue_evt = uploaded['in_b']
    _out_red_buf, out_red_evt = uploaded['out_r']
    score_buf, score_evt = uploaded['score']

    with timeify("Run kernel r"):
        # NOTE(review): the input red buffer is passed twice; the call that
        # used the out_r buffer as fourth argument was commented out
        # upstream -- confirm which is intended.
        upload_events = [red_evt, green_evt, blue_evt, out_red_evt, score_evt]
        run_evt = score_kernel(
            red_buf, green_buf, blue_buf, red_buf, score_buf,
            len(data['in_r']), cube_size,
            wait_for=upload_events,
        ).on(queue, total)

    with timeify("Retrive data"):
        fetched = pycl.buffer_to_pyarray(
            queue, score_buf, wait_for=run_evt, like=score)
        score_from_gpu = fetched[0]

    return score_from_gpu
def get_tuning_driver(self):
    """Build the brute-force tuning driver for this specializer.

    The search space consists of five array parameters (``parallelize``,
    ``locs``, ``distribute``, ``fusion``, ``reassociate``); ``locs`` ranges
    over ``0 .. nMemorySpaces`` where ``nMemorySpaces`` is the number of
    devices reported by ``cl.clGetDeviceIDs()``.

    Returns:
        A ``ctree.tune.BruteForceTuningDriver`` over those parameters that
        minimizes time.
    """
    # Earlier revisions carried two disabled alternatives here: an
    # OpenTuner-based driver (kept as string literals) and a
    # ConstantTuningDriver placed after the return statement, where it could
    # never run.  Both were dead code and have been removed.
    from ctree.tune import BruteForceTuningDriver as TuningDriver
    from ctree.tune import MinimizeTime
    from ctree.tune import BooleanArrayParameter
    from ctree.tune import IntegerArrayParameter

    nMemorySpaces = len(cl.clGetDeviceIDs())

    params = [
        BooleanArrayParameter("parallelize", 7),
        IntegerArrayParameter("locs", 7, 0, nMemorySpaces),
        BooleanArrayParameter("distribute", 4),
        BooleanArrayParameter("fusion", 7),
        BooleanArrayParameter("reassociate", 4),
    ]

    return TuningDriver(params, MinimizeTime())
def ocl_init(ocl_src):
    """Build ``ocl_src`` for the first GPU found and run the convolution.

    Platforms are tried in the order returned by ``cl.clGetPlatformIDs()``;
    platforms that raise ``cl.DeviceNotFoundError`` are skipped.  The first
    GPU device of the first usable platform is used for the context, the
    queue and the program build, after which ``run_conv`` is called.

    Raises:
        ValueError: if no platform yields a GPU device.
    """
    use_devices = None
    for platform in cl.clGetPlatformIDs():
        try:
            gpu_devices = cl.clGetDeviceIDs(
                platform, device_type=cl.CL_DEVICE_TYPE_GPU)
        except cl.DeviceNotFoundError:
            continue
        use_devices = gpu_devices[0:1]  # arbitrarily choose first device
        if use_devices is not None:
            break
    else:
        # the loop ended without finding any usable platform
        raise ValueError("no GPU openCL device found")

    print("OpenCL use_devices: " + str(use_devices))

    context = cl.clCreateContext(use_devices)
    queue = cl.clCreateCommandQueue(context)
    prog = cl.clCreateProgramWithSource(context, ocl_src).build()
    print(prog)
    # run_mxplusb( prog, queue )
    run_conv(prog, queue)
def ocl_init(ocl_src):
    """Compile ``ocl_src`` on the first available GPU and call ``run_conv``.

    Every platform from ``cl.clGetPlatformIDs()`` is queried for GPU-type
    devices until one answers without ``cl.DeviceNotFoundError``; the first
    device of that platform is then used.

    Raises:
        ValueError: when none of the platforms reports a GPU device.
    """
    use_devices = None
    remaining = list(cl.clGetPlatformIDs())
    while remaining and use_devices is None:
        platform = remaining.pop(0)
        try:
            devices = cl.clGetDeviceIDs(
                platform, device_type=cl.CL_DEVICE_TYPE_GPU)
            use_devices = devices[0:1]  # arbitrarily choose first device
        except cl.DeviceNotFoundError:
            pass

    if use_devices is None:
        raise ValueError("no GPU openCL device found")

    message = "OpenCL use_devices: " + str(use_devices)
    print(message)

    context = cl.clCreateContext(use_devices)
    queue = cl.clCreateCommandQueue(context)
    source_program = cl.clCreateProgramWithSource(context, ocl_src)
    prog = source_program.build()
    print(prog)
    # run_mxplusb( prog, queue )
    run_conv(prog, queue)
def __init__(self):
    """Bind the last reported OpenCL device and its context/queue pair.

    ``self.context`` and ``self.queue`` come from
    ``get_context_and_queue_from_devices`` called with a one-element list
    holding ``self.device``.
    """
    all_devices = clGetDeviceIDs()
    self.device = all_devices[-1]
    context_and_queue = get_context_and_queue_from_devices([self.device])
    self.context, self.queue = context_and_queue
def __init__(self, array, output):
    """Store the operands and create an OpenCL context and queue.

    Args:
        array: stored unchanged as ``self.array``.
        output: stored unchanged as ``self.output``.

    The last device returned by ``clGetDeviceIDs()`` becomes
    ``self.device``; a fresh context and command queue are created for it.
    """
    all_devices = clGetDeviceIDs()
    self.device = all_devices[-1]
    self.context = clCreateContext([self.device])
    self.queue = clCreateCommandQueue(self.context)

    self.array, self.output = array, output
def visit_FunctionDecl(self, node):
    """Turn the visited function into OpenCL kernel file(s) and return the
    C control function that launches them.

    Steps, in order:
      1. choose local / virtual-global work sizes (fixed ``(1, 1, 1)`` local
         size when ``self.testing`` is set),
      2. let the parent transformer visit ``node``, then mark ``node`` as a
         kernel and its parameters as global/const/local pointers to float,
      3. when ``self.is_copied`` is set, create one extra boundary kernel per
         boundary handler and register every kernel as an ``OclFile`` on
         ``self.project``,
      4. build the body of ``stencil_control``: size arrays, one
         ``clSetKernelArg`` per buffer plus the local-memory argument,
         ``clEnqueueNDRangeKernel`` and ``clFinish``.

    Args:
        node: the function declaration being visited; it is mutated in place.

    Returns:
        A ``FunctionDecl`` named ``"stencil_control"`` returning ``c_int32``
        whose parameters are the queue, the kernel(s) and ``buf0 .. bufN``.
    """
    # This function grabs the input and output grid names which are used to
    # generate the proper array macros.
    self.local_block = SymbolRef.unique()
    arg_cfg = self.arg_cfg

    global_size = arg_cfg[0].shape

    if self.testing:
        local_size = (1, 1, 1)
    else:
        # NOTE(review): the extent of this branch was reconstructed from a
        # whitespace-collapsed source; all statements that depend on `lcs`
        # are kept together here -- confirm against the original layout.
        desired_device_number = -1
        device = cl.clGetDeviceIDs()[desired_device_number]
        lcs = LocalSizeComputer(global_size, device)
        local_size = lcs.compute_local_size_bulky()
        virtual_global_size = lcs.compute_virtual_global_size(local_size)
        self.global_size = global_size
        self.local_size = local_size
        self.virtual_global_size = virtual_global_size

    super(StencilOclTransformer, self).visit_FunctionDecl(node)

    # every parameter except the last becomes a global const float pointer
    for index, param in enumerate(node.params[:-1]):
        # TODO: Transform numpy type to ctype
        param.type = ct.POINTER(ct.c_float)()
        param.set_global()
        param.set_const()

    node.set_kernel()
    # the last original parameter is global but not const
    node.params[-1].set_global()
    node.params[-1].type = ct.POINTER(ct.c_float)()
    # extra trailing parameter: the local-memory block
    node.params.append(SymbolRef(self.local_block.name,
                                 ct.POINTER(ct.c_float)()))
    node.params[-1].set_local()
    node.defn = node.defn[0]

    # if boundary handling is copy we have to generate a collection of
    # boundary kernels to handle the on-gpu boundary copy
    if self.is_copied:
        device = cl.clGetDeviceIDs()[-1]
        self.boundary_handlers = boundary_kernel_factory(
            self.ghost_depth, self.output_grid,
            node.params[0].name,
            node.params[-2].name,  # second last parameter is output
            device
        )
        boundary_kernels = [
            FunctionDecl(
                name=boundary_handler.kernel_name,
                params=node.params,
                defn=boundary_handler.generate_ocl_kernel_body(),
            )
            for boundary_handler in self.boundary_handlers
        ]

        self.project.files.append(OclFile('kernel', [node]))

        # one OclFile per boundary kernel, named after its index
        for dim, boundary_kernel in enumerate(boundary_kernels):
            boundary_kernel.set_kernel()
            self.project.files.append(OclFile(kernel_dim_name(dim),
                                              [boundary_kernel]))

        self.boundary_kernels = boundary_kernels

        # ctree.browser_show_ast(node)
        # import ctree
        # ctree.browser_show_ast(boundary_kernels[0])
    else:
        self.project.files.append(OclFile('kernel', [node]))

    # print(self.project.files[0])
    # print(self.project.files[-1])

    # body of the control function: work-size arrays and the error code
    defn = [
        ArrayDef(
            SymbolRef('global', ct.c_ulong()), arg_cfg[0].ndim,
            [Constant(d) for d in self.virtual_global_size]
        ),
        ArrayDef(
            SymbolRef('local', ct.c_ulong()), arg_cfg[0].ndim,
            [Constant(s) for s in local_size]
            # [Constant(s) for s in [512, 512]]  # use this line to force a
            # opencl local size error
        ),
        Assign(SymbolRef("error_code", ct.c_int()), Constant(0)),
    ]
    # one cl_mem argument per entry of arg_cfg plus one more buffer
    setargs = [clSetKernelArg(
        SymbolRef('kernel'), Constant(d),
        FunctionCall(SymbolRef('sizeof'), [SymbolRef('cl_mem')]),
        Ref(SymbolRef('buf%d' % d))
    ) for d in range(len(arg_cfg) + 1)]
    from functools import reduce
    import operator
    # sizeof(cl_float) times the product over dimensions of
    # (local size + 2 * ghost depth)
    local_mem_size = reduce(
        operator.mul,
        (size + 2 * self.kernel.ghost_depth[index]
         for index, size in enumerate(local_size)),
        ct.sizeof(cl.cl_float())
    )
    # final kernel argument: local memory of that size, passed as NULL
    setargs.append(
        clSetKernelArg(
            'kernel', len(arg_cfg) + 1,
            local_mem_size,
            NULL()
        )
    )

    defn.extend(setargs)
    enqueue_call = FunctionCall(SymbolRef('clEnqueueNDRangeKernel'), [
        SymbolRef('queue'), SymbolRef('kernel'),
        Constant(self.kernel.dim), NULL(),
        SymbolRef('global'), SymbolRef('local'),
        Constant(0), NULL(), NULL()
    ])

    defn.extend(check_ocl_error(enqueue_call, "clEnqueueNDRangeKernel"))

    params = [
        SymbolRef('queue', cl.cl_command_queue()),
        SymbolRef('kernel', cl.cl_kernel())
    ]
    if self.is_copied:
        # repeat size arrays, argument setup and enqueue for every
        # boundary kernel
        for dim, boundary_kernel in enumerate(self.boundary_kernels):
            defn.extend([
                ArrayDef(
                    SymbolRef(global_for_dim_name(dim), ct.c_ulong()),
                    arg_cfg[0].ndim,
                    [Constant(d)
                     for d in self.boundary_handlers[dim].global_size]
                ),
                ArrayDef(
                    SymbolRef(local_for_dim_name(dim), ct.c_ulong()),
                    arg_cfg[0].ndim,
                    [Constant(s) for s in
                     self.boundary_handlers[dim].local_size]
                )
            ])
            setargs = [clSetKernelArg(
                SymbolRef(kernel_dim_name(dim)), Constant(d),
                FunctionCall(SymbolRef('sizeof'), [SymbolRef('cl_mem')]),
                Ref(SymbolRef('buf%d' % d))
            ) for d in range(len(arg_cfg) + 1)]
            setargs.append(
                clSetKernelArg(
                    SymbolRef(kernel_dim_name(dim)),
                    len(arg_cfg) + 1,
                    local_mem_size,
                    NULL()
                )
            )
            defn.extend(setargs)

            # NOTE(review): unlike the main kernel above, this enqueue is
            # not wrapped in check_ocl_error.
            enqueue_call = FunctionCall(
                SymbolRef('clEnqueueNDRangeKernel'), [
                    SymbolRef('queue'), SymbolRef(kernel_dim_name(dim)),
                    Constant(self.kernel.dim), NULL(),
                    SymbolRef(global_for_dim_name(dim)),
                    SymbolRef(local_for_dim_name(dim)),
                    Constant(0), NULL(), NULL()
                ]
            )
            defn.append(enqueue_call)

            # each boundary kernel is an extra cl_kernel parameter
            params.extend([
                SymbolRef(kernel_dim_name(dim), cl.cl_kernel())
            ])

    # finish_call = FunctionCall(SymbolRef('clFinish'),
    #                            [SymbolRef('queue')])
    # defn.append(finish_call)
    # finish_call = [
    #     Assign(
    #         SymbolRef("error_code", ct.c_int()),
    #         FunctionCall(SymbolRef('clFinish'), [SymbolRef('queue')])
    #     ),
    #     If(
    #         NotEq(SymbolRef("error_code"), Constant(0)),
    #         FunctionCall(
    #             SymbolRef("printf"),
    #             [
    #                 String("OPENCL KERNEL RETURNED ERROR CODE %d"),
    #                 SymbolRef("error_code")
    #             ]
    #         )
    #     )
    # ]

    finish_call = check_ocl_error(
        FunctionCall(SymbolRef('clFinish'), [SymbolRef('queue')]),
        "clFinish"
    )
    defn.extend(finish_call)
    defn.append(Return(SymbolRef("error_code")))

    # the buffers come after the queue and kernel parameters
    params.extend(SymbolRef('buf%d' % d, cl.cl_mem())
                  for d in range(len(arg_cfg) + 1))

    control = FunctionDecl(ct.c_int32(), "stencil_control",
                           params=params,
                           defn=defn)

    return control
def main():
    """Collect CPU/GPU/system facts and write them to ``info.json``.

    Depending on the module-level ``CL`` flag the data comes from OpenCL
    (``'openCL'``: one entry per device returned by ``cl.clGetDeviceIDs()``)
    or from the ``nonCL`` helpers (``'nonCL'``: CPU entries only).  The
    resulting dictionary has the keys ``'CPUs'``, ``'GPUs'`` and, when one of
    the two paths ran, ``'System'``.  It is written to ``info.json`` in the
    current directory and printed, as sorted, 4-space indented JSON.

    Raises:
        RuntimeError: on the OpenCL path when no device is reported, or on
            the non-OpenCL path when the OS is neither Windows nor Linux.
            (Both cases previously failed later with a NameError.)
    """
    json_db = {'CPUs': {}, 'GPUs': {}}

    # email = raw_input("What is your e-mail address?: ")
    email = "*****@*****.**"

    # Get operating system.
    system = platform.system()

    # Get RAM, bits, CPU and GPU information.
    CPUcount = 0
    GPUcount = 0
    gpuType = None
    gpuDriver = None

    if CL == 'openCL':
        device = None
        for device in cl.clGetDeviceIDs():
            device_type = str(device.type)
            if re.search('CPU', device_type):
                CPUcount += 1
                json_db['CPUs']['CPU' + str(CPUcount)] = {
                    "DeviceName": str(device.name),
                    "DeviceVendor": str(device.vendor),
                    "DeviceBits": int(device.address_bits),
                    "DeviceSpeedMHz": int(device.max_clock_frequency),
                    "DeviceCores": int(device.max_compute_units),
                }
            elif re.search('GPU', device_type):
                GPUcount += 1
                vendor = str(device.vendor)
                if re.search('(AMD|Advanced Micro Device)', vendor):
                    gpuType = 'ocl'
                    # Verify GPU driver version is 13.1 or higher: the 4th
                    # whitespace-separated field of the version string is
                    # compared against 1084.4.
                    gpuDriverTest = float(
                        str(device.version).split()[3].strip('() '))
                    gpuDriver = (gpuDriverTest
                                 if gpuDriverTest >= 1084.4 else None)
                elif re.search('NV', vendor):
                    gpuType = 'cuda'
                    # Do a RE for ###.# and test its above cut off
                    gpuDriver = 'test'
                else:
                    gpuType = None
                    gpuDriver = None
                json_db['GPUs']['GPU' + str(GPUcount)] = {
                    "DeviceName": str(device.name),
                    "DeviceVendor": vendor,
                    "DeviceBits": int(device.address_bits),
                    # NOTE(review): true division under Python 3, floor
                    # division under Python 2 -- left as written.
                    "Device memory":
                        int(device.global_mem_size) / 1024 / 1024,
                    "DeviceSpeedMHz": int(device.max_clock_frequency),
                    "DeviceCores": int(device.max_compute_units),
                    "GpuType": str(gpuType),
                    "gpuDriver": gpuDriver,
                }
            else:
                print("Unknown device")

        if device is None:
            # The system record below is derived from the last device seen.
            raise RuntimeError(
                "no OpenCL devices reported; cannot build system info")

        # Create a clientID based off system information + 4digit random
        # number
        clientID = (system[0] + str(device.address_bits)[0]
                    + str(CPUcount) + str(GPUcount) + str(gpuType)[0] + '.'
                    + str(random.randint(0, 9999)).rjust(4, '0'))
        json_db['System'] = {
            "OS": str(system),
            # NOTE(review): this is the last device's *local* memory size,
            # reported under the key "RAM" -- confirm that is intended.
            "RAM": int(device.local_mem_size) / 1024,
            "Bits": int(device.address_bits),
            "CPUs": CPUcount,
            "GPUs": GPUcount,
            "email": str(email),
            "ClientID": clientID,
        }

    # if the system doesn't have OpenCL then we can't use the GPU's anyways
    # so just get CPU info
    if CL == 'nonCL':
        if system == 'Windows':
            cpus, cores, speed, cname, vendor = \
                nonCL.windowsInfo.getCPUinfo()
            bits = int(nonCL.windowsInfo.getBits())
            ram = int(nonCL.windowsInfo.getRAMinfo())
        elif system == 'Linux':
            cpus, cores, speed, cname, vendor = \
                nonCL.linuxInfo().getCPUinfo()
            bits = int(nonCL.linuxInfo().getBits())
            ram = int(nonCL.linuxInfo().getRAMinfo())
        else:
            raise RuntimeError(
                "unsupported operating system for non-OpenCL probing: "
                + str(system))

        # every CPU gets the same description
        for _ in range(len(cpus)):
            CPUcount += 1
            json_db['CPUs']['CPU' + str(CPUcount)] = {
                "DeviceName": str(cname),
                "DeviceVendor": str(vendor),
                "DeviceBits": bits,
                "DeviceSpeedMHz": int(speed),
                "DeviceCores": int(cores),
            }

        # Create a clientID based off system information + 4digit random
        # number
        clientID = (system[0] + str(bits)[0] + str(CPUcount)
                    + str(GPUcount) + str(gpuType)[0] + '.'
                    + str(random.randint(0, 9999)).rjust(4, '0'))
        json_db['System'] = {
            "OS": str(system),
            "RAM": ram,
            "Bits": bits,
            "CPUs": CPUcount,
            "GPUs": GPUcount,
            "email": str(email),
            "ClientID": clientID,
        }

    # write json_db to file in human readable format.
    rendered = json.dumps(json_db, sort_keys=True, indent=4)
    with open('info.json', 'w') as f:
        f.write(rendered)
    print(rendered)
# Backend selection; defaults to OpenCL when HM_BACKEND is not set.
backend = os.environ.get("HM_BACKEND", "ocl")

# Number of kernel names handed out so far.
count = 0


def get_unique_kernel_name():
    """Return a new kernel name ``fn<k>`` after incrementing ``count``."""
    global count
    count = count + 1
    return "fn{}".format(count)


if backend in ("ocl", "opencl", "OCL"):
    try:
        # platforms = cl.clGetPlatformIDs()
        # devices = cl.clGetDeviceIDs(platforms[1])
        devices = cl.clGetDeviceIDs(device_type=cl.CL_DEVICE_TYPE_GPU)
    except cl.DeviceNotFoundError:
        # no GPU: fall back to whatever devices exist
        devices = cl.clGetDeviceIDs()
    # context over the last device only
    context = cl.clCreateContext(devices[-1:])
    # a single queue when the TRAVIS variable is set, eight otherwise
    queues = [
        cl.clCreateCommandQueue(context)
        for _ in range(1 if os.environ.get("TRAVIS") else 8)
    ]
    # queues = [
    #     cl.clCreateCommandQueue(
    #         context,
    #         properties=cl.CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE
    #     ) for _ in range(10)
    # ]
    queue = queues[0]
def __init__(self, shape, device=None):
    """Derive per-dimension upper bounds for the local work-group search.

    Args:
        shape: sequence giving the size of the target grid in each dimension;
            a copy (``shape[:]``) is stored as ``self.shape``.
        device: object exposing ``max_work_item_sizes``,
            ``max_work_group_size`` and ``max_compute_units``.  When ``None``
            the last device from ``pycl.clGetDeviceIDs()`` is queried; if
            that query fails for any reason, fixed defaults are used
            (work-group size 512, per-dimension sizes ``[512, 512, 512]``,
            40 compute units).

    Sets ``self.dimensions``, ``self.max_local_group_sizes``,
    ``self.max_work_group_size``, ``self.compute_units``, ``self.root_size``
    and ``self.max_indices`` (one bound per dimension).
    """
    self.shape = shape[:]
    self.dimensions = len(shape)

    if device is None:
        try:
            device = pycl.clGetDeviceIDs()[-1]
            self.max_local_group_sizes = pycl.clGetDeviceInfo(
                device, pycl.cl_device_info.CL_DEVICE_MAX_WORK_ITEM_SIZES)
            self.max_work_group_size = pycl.clGetDeviceInfo(
                device, pycl.cl_device_info.CL_DEVICE_MAX_WORK_GROUP_SIZE)
            self.compute_units = pycl.clGetDeviceInfo(
                device, pycl.cl_device_info.CL_DEVICE_MAX_COMPUTE_UNITS)
        except Exception:
            # Deliberate best effort: without a usable OpenCL device fall
            # back to fixed limits.  ``Exception`` (instead of a bare
            # ``except``) lets KeyboardInterrupt/SystemExit through.
            self.max_work_group_size = 512
            self.max_local_group_sizes = [512, 512, 512]
            self.compute_units = 40
    else:
        self.max_local_group_sizes = device.max_work_item_sizes
        self.max_work_group_size = device.max_work_group_size
        self.compute_units = device.max_compute_units

    # slack factor applied to every estimate below
    overshoot = 1.5

    # make a first estimate of the largest index to consider in each
    # dimension: the n-th root of the max work group size, in order to
    # minimize the surface area to volume ratio
    self.root_size = int(
        (self.max_work_group_size ** (1.0 / self.dimensions)) + 0.5)
    self.max_indices = [
        int(self.root_size * overshoot) for _ in range(self.dimensions)
    ]

    # adjust each dimension downward if it exceeds the max_local_size for
    # that dimension; adjust the other dimensions upward if there is room
    for dim in range(self.dimensions):
        if self.max_indices[dim] > self.max_local_group_sizes[dim]:
            self.max_indices[dim] = self.max_local_group_sizes[dim]
            indices_to_fix = [
                dim2 for dim2 in range(self.dimensions)
                if dim2 != dim
                and self.max_indices[dim2] < self.max_local_group_sizes[dim2]
            ]
            if indices_to_fix:
                new_root = int(
                    int(self.max_work_group_size
                        ** (1.0 / len(indices_to_fix)) + 0.5) * overshoot)
                for dim2 in indices_to_fix:
                    self.max_indices[dim2] = min(
                        new_root, self.max_local_group_sizes[dim2])

    # if the indices selected so far are significantly larger than the size
    # of the target matrix, adjust that dimension's index downward and
    # adjust upward any trailing indices
    for dim in range(self.dimensions):
        if self.shape[dim] * overshoot < self.max_indices[dim]:
            self.max_indices[dim] = min(
                self.max_local_group_sizes[dim],
                int(self.shape[dim] * overshoot))
            if dim == 0:
                # increase the size of the other dimensions
                if self.dimensions == 2:
                    self.max_indices[1] = min(
                        int((self.max_work_group_size
                             / float(self.max_indices[0])) * overshoot),
                        self.max_local_group_sizes[1]
                    )
                if self.dimensions == 3:
                    temp_root = int(
                        math.sqrt(self.max_work_group_size
                                  / float(self.max_indices[0])) * overshoot)
                    self.max_indices[1] = min(
                        temp_root, self.max_local_group_sizes[1])
                    self.max_indices[2] = min(
                        temp_root, self.max_local_group_sizes[2])
            elif dim == 1:
                # increase the size of the remaining direction
                if self.dimensions == 3:
                    # ``min`` (the original used ``max``) keeps the bound
                    # within the device's per-dimension limit, like every
                    # other branch above.
                    self.max_indices[2] = min(
                        int((self.max_work_group_size
                             / float(self.max_indices[0]
                                     * self.max_indices[1])) * overshoot),
                        self.max_local_group_sizes[2]
                    )
def main():
    """Gather hardware information and store it as JSON in ``info.json``.

    The module-level ``CL`` flag selects the source of the data:
    ``'openCL'`` walks the devices returned by ``cl.clGetDeviceIDs()`` and
    records CPUs and GPUs; ``'nonCL'`` asks the ``nonCL`` helpers and records
    CPUs only.  Either path also adds a ``'System'`` record containing a
    client id built from the collected values plus a random 4-digit suffix.
    The dictionary is written to ``info.json`` and printed, both as sorted
    JSON with an indent of 4.

    Raises:
        RuntimeError: if the OpenCL path sees no device at all, or if the
            non-OpenCL path runs on an OS other than Windows or Linux.
            (Both situations previously surfaced as a NameError.)
    """

    def random_suffix():
        # 4-character, zero-padded random number
        return str(random.randint(0, 9999)).rjust(4, '0')

    json_db = {}
    json_db['CPUs'] = {}
    json_db['GPUs'] = {}

    # email = raw_input("What is your e-mail address?: ")
    email = "*****@*****.**"

    # Get operating system.
    system = platform.system()

    # Get RAM, bits, CPU and GPU information.
    CPUcount = 0
    GPUcount = 0
    gpuType = None
    gpuDriver = None

    if CL == 'openCL':
        last_device = None
        for device in cl.clGetDeviceIDs():
            last_device = device
            kind = str(device.type)

            # CPUs
            if re.search('CPU', kind):
                CPUcount += 1
                CPUd = {
                    "DeviceName": str(device.name),
                    "DeviceVendor": str(device.vendor),
                    "DeviceBits": int(device.address_bits),
                    "DeviceSpeedMHz": int(device.max_clock_frequency),
                    "DeviceCores": int(device.max_compute_units),
                }
                json_db['CPUs']['CPU' + str(CPUcount)] = CPUd
                continue

            # anything that is neither CPU nor GPU
            if not re.search('GPU', kind):
                print("Unknown device")
                continue

            # GPUs
            GPUcount += 1
            if re.search('(AMD|Advanced Micro Device)', str(device.vendor)):
                gpuType = 'ocl'
                # Verify GPU driver version is 13.1 or higher: the 4th
                # whitespace-separated field of the version string is
                # compared against 1084.4.
                gpuDriverTest = float(
                    str(device.version).split()[3].strip('() '))
                if gpuDriverTest >= 1084.4:
                    gpuDriver = gpuDriverTest
                else:
                    gpuDriver = None
            elif re.search('NV', str(device.vendor)):
                gpuType = 'cuda'
                # Do a RE for ###.# and test its above cut off
                gpuDriver = 'test'
            else:
                gpuType = None
                gpuDriver = None

            GPUd = {
                "DeviceName": str(device.name),
                "DeviceVendor": str(device.vendor),
                "DeviceBits": int(device.address_bits),
                # NOTE(review): true division under Python 3, floor
                # division under Python 2 -- left as written.
                "Device memory": int(device.global_mem_size) / 1024 / 1024,
                "DeviceSpeedMHz": int(device.max_clock_frequency),
                "DeviceCores": int(device.max_compute_units),
                "GpuType": str(gpuType),
                "gpuDriver": gpuDriver,
            }
            json_db['GPUs']['GPU' + str(GPUcount)] = GPUd

        if last_device is None:
            # The system record is derived from the last device seen.
            raise RuntimeError(
                "no OpenCL devices reported; cannot build system info")

        # Create a clientID based off system information + 4digit random
        # number
        clientID = (system[0] + str(last_device.address_bits)[0]
                    + str(CPUcount) + str(GPUcount) + str(gpuType)[0]
                    + '.' + random_suffix())
        json_db['System'] = {
            "OS": str(system),
            # NOTE(review): the last device's *local* memory size is
            # reported as "RAM" -- confirm that is intended.
            "RAM": int(last_device.local_mem_size) / 1024,
            "Bits": int(last_device.address_bits),
            "CPUs": CPUcount,
            "GPUs": GPUcount,
            "email": str(email),
            "ClientID": clientID,
        }

    # if the system doesn't have OpenCL then we can't use the GPU's anyways
    # so just get CPU info
    if CL == 'nonCL':
        if system == 'Windows':
            cpus, cores, speed, cname, vendor = \
                nonCL.windowsInfo.getCPUinfo()
            bits = int(nonCL.windowsInfo.getBits())
            ram = int(nonCL.windowsInfo.getRAMinfo())
        elif system == 'Linux':
            cpus, cores, speed, cname, vendor = \
                nonCL.linuxInfo().getCPUinfo()
            bits = int(nonCL.linuxInfo().getBits())
            ram = int(nonCL.linuxInfo().getRAMinfo())
        else:
            raise RuntimeError(
                "unsupported operating system for non-OpenCL probing: "
                + str(system))

        # every CPU gets the same description
        for _ in range(len(cpus)):
            CPUcount += 1
            json_db['CPUs']['CPU' + str(CPUcount)] = {
                "DeviceName": str(cname),
                "DeviceVendor": str(vendor),
                "DeviceBits": bits,
                "DeviceSpeedMHz": int(speed),
                "DeviceCores": int(cores),
            }

        # Create a clientID based off system information + 4digit random
        # number
        clientID = (system[0] + str(bits)[0] + str(CPUcount)
                    + str(GPUcount) + str(gpuType)[0]
                    + '.' + random_suffix())
        json_db['System'] = {
            "OS": str(system),
            "RAM": ram,
            "Bits": bits,
            "CPUs": CPUcount,
            "GPUs": GPUcount,
            "email": str(email),
            "ClientID": clientID,
        }

    # write json_db to file in human readable format.
    rendered = json.dumps(json_db, sort_keys=True, indent=4)
    with open('info.json', 'w') as f:
        f.write(rendered)
    print(rendered)
def test_simple_cache(self):
    """Asking twice for the same device list yields equal results."""
    device_list = [cl.clGetDeviceIDs()[-1]]

    first = get_context_and_queue_from_devices(list(device_list))
    second = get_context_and_queue_from_devices(list(device_list))

    self.assertEqual(first, second)
# Selected backend; OpenCL unless HM_BACKEND says otherwise.
backend = os.environ.get("HM_BACKEND", "ocl")

# Running counter behind get_unique_kernel_name().
count = 0


def get_unique_kernel_name():
    """Increment ``count`` and return ``"fn"`` followed by its new value."""
    global count
    count += 1
    name = "fn{}".format(count)
    return name


if backend in ["ocl", "opencl", "OCL"]:
    try:
        # platforms = cl.clGetPlatformIDs()
        # devices = cl.clGetDeviceIDs(platforms[1])
        devices = cl.clGetDeviceIDs(device_type=cl.CL_DEVICE_TYPE_GPU)
    except cl.DeviceNotFoundError:
        # no GPU available: use whatever devices exist
        devices = cl.clGetDeviceIDs()
    # context over the last device only
    context = cl.clCreateContext(devices[-1:])
    if not os.environ.get("TRAVIS"):
        queues = [cl.clCreateCommandQueue(context) for _ in range(8)]
    else:
        # a single queue when the TRAVIS variable is set
        queues = [cl.clCreateCommandQueue(context)]
    # queues = [
    #     cl.clCreateCommandQueue(
    #         context,
    #         properties=cl.CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE
def __init__(self):
    """Create the device/context/queue triple used by this object.

    ``self.device`` is the last entry of ``clGetDeviceIDs()``;
    ``self.context`` is created for that device alone and ``self.queue`` is a
    command queue on that context.
    """
    found = clGetDeviceIDs()
    self.device = found[-1]
    self.context = clCreateContext([self.device])
    self.queue = clCreateCommandQueue(self.context)