def test_opencl(typename):
    """Round-trip a small OpenCL kernel through the parser and the generator.

    ``typename`` is substituted as the declared type of the index variable
    ``i`` in the kernel body. The parsed tree is dumped with ``show()`` and
    the regenerated source is printed; nothing is asserted, so the check is
    simply that no step raises.
    """
    from pycparserext.ext_c_parser import OpenCLCParser

    # Adjacent literals are joined at compile time into the kernel template.
    kernel_template = (
        " __kernel void zeroMatrix(__global float *A, int n, __global float * B)"
        " { %s i = get_global_id(0);"
        " for (int k=0; k<n; k++) A[i*n+k] = 0;"
        " } "
    )
    kernel_source = kernel_template % typename

    tree = OpenCLCParser().parse(kernel_source)
    tree.show()

    # The generator is only imported once parsing has succeeded.
    from pycparserext.ext_c_generator import OpenCLCGenerator

    regenerated = OpenCLCGenerator().visit(tree)
    print(regenerated)
def test_opencl():
    """Round-trip a fixed OpenCL kernel through the parser and the generator.

    The kernel source is parsed with ``OpenCLCParser``, the resulting tree is
    dumped with ``show()``, and the source regenerated by ``OpenCLCGenerator``
    is printed. Nothing is asserted; the check is that no step raises.
    """
    from pycparserext.ext_c_parser import OpenCLCParser

    src = (
        " __kernel void zeroMatrix(__global float *A, int n, __global float * B)"
        " { int i = get_global_id(0);"
        " for (int k=0; k<n; k++) A[i*n+k] = 0;"
        " } "
    )

    p = OpenCLCParser()
    ast = p.parse(src)
    ast.show()

    from pycparserext.ext_c_generator import OpenCLCGenerator

    # print is a function in Python 3; the statement form is a SyntaxError.
    print(OpenCLCGenerator().visit(ast))
def get_file_kernels(self, filename):
    '''
    Return the names of the kernels defined in the provided file.

    When `self.file_is_cached(filename)` is true and the kernel listing file
    already exists, the names are read back from that listing (one per line).
    Otherwise the file is passed through `self.commentRemover`, parsed, every
    function whose specifiers include one ending in 'kernel' is collected,
    and the listing file is (re)written before the names are returned.
    '''
    listing_path = self.get_name_of_kernels_file(filename)
    # Queried exactly as before; the returned path is not used in this method.
    cached_file = self.get_name_of_instrumented_file(filename)

    # Fast path: the file has been seen before (file_is_cached does the
    # actual comparison, so equal names alone are not enough) and its kernel
    # listing is still on disk.
    if self.file_is_cached(filename) and os.path.exists(listing_path):
        with open(listing_path, 'r') as listing:
            return listing.read().splitlines()

    # Slow path, part one: strip the comments with the external tool, then
    # drop blank lines and lines starting with '#' before parsing.
    completed = sp.run([self.commentRemover, filename],
                       stdout=sp.PIPE, stderr=sp.PIPE)
    stripped_text = completed.stdout.decode('ascii')
    src = ''.join(
        line
        for line in stripped_text.splitlines(keepends=True)
        if line.strip() and not line.startswith('#')
    )

    ast = OpenCLCParser().parse(src)
    kernel_list = [
        node.decl.name
        for node in ast
        if isinstance(node, FuncDef)
        and any(spec.endswith('kernel') for spec in node.decl.funcspec)
    ]

    # Slow path, part two: remember the result for the next call.
    with open(listing_path, 'w') as listing:
        listing.writelines(name + '\n' for name in kernel_list)

    return kernel_list
def instrument_file(file, verbose, static_features=False):
    """Instrument the OpenCL source stored in ``file``, rewriting it in place.

    The file is overwritten several times along the way: with its
    preprocessed form, with the output of ``OcludeFormatter``, optionally with
    the ``inline`` specifiers restored, with the instrumented source, and
    finally with a copy whose counter updates carry a trailing comment naming
    the LLVM instruction.

    Args:
        file: Path of the OpenCL source file to instrument (overwritten).
        verbose: Forwarded to ``interact.set_verbosity``; when truthy the
            final instrumented source is also echoed through ``interact``.
        static_features: When truthy, return right after the instrumentation
            step, skipping the annotation pass and the closing messages.

    Returns:
        The value returned by ``add_instrumentation_data_to_file`` when
        ``static_features`` is truthy, otherwise ``None``.

    Raises:
        SystemExit: With status 1 when ``file`` does not exist.
    """
    if not os.path.exists(file):
        interact(f'Error: {file} is not a file')
        # The ``exit`` helper is injected by ``site`` and may be missing
        # (e.g. ``python -S``); raising SystemExit is the portable equivalent.
        raise SystemExit(1)

    interact.set_verbosity(verbose)

    ########################################
    # step 1: remove comments / preprocess #
    ########################################

    cmdout, _ = interact.run_command('Preprocessing source file', preprocessor, file)
    with open(file, 'w') as f:
        # keep only non-blank lines that are not preprocessor line markers
        f.writelines(
            line
            for line in cmdout.splitlines(keepends=True)
            if line.strip() and not line.startswith('#')
        )

    ############################################################################
    # step 2: add hidden counter arguments in kernels and missing curly braces #
    ############################################################################

    parser = OpenCLCParser()

    with open(file, 'r') as f:
        ast = parser.parse(f.read())

    funcCallsToEdit, kernelFuncs, inlinedFuncs = [], [], []
    for func in ast:
        if not isinstance(func, FuncDef):
            continue

        # kernels and plain (helper) functions are handled differently later
        if any(spec.endswith('kernel') for spec in func.decl.funcspec):
            kernelFuncs.append(func.decl.name)
        else:
            funcCallsToEdit.append(func.decl.name)

        # there may be (helper) functions with the attribute "inline"
        # we need to avoid them, but to remember them in order to restore them later
        if 'inline' in func.decl.funcspec:
            func.decl.funcspec = [x for x in func.decl.funcspec if x != 'inline']
            inlinedFuncs.append(func.decl.name)

    # our generator adds hidden arguments and missing curly braces
    gen = OcludeFormatter(funcCallsToEdit, kernelFuncs)

    with open(file, 'w') as f:
        f.write(gen.visit(ast))

    #########################################################################
    # step 3: instrument source code with counter incrementing where needed #
    #########################################################################

    # first take the instrumentation data from the respective tool
    # after compiling source to LLVM bitcode
    # WITHOUT allowing function inlining (to get pure data for each function)
    interact.run_command(
        'Compiling source to LLVM bitcode (1/2)',
        cl2llCompiler, *cl2llCompilerFlags, '-O0', '-o', templlvm, file)

    instrumentation_data, _ = interact.run_command(
        'Retrieving instrumentation data from LLVM bitcode',
        instrumentationGetter, templlvm)

    ### there may be a need to restore the "inline" function attribute in some functions at this point ###
    if inlinedFuncs:
        with open(file, 'r') as f:
            ast = parser.parse(f.read())

        for ext in ast.ext:
            if isinstance(ext, FuncDef) and ext.decl.name in inlinedFuncs:
                ext.decl.funcspec = ['inline'] + ext.decl.funcspec

        gen = OpenCLCGenerator()
        with open(file, 'w') as f:
            f.write(gen.visit(ast))
    ### "inline" function attribute restored at this point, if it was needed to ###

    _, inliner_report = interact.run_command(
        'Compiling source to LLVM bitcode (2/2)',
        cl2llCompiler, *cl2llCompilerFlags, '-Rpass=inline', '-o', templlvm, file)

    os.remove(templlvm)

    # for each inlined function, replace the "call" with a negative "ret"
    # that means that each inlined function leads to 1 less "call" and 1 less "ret"
    inline_lines = [
        remark.split()[0].split(':')[-3]
        for remark in inliner_report.splitlines()
        if 'remark' in remark
    ]

    for inline_line in inline_lines:
        # only the first matching call is rewritten for every remark
        instrumentation_data = instrumentation_data.replace(
            '|' + inline_line + ':call', '|retNOT', 1)

    # now add them to the source file, eventually instrumenting it
    instrumentation_per_function = add_instrumentation_data_to_file(
        file, kernelFuncs, instrumentation_data, parser)

    # instrumentation is done! Congrats!

    if static_features:
        return instrumentation_per_function

    # store a prettified (i.e. easier to read/inspect) format in the cache
    with open(file, 'r') as f:
        src = f.read()

    add_marker = f'atom_add(& {hidden_counter_name_local}'
    sub_marker = f'atom_sub(& {hidden_counter_name_local}'
    with open(file, 'w') as f:
        for line in src.splitlines():
            if add_marker in line or sub_marker in line:
                # the first bracketed number on the line indexes llvm_instructions
                instr_idx = int(line.split('[')[1].split(']')[0])
                line += f' /* {llvm_instructions[instr_idx]} */'
            f.write(line + '\n')

    if verbose:
        rule = '============================================================================'

        interact('Final instrumented source code for inspection:')
        interact(rule, nl=False)
        interact(rule, prompt=False)

        with open(file, 'r') as f:
            for line in f:
                interact(line, prompt=False, nl=False)

        interact(rule, nl=False)
        interact(rule, prompt=False)

    interact('Intrumentation completed successfully')
def unparse_c_code_to_python(code_c: str) -> str:
    """Translate OpenCL C source into the text of a Python emulation module.

    The source is cleaned up first (``TP_ROOT`` define, block comments,
    ``#pragma unroll``, the complex/double preambles and ``__const`` are
    removed; macros with arguments are rewritten by
    ``MacroWithArguments.replace_with_function``), then parsed. Preprocessor
    lines, typedefs and function definitions found at the top level of the
    syntax tree are converted and joined, after a fixed import header.

    As a side effect the module-level lists ``names_func_has_barrier`` and
    ``names_func_require_work_item`` are cleared and refilled for this source.

    Args:
        code_c: OpenCL C source code.

    Returns:
        The generated Python module source, terminated by a newline.
    """
    # todo prevents files: https://stackoverflow.com/questions/12644902/how-to-prevent-table-regeneration-in-ply
    # yacc.yacc(debug=False, write_tables=False)

    # removes TP_ROOT = cfloat / cdouble (a stray ']' in the pattern used to
    # keep the cdouble variant from ever matching)
    code_c = re.sub(r'#define[ ]+TP_ROOT[ ]+(cfloat|cdouble)[ ]*\n', '', code_c)

    p = OpenCLCParser(lex_optimize=False, yacc_optimize=False)
    # Clean up the parser table file from the working directory; it is fine
    # if the parser did not write one.
    try:
        os.remove('yacctab.py')
    except FileNotFoundError:
        pass

    # remove block comments like /* some comment */ since otherwise p.parse throws parsing error
    code_c = re.sub(r'\/\*(\*(?!\/)|[^*])*\*\/', '', code_c)
    code_c = code_c.replace('#pragma unroll', '')
    code_c = MacroWithArguments.replace_with_function(code_c)

    from pyopencl_extension.framework import preamble_activate_complex_numbers
    code_c = code_c.replace(preamble_activate_complex_numbers, '')
    from pyopencl_extension.framework import preamble_activate_double
    code_c = code_c.replace(preamble_activate_double, '')

    code_c = code_c.replace('__const', '')  # todo: create constant array class which raises error when writing to

    # todo: comments can be extracted using line numbers. Nodes in abstract syntax tree provide coords for reinsertion
    # abstract syntax tree, why no comments? --> https://github.com/eliben/pycparser/issues/124
    ast = p.parse(code_c)

    # Emitted verbatim at the top of the generated module, so every import
    # has to sit on its own, unindented line.
    header = """
from typing import Tuple
from pyopencl_extension.emulation import cl_kernel, WorkItem, local_memory
from pyopencl_extension.types.funcs_for_emulation import *
from pyopencl_extension.types.utilities_np_cl import Types, c_to_np_type_name_catch
import numpy as np
"""
    module_py = [header]

    if 'cfloat' in code_c:
        module_py.append(preamble_buff_t_complex64_np)
    elif 'cdouble' in code_c:
        module_py.append(preamble_buff_t_complex128_np)
    # module_py.append(preamble_cl_funcs_to_lambdas)

    # find funcs that contain barrier(CLK_LOCAL_MEM_FENCE) and therefore require yield from
    names_func_has_barrier.clear()
    names_func_has_barrier.extend(search_for_barrier(code_c, ast))

    names_func_require_work_item.clear()
    names_func_require_work_item.extend(
        [node.decl.name for node in ast.ext if isinstance(node, FuncDef)])

    for node in ast.ext:
        # exact-type checks are kept on purpose: subclasses are not matched
        if type(node) is list:
            if len(node) == 1 and type(node[0]) is PreprocessorLine:
                module_py.append(unparse_preprocessor_line(node[0]))

        if type(node) is Typedef:
            module_py.append(unparse_type_def_node(node))

        if isinstance(node, FuncDef):
            module_py.append('\n')
            if node.decl.name in MacroWithArguments.names_py_macro:
                # for explanation see comment below names_macro_func_def
                module_py.append(
                    MacroWithArguments.unparse_macro_node_and_convert_to_string(node))
            else:
                module_py.append(unparse_function_node(node))

    code_py = '\n'.join(module_py) + '\n'

    # todo: deal with complex header
    # if 'cfloat_t' in code_c:
    #     preamble_buff_t = preamble_buff_t_complex64_np
    # elif 'cdouble_t' in code_c:
    #     preamble_buff_t = preamble_buff_t_complex128_np
    # else:
    #     preamble_buff_t = preamble_buff_t_real_np
    #
    # preamble_buff_t = '{}\n\n{}'.format(preamble_buff_t, preamble_cl_funcs_to_lambdas)
    return code_py
def run_kernel(kernel_file_path, kernel_name, gsize, lsize, platform_id, device_id, samples, instcounts, timeit, verbose):
    '''
    The hostcode wrapper function
    Essentially, it is nothing more than an OpenCL template hostcode,
    but it is the heart of oclude

    Builds the program found in `kernel_file_path` for the device selected by
    `platform_id` / `device_id`, creates the arguments of `kernel_name` with
    `init_kernel_arguments`, and runs the kernel `samples` times (once when
    `samples` is not greater than 1) over a 1-D range of `gsize` work items,
    with `lsize` as the local size when it is truthy.

    Returns a list with one dict per execution, holding an 'instcounts' entry
    when `instcounts` is truthy and a 'timeit' entry (keys 'hostcode',
    'device' and 'transfer'; presumably milliseconds -- confirm against the
    `time` import) when `timeit` is truthy. Returns None when neither was
    requested.

    Raises ValueError when the program does not contain exactly one kernel
    named `kernel_name`, and KeyError when an argument type is neither a
    `cltypes` attribute nor a struct/typedef found in the kernel source.
    '''

    interact = Interactor(__file__.split(os.sep)[-1])
    interact.set_verbosity(verbose)

    ### step 1: get OpenCL platform, device and context, ###
    ### build the kernel program and create a queue      ###
    platform = cl.get_platforms()[platform_id]
    device = platform.get_devices()[device_id]

    # check if the extension needed
    # for the ulong hidden counters exists in selected device
    if instcounts and 'cl_khr_int64_base_atomics' not in device.get_info(cl.device_info.EXTENSIONS):
        interact('WARNING: Selected device does not support the `cl_khr_int64_base_atomics` OpenCL extension!')
        interact(' This means that instructions will not get correctly reported if they are too many!')

    interact('Using the following device:')
    interact('Platform:\t' + platform.name)
    interact('Device:\t' + device.name)
    interact('Version:\t' + device.version.strip())

    context = cl.Context([device])
    with open(kernel_file_path, 'r') as kernel_file:
        kernel_source = '#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n' + kernel_file.read()
    program = cl.Program(context, kernel_source).build()

    if timeit:
        # profiling has to be enabled on the queue to read event.profile later
        queue = cl.CommandQueue(context, properties=cl.command_queue_properties.PROFILING_ENABLE)
    else:
        queue = cl.CommandQueue(context)

    ### step 2: get kernel arg info ###
    interact(f'Kernel name: {kernel_name}')

    # unpacking into a one-element list insists on exactly one match
    [kernel] = filter(lambda k : k.function_name == kernel_name, program.all_kernels())
    nargs = kernel.get_info(cl.kernel_info.NUM_ARGS)

    args = []

    for idx in range(nargs):
        kernel_arg_name = kernel.get_arg_info(idx, cl.kernel_arg_info.NAME)
        # the hidden counter buffers are not reported to the user
        is_oclude_hidden_buffer = kernel_arg_name in [hidden_counter_name_local, hidden_counter_name_global]
        if not is_oclude_hidden_buffer:
            interact(f'Kernel arg {idx + 1}: ', nl=False)
        kernel_arg_type_name = kernel.get_arg_info(idx, cl.kernel_arg_info.TYPE_NAME)
        kernel_arg_address_qualifier = cl.kernel_arg_address_qualifier.to_string(
            kernel.get_arg_info(idx, cl.kernel_arg_info.ADDRESS_QUALIFIER)
        ).lower()
        if not is_oclude_hidden_buffer:
            interact(f'{kernel_arg_name} ({kernel_arg_type_name}, {kernel_arg_address_qualifier})', prompt=False)
        args.append((kernel_arg_name, kernel_arg_type_name, kernel_arg_address_qualifier))

    ### step 3: collect arg types ###
    arg_types = {}
    parser = None
    typedefs = {}
    structs = {}

    for kernel_arg_name, kernel_arg_type_name, _ in args:

        # drop any pointer suffix and the whitespace around the base name
        argtype_base = kernel_arg_type_name.split('*')[0].strip()

        try:
            # it is a normal OpenCL type
            # (getattr, unlike eval, also reports names such as 'struct foo'
            # as AttributeError, so they reach the struct handling below)
            arg_types[kernel_arg_name] = getattr(cltypes, argtype_base)
        except AttributeError:
            # it is a struct (lazy evaluation of structs)
            if parser is None:
                # parse the kernel source once, the first time a struct is met
                parser = OpenCLCParser()
                cmdout, _ = interact.run_command(None, preprocessor, kernel_file_path)
                kernel_source = '\n'.join(
                    line for line in cmdout.splitlines()
                    if line.strip() and not line.startswith('#')
                )
                ast = parser.parse(kernel_source)

                for ext in ast.ext:

                    ### typedefs ###
                    if isinstance(ext, Typedef):
                        if isinstance(ext.type.type, Struct):
                            if ext.type.type.decls is not None:
                                # typedefed struct (new)
                                typedefs[ext.name] = create_struct_type(device, ext.name, ext.type.type)
                            else:
                                # typedefed struct (already seen it)
                                typedefs[ext.name] = structs['struct ' + ext.type.type.name]
                        else:
                            # simple typedef (not a struct)
                            typedefs[ext.name] = ext.type

                    ### struct declarations ###
                    elif isinstance(ext, Decl) and isinstance(ext.type, Struct):
                        structs['struct ' + ext.type.name] = create_struct_type(device, ext.type.name, ext.type)

            try:
                arg_types[kernel_arg_name] = structs[argtype_base]
            except KeyError:
                arg_types[kernel_arg_name] = typedefs[argtype_base]

    ### run the kernel as many times are requested by the user ###
    interact(f'About to execute kernel with Global NDRange = {gsize}' + (f' and Local NDRange = {lsize}' if lsize else ''))
    interact(f'Number of executions (a.k.a. samples) to perform: {max(samples, 1)}')

    # a progress bar is only shown for more than one execution
    n_executions = trange(samples, unit=' kernel executions') if samples > 1 else range(1)
    results = []

    for _ in n_executions:

        ### step 4: create argument buffers ###
        (
            arg_bufs,
            which_are_scalar,
            hidden_global_hostbuf,
            hidden_global_buf
        ) = init_kernel_arguments(context, args, arg_types, gsize)

        ### step 5: set kernel arguments and run it!
        kernel.set_scalar_arg_dtypes(which_are_scalar)

        time_start = time() if timeit else None
        time_finish = None

        event = kernel(queue, (gsize,), (lsize,) if lsize else None, *arg_bufs)

        if timeit:
            event.wait()
            time_finish = time()

        queue.flush()
        queue.finish()

        ### step 6: read back the results and report them if requested
        this_run_results = {}

        if instcounts:
            if not samples > 1:
                interact('Collecting instruction counts...')
            global_counter = np.empty_like(hidden_global_hostbuf)
            cl.enqueue_copy(queue, global_counter, hidden_global_buf)
            this_run_results['instcounts'] = dict(zip(llvm_instructions, global_counter.tolist()))

        if timeit:
            if not samples > 1:
                interact('Collecting time profiling info...')
            hostcode_time_elapsed = (time_finish - time_start) * 1000
            device_time_elapsed = (event.profile.end - event.profile.start) * 1e-6
            this_run_results['timeit'] = {
                'hostcode': hostcode_time_elapsed,
                'device': device_time_elapsed,
                'transfer': hostcode_time_elapsed - device_time_elapsed
            }

        if this_run_results:
            results.append(this_run_results)

    interact('Kernel run' + ('s' if samples > 1 else '') + ' completed successfully')

    return results if results else None