Example #1
def test_opencl(typename):
    from pycparserext.ext_c_parser import OpenCLCParser
    src = """
            __kernel void zeroMatrix(__global float *A, int n,  __global float * B)
            {
                %s i = get_global_id(0);
                for (int k=0; k<n; k++)
                    A[i*n+k] = 0;
            }
            """ % typename

    p = OpenCLCParser()
    ast = p.parse(src)
    ast.show()

    from pycparserext.ext_c_generator import OpenCLCGenerator
    print(OpenCLCGenerator().visit(ast))
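A minimal usage sketch (not part of the original snippet): the typename parameter stands in for any valid OpenCL scalar type name, so a hypothetical invocation could be:

test_opencl('int')  # substitutes 'int i = get_global_id(0);' into the kernel before parsing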
Example #2
def test_opencl():
    from pycparserext.ext_c_parser import OpenCLCParser
    src = """
            __kernel void zeroMatrix(__global float *A, int n,  __global float * B)
    {
        int i = get_global_id(0);
         for (int k=0; k<n; k++)
            A[i*n+k] = 0;
    }
            """

    p = OpenCLCParser()
    ast = p.parse(src)
    ast.show()

    from pycparserext.ext_c_generator import OpenCLCGenerator
    print(OpenCLCGenerator().visit(ast))
Example #3
    def get_file_kernels(self, filename):
        '''
        Returns a list of the kernels present in the provided file
        '''
        kernels_file = self.get_name_of_kernels_file(filename)
        cached_file = self.get_name_of_instrumented_file(filename)

        # have we seen this file before?
        # (we use file_is_cached to compare files with filecmp
        #  to avoid same name issues)
        if self.file_is_cached(filename) and os.path.exists(kernels_file):
            with open(kernels_file, 'r') as f:
                kernel_list = f.read().splitlines()
        else:
            # firstly, get the kernel list

            # remove instrumentation comments
            cmdout = sp.run([self.commentRemover, filename], stdout=sp.PIPE, stderr=sp.PIPE)
            cmdout = cmdout.stdout.decode('ascii')

            src = ''.join(filter(lambda line : line.strip() and not line.startswith('#'), cmdout.splitlines(keepends=True)))

            parser = OpenCLCParser()
            ast = parser.parse(src)

            kernel_list = []
            for f in filter(lambda x : isinstance(x, FuncDef), ast):
                if any(x.endswith('kernel') for x in f.decl.funcspec):
                    kernel_list.append(f.decl.name)

            # secondly, cache the kernel list
            with open(kernels_file, 'w') as f:
                for kernel in kernel_list:
                    f.write(kernel + '\n')

        return kernel_list
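The kernel-detection pattern above can also be exercised on its own. Below is a minimal, self-contained sketch under the assumption that pycparserext (and pycparser) are installed; the 'scale' kernel is a made-up input:

from pycparser.c_ast import FuncDef
from pycparserext.ext_c_parser import OpenCLCParser

src = """
__kernel void scale(__global float *A, float factor, int n)
{
    int i = get_global_id(0);
    if (i < n)
        A[i] = A[i] * factor;
}
"""

ast = OpenCLCParser().parse(src)
# a function is a kernel if any of its function specifiers ends with 'kernel'
kernel_names = [f.decl.name for f in ast.ext
                if isinstance(f, FuncDef)
                and any(spec.endswith('kernel') for spec in f.decl.funcspec)]
print(kernel_names)  # expected output: ['scale']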
Example #4
def instrument_file(file, verbose, static_features=False):

    if not os.path.exists(file):
        interact(f'Error: {file} is not a file')
        exit(1)

    interact.set_verbosity(verbose)

    ########################################
    # step 1: remove comments / preprocess #
    ########################################
    cmdout, _ = interact.run_command('Preprocessing source file', preprocessor,
                                     file)
    with open(file, 'w') as f:
        f.writelines(
            filter(lambda line: line.strip() and not line.startswith('#'),
                   cmdout.splitlines(keepends=True)))

    ############################################################################
    # step 2: add hidden counter arguments in kernels and missing curly braces #
    ############################################################################
    parser = OpenCLCParser()

    with open(file, 'r') as f:
        ast = parser.parse(f.read())

    ASTfunctions = list(filter(lambda x: isinstance(x, FuncDef), ast))
    funcCallsToEdit, kernelFuncs = [], []

    for f in ASTfunctions:
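        # tuple-indexing dispatch: the boolean result of the kernel check selects the list
        # (False -> 0 -> funcCallsToEdit, True -> 1 -> kernelFuncs) that receives the name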
        (funcCallsToEdit,
         kernelFuncs)[any(x.endswith('kernel')
                          for x in f.decl.funcspec)].append(f.decl.name)

    # there may be (helper) functions with the "inline" attribute;
    # we need to strip it here, but remember these functions in order to restore it later
    inlinedFuncs = []
    for func in ASTfunctions:
        if 'inline' in func.decl.funcspec:
            func.decl.funcspec = [
                x for x in func.decl.funcspec if x != 'inline'
            ]
            inlinedFuncs.append(func.decl.name)

    # our generator adds hidden arguments and missing curly braces
    gen = OcludeFormatter(funcCallsToEdit, kernelFuncs)

    with open(file, 'w') as f:
        f.write(gen.visit(ast))

    #########################################################################
    # step 3: instrument source code with counter incrementing where needed #
    #########################################################################

    # first take the instrumentation data from the respective tool
    # after compiling source to LLVM bitcode
    # WITHOUT allowing function inlining (to get pure data for each function)

    interact.run_command('Compiling source to LLVM bitcode (1/2)',
                         cl2llCompiler, *cl2llCompilerFlags, '-O0', '-o',
                         templlvm, file)

    instrumentation_data, _ = interact.run_command(
        'Retrieving instrumentation data from LLVM bitcode',
        instrumentationGetter, templlvm)

    ### there may be a need to restore the "inline" function attribute in some functions at this point ###
    if inlinedFuncs:
        with open(file, 'r') as f:
            ast = parser.parse(f.read())
        for ext in filter(
                lambda x: isinstance(x, FuncDef) and x.decl.name in
                inlinedFuncs, ast.ext):
            ext.decl.funcspec = ['inline'] + ext.decl.funcspec
        gen = OpenCLCGenerator()
        with open(file, 'w') as f:
            f.write(gen.visit(ast))
    ### "inline" function attribute restored at this point, if it was needed to ###

    _, inliner_report = interact.run_command(
        'Compiling source to LLVM bitcode (2/2)', cl2llCompiler,
        *cl2llCompilerFlags, '-Rpass=inline', '-o', templlvm, file)
    os.remove(templlvm)

    # for each inlined function, replace the "call" with a negative "ret"
    # this means each inlined function accounts for one fewer "call" and one fewer "ret"
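    # each clang '-Rpass=inline' remark starts with 'file:line:col:', so splitting
    # on ':' and taking [-3] yields the source line of the inlined call site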
    inline_lines = [
        x.split()[0].split(':')[-3]
        for x in filter(lambda y: 'remark' in y, inliner_report.splitlines())
    ]
    for inline_line in inline_lines:
        instrumentation_data = instrumentation_data.replace(
            '|' + inline_line + ':call', '|retNOT', 1)

    # now add them to the source file, eventually instrumenting it
    instrumentation_per_function = add_instrumentation_data_to_file(
        file, kernelFuncs, instrumentation_data, parser)

    # instrumentation is done! Congrats!
    if static_features:
        return instrumentation_per_function

    # store a prettified (i.e. easier to read/inspect) format in the cache
    with open(file, 'r') as f:
        src = f.read()
    with open(file, 'w') as f:
        for line in src.splitlines():
            if f'atom_add(& {hidden_counter_name_local}' in line or f'atom_sub(& {hidden_counter_name_local}' in line:
                instr_idx = int(line.split('[')[1].split(']')[0])
                line += f' /* {llvm_instructions[instr_idx]} */'
            f.write(line + '\n')

    if verbose:

        interact('Final instrumented source code for inspection:')
        interact(
            '============================================================================',
            nl=False)
        interact(
            '============================================================================',
            prompt=False)

        with open(file, 'r') as f:
            for line in f.readlines():
                interact(line, prompt=False, nl=False)

        interact(
            '============================================================================',
            nl=False)
        interact(
            '============================================================================',
            prompt=False)

    interact('Instrumentation completed successfully')
Example #5
def unparse_c_code_to_python(code_c: str) -> str:
    # todo: prevent parser table files from being regenerated: https://stackoverflow.com/questions/12644902/how-to-prevent-table-regeneration-in-ply
    # yacc.yacc(debug=False, write_tables=False)
    code_c = re.sub('#define[ ]+TP_ROOT[ ]+(cfloat|cdouble)[ ]*(\n)', '',
                    code_c)  # removes TP_ROOT = cfloat

    p = OpenCLCParser(lex_optimize=False, yacc_optimize=False)
    os.remove('yacctab.py')
    # remove block comments like /* some comment */ since otherwise p.parse throws a parsing error
    code_c = re.sub(r'\/\*(\*(?!\/)|[^*])*\*\/', '', code_c)
    code_c = code_c.replace('#pragma unroll', '')

    code_c = MacroWithArguments.replace_with_function(code_c)

    from pyopencl_extension.framework import preamble_activate_complex_numbers
    code_c = code_c.replace(preamble_activate_complex_numbers, '')
    from pyopencl_extension.framework import preamble_activate_double
    code_c = code_c.replace(preamble_activate_double, '')
    code_c = code_c.replace(
        '__const', ''
    )  # todo: create constant array class which raises error when writing to
    # todo: comments can be extracted using line numbers. Nodes in abstract syntax tree provide coords for reinsertion
    ast = p.parse(
        code_c
    )  # abstract syntax tree, why no comments? --> https://github.com/eliben/pycparser/issues/124
    module_py = []
    header = """
from typing import Tuple
from pyopencl_extension.emulation import cl_kernel, WorkItem, local_memory
from pyopencl_extension.types.funcs_for_emulation import *
from pyopencl_extension.types.utilities_np_cl import Types, c_to_np_type_name_catch
import numpy as np
            """
    module_py.append(header)
    if 'cfloat' in code_c:
        module_py.append(preamble_buff_t_complex64_np)
    elif 'cdouble' in code_c:
        module_py.append(preamble_buff_t_complex128_np)
    # module_py.append(preamble_cl_funcs_to_lambdas)

    # find funcs that contain barrier(CLK_LOCAL_MEM_FENCE) and therefore require yield from
    names_func_has_barrier.clear()
    names_func_has_barrier.extend(search_for_barrier(code_c, ast))

    names_func_require_work_item.clear()
    names_func_require_work_item.extend(
        [node.decl.name for node in ast.ext if isinstance(node, FuncDef)])
    for node in ast.ext:
        if type(node) == list:
            if len(node) == 1:
                if type(node[0]) == PreprocessorLine:
                    module_py.append(unparse_preprocessor_line(node[0]))
        if type(node) == Typedef:
            module_py.append(unparse_type_def_node(node))
        if isinstance(node, FuncDef):
            module_py.append('\n')
            if node.decl.name in MacroWithArguments.names_py_macro:  # for explanation see comment below names_macro_func_def
                module_py.append(
                    MacroWithArguments.
                    unparse_macro_node_and_convert_to_string(node))
            else:
                module_py.append(unparse_function_node(node))

    code_py = '\n'.join(module_py)
    code_py = code_py + '\n'
    # todo: deal with complex header
    # if 'cfloat_t' in code_c:
    #     preamble_buff_t = preamble_buff_t_complex64_np
    # elif 'cdouble_t' in code_c:
    #     preamble_buff_t = preamble_buff_t_complex128_np
    # else:
    #     preamble_buff_t = preamble_buff_t_real_np
    #
    # preamble_buff_t = '{}\n\n{}'.format(preamble_buff_t, preamble_cl_funcs_to_lambdas)
    return code_py
Example #6
            raise LookupError(
                "source contains more than one kernel definition")

        # function may not have arguments
        if self.extract_args and node.decl.type.args:
            for param in node.decl.type.args.params:
                self._args.append(KernelArg(param))

    @property
    def args(self):
        if self.kernel_count != 1:
            raise LookupError("source contains no kernel definitions.")
        return self._args


__parser = OpenCLCParser()


def preprocess(src: str, include_dirs: List[Path] = []) -> str:
    include_dirs = [Path(p).expanduser() for p in include_dirs]  # expand '~'
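    # build the system C preprocessor invocation; '-xc -' tells cpp to read C source from stdin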
    command = ['cpp'] + [f"-I{p}" for p in include_dirs] + ['-xc', '-']

    try:
        process = Popen(command,
                        stdin=PIPE,
                        stdout=PIPE,
                        stderr=PIPE,
                        universal_newlines=True)
        stdout, stderr = process.communicate(src)
        if process.returncode != 0:
            raise OpenCLPreprocessError(" ".join(command), stdout, stderr)
Example #7
FORMAT_SPECIFIERS = {
    np.dtype("bool"): "%d",
    np.dtype("float32"): "%.3f",
    np.dtype("float64"): "%.3f",
    np.dtype("int16"): "%hd",
    np.dtype("int32"): "%d",
    np.dtype("int64"): "%ld",
    np.dtype("int8"): "%hd",
    np.dtype("uint16"): "%hu",
    np.dtype("uint32"): "%u",
    np.dtype("uint64"): "%lu",
    np.dtype("uint8"): "%hd",
}
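A brief usage sketch of the table above (hypothetical; it assumes numpy is imported as np, as in the surrounding snippet):

# pick the printf-style specifier registered for a given NumPy dtype
fmt = FORMAT_SPECIFIERS[np.dtype("float32")]  # "%.3f"
print(fmt % 3.14159)                          # prints 3.142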

# Private OpenCL parser instance.
_OPENCL_PARSER = OpenCLCParser()


class OpenCLPreprocessError(ValueError):
    """Raised if pre-processor fails.

  Attributes:
    command: Pre-processor invocation.
    stdout: Pre-processor output.
    stderr: Pre-processor error output.
  """
    def __init__(self, command: str, stdout: str, stderr: str):
        super(OpenCLPreprocessError, self).__init__(command)
        self.command = command
        self.stdout = stdout
        self.stderr = stderr
Example #8
def run_kernel(kernel_file_path, kernel_name,
               gsize, lsize,
               platform_id, device_id,
               samples,
               instcounts, timeit,
               verbose):
    '''
    The hostcode wrapper function.
    Essentially, it is nothing more than template OpenCL hostcode,
    but it is the heart of oclude.
    '''

    interact = Interactor(__file__.split(os.sep)[-1])
    interact.set_verbosity(verbose)

    ### step 1: get OpenCL platform, device and context, ###
    ### build the kernel program and create a queue      ###
    platform = cl.get_platforms()[platform_id]
    device = platform.get_devices()[device_id]

    # check if the extension needed for the ulong
    # hidden counters exists in the selected device
    if instcounts and 'cl_khr_int64_base_atomics' not in device.get_info(cl.device_info.EXTENSIONS):
        interact('WARNING: Selected device does not support the `cl_khr_int64_base_atomics` OpenCL extension!')
        interact('         This means that instructions will not be reported correctly if there are too many of them!')

    interact('Using the following device:')
    interact('Platform:\t' + platform.name)
    interact('Device:\t' + device.name)
    interact('Version:\t' + device.version.strip())

    context = cl.Context([device])
    with open(kernel_file_path, 'r') as kernel_file:
        kernel_source = '#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n' + kernel_file.read()
    program = cl.Program(context, kernel_source).build()

    if timeit:
        queue = cl.CommandQueue(context, properties=cl.command_queue_properties.PROFILING_ENABLE)
    else:
        queue = cl.CommandQueue(context)

    ### step 2: get kernel arg info ###
    interact(f'Kernel name: {kernel_name}')

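    # single-element unpacking: raises ValueError unless exactly one kernel matches the requested name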
    [kernel] = filter(lambda k : k.function_name == kernel_name, program.all_kernels())
    nargs = kernel.get_info(cl.kernel_info.NUM_ARGS)

    args = []

    for idx in range(nargs):
        kernel_arg_name = kernel.get_arg_info(idx, cl.kernel_arg_info.NAME)
        is_oclude_hidden_buffer = kernel_arg_name in [hidden_counter_name_local, hidden_counter_name_global]
        if not is_oclude_hidden_buffer:
            interact(f'Kernel arg {idx + 1}: ', nl=False)
        kernel_arg_type_name = kernel.get_arg_info(idx, cl.kernel_arg_info.TYPE_NAME)
        kernel_arg_address_qualifier = cl.kernel_arg_address_qualifier.to_string(
            kernel.get_arg_info(idx, cl.kernel_arg_info.ADDRESS_QUALIFIER)
        ).lower()
        if not is_oclude_hidden_buffer:
            interact(f'{kernel_arg_name} ({kernel_arg_type_name}, {kernel_arg_address_qualifier})', prompt=False)
        args.append((kernel_arg_name, kernel_arg_type_name, kernel_arg_address_qualifier))

    ### step 3: collect arg types ###
    arg_types = {}
    parser = None
    ast = None
    typedefs = {}
    structs = {}

    for kernel_arg_name, kernel_arg_type_name, _ in args:

        argtype_base = kernel_arg_type_name.split('*')[0]

        try:
            # it is a normal OpenCL type
            arg_types[kernel_arg_name] = eval('cltypes.' + argtype_base)

        except AttributeError:
            # it is a struct (lazy evaluation of structs)
            if parser is None:
                parser = OpenCLCParser()
                cmdout, _ = interact.run_command(None, preprocessor, kernel_file_path)
                kernel_source = '\n'.join(filter(lambda line : line.strip() and not line.startswith('#'), cmdout.splitlines()))
                ast = parser.parse(kernel_source)

                for ext in ast.ext:

                    ### typedefs ###
                    if isinstance(ext, Typedef):
                        if isinstance(ext.type.type, Struct):
                            # typedefed struct (new)
                            if ext.type.type.decls is not None:
                                typedefs[ext.name] = create_struct_type(device, ext.name, ext.type.type)
                            # typedefed struct (already seen it)
                            else:
                                previous_name = 'struct ' + ext.type.type.name
                                new_name = ext.name
                                typedefs[new_name] = structs[previous_name]
                        # simple typedef (not a struct)
                        else:
                            previous_name = ' '.join(ext.type.type.names)
                            new_name = ext.name
                            typedefs[new_name] = ext.type

                    ### struct declarations ###
                    elif isinstance(ext, Decl) and isinstance(ext.type, Struct):
                        name = 'struct ' + ext.type.name
                        structs[name] = create_struct_type(device, ext.type.name, ext.type)

            try:
                arg_types[kernel_arg_name] = structs[argtype_base]
            except KeyError:
                arg_types[kernel_arg_name] = typedefs[argtype_base]

    ### run the kernel as many times are requested by the user ###
    interact(f'About to execute kernel with Global NDRange = {gsize}' + (f' and Local NDRange = {lsize}' if lsize else ''))
    interact(f'Number of executions (a.k.a. samples) to perform: {max(samples, 1)}')

    n_executions = trange(samples, unit=' kernel executions') if samples > 1 else range(1)
    results = []

    for _ in n_executions:

        ### step 4: create argument buffers ###
        (
            arg_bufs,
            which_are_scalar,
            hidden_global_hostbuf,
            hidden_global_buf
        ) = init_kernel_arguments(context, args, arg_types, gsize)

        ### step 5: set kernel arguments and run it! ###
        kernel.set_scalar_arg_dtypes(which_are_scalar)

        if timeit:
            time_start = time()
            time_finish = None

        if lsize:
            event = kernel(queue, (gsize,), (lsize,), *arg_bufs)
        else:
            event = kernel(queue, (gsize,), None, *arg_bufs)

        if timeit:
            event.wait()
            time_finish = time()

        queue.flush()
        queue.finish()

        ### step 6: read back the results and report them if requested ###
        this_run_results = {}

        if instcounts:
            if not samples > 1:
                interact('Collecting instruction counts...')
            global_counter = np.empty_like(hidden_global_hostbuf)
            cl.enqueue_copy(queue, global_counter, hidden_global_buf)
            this_run_results['instcounts'] = dict(zip(llvm_instructions, global_counter.tolist()))

        if timeit:
            if not samples > 1:
                interact('Collecting time profiling info...')
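            # convert both measurements to milliseconds: time() returns seconds,
            # while OpenCL profiling timestamps (event.profile.*) are in nanoseconds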
            hostcode_time_elapsed = (time_finish - time_start) * 1000
            device_time_elapsed = (event.profile.end - event.profile.start) * 1e-6
            this_run_results['timeit'] = {
                'hostcode': hostcode_time_elapsed,
                'device':   device_time_elapsed,
                'transfer': hostcode_time_elapsed - device_time_elapsed
            }

        if this_run_results:
            results.append(this_run_results)

    interact('Kernel run' + ('s' if samples > 1 else '') + ' completed successfully')

    return results if results else None
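A hypothetical invocation of the wrapper above (all values are placeholders; in oclude's pipeline the kernel file would normally have been instrumented beforehand, as in Example #4):

results = run_kernel('kernel.cl', 'zeroMatrix',
                     gsize=1024, lsize=None,
                     platform_id=0, device_id=0,
                     samples=10,
                     instcounts=True, timeit=True,
                     verbose=True)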