Esempio n. 1
0
    def compile(self):
        """Assemble the C source for this module, compile it and load it.

        The kernel code and the generated wrapper are merged into a single
        source string which is dumped via ``self._dump_generated_code`` and
        then handed to ``compilation.load``.  The loaded function is stored
        on ``self._fun``.  When ``configuration["debug"]`` is set, the
        generated source is also kept on ``self._wrapper_code``.

        Once loading has succeeded, ``_args``, ``_kernel``, ``_itspace`` and
        ``_direct`` are deleted from the instance.

        :returns: the object returned by ``compilation.load``.
        :raises RuntimeError: if the instance has no ``_args`` attribute.
        """
        # If we weren't in the cache we /must/ have arguments
        if not hasattr(self, '_args'):
            raise RuntimeError(
                "JITModule has no args associated with it, should never happen"
            )

        compiler = coffee.system.compiler
        externc_open = '' if not self._kernel._cpp else 'extern "C" {'
        externc_close = '' if not self._kernel._cpp else '}'
        headers = "\n".join([compiler.get('vect_header', "")])

        # SoA arguments need the OP2_STRIDE macro around the kernel body
        if any(arg._is_soa for arg in self._args):
            kernel_template = """
            #define OP2_STRIDE(a, idx) a[idx]
            %(header)s
            %(code)s
            #undef OP2_STRIDE
            """
        else:
            kernel_template = """
            %(header)s
            %(code)s
            """
        kernel_code = kernel_template % {
            'code': self._kernel.code(),
            'header': headers
        }
        wrapper_code = strip(dedent(self._wrapper) % self.generate_code())

        code_to_compile = """
        #include <petsc.h>
        #include <stdbool.h>
        #include <math.h>
        #include <inttypes.h>
        %(sys_headers)s

        %(kernel)s

        %(externc_open)s
        %(wrapper)s
        %(externc_close)s
        """ % {
            'kernel': kernel_code,
            'wrapper': wrapper_code,
            'externc_open': externc_open,
            'externc_close': externc_close,
            'sys_headers':
            '\n'.join(self._kernel._headers + self._system_headers)
        }

        self._dump_generated_code(code_to_compile)
        if configuration["debug"]:
            self._wrapper_code = code_to_compile

        extension = self._extension
        # Work on a copy: extending ``self._cppargs`` in place would make the
        # flags below pile up on the stored list (which may be shared between
        # instances if it is defined at class level) on every compilation.
        cppargs = list(self._cppargs)
        cppargs += ["-I%s/include" % d for d in get_petsc_dir()] + \
                   ["-I%s" % d for d in self._kernel._include_dirs] + \
                   ["-I%s" % os.path.abspath(os.path.dirname(__file__))]
        if compiler:
            # Compiler flag selecting the instruction set in use
            cppargs += [compiler[coffee.system.isa['inst_set']]]
        ldargs = ["-L%s/lib" % d for d in get_petsc_dir()] + \
                 ["-Wl,-rpath,%s/lib" % d for d in get_petsc_dir()] + \
                 ["-lpetsc", "-lm"] + self._libraries
        ldargs += self._kernel._ldargs

        # C++ kernels override the default source file extension
        if self._kernel._cpp:
            extension = "cpp"
        self._fun = compilation.load(code_to_compile,
                                     extension,
                                     self._wrapper_name,
                                     cppargs=cppargs,
                                     ldargs=ldargs,
                                     argtypes=self._argtypes,
                                     restype=None,
                                     compiler=compiler.get('name'),
                                     comm=self.comm)
        # Blow away everything we don't need any more
        del self._args
        del self._kernel
        del self._itspace
        del self._direct
        return self._fun
Esempio n. 2
0
    def generate_code(self):
        """Build the dictionary of code fragments for the executor wrapper.

        The returned dictionary holds the wrapper signature pieces
        (``wrapper_name``, ``executor_arg``, ``wrapper_args``,
        ``wrapper_decs``, ``rank``, ``region_flag``) plus ``user_code``,
        ``ssinds_arg`` and ``executor_code``.  The latter is obtained by
        generating one loop body per kernel in the chain and passing the
        concatenation to ``self._executor.c_code``.

        :returns: a ``dict`` mapping placeholder names to C code strings.
        """

        def _reindent(text, level):
            # Prefix every line but the first with /level/ two-space indents
            return ('\n' + '  ' * level).join(text.split('\n'))

        def _prefetch(address):
            # Prefetch intrinsic call for the given address expression
            return '_mm_prefetch ((char*)(%s), _MM_HINT_T0)' % address

        # 1) Construct the wrapper arguments
        exec_meta = slope.Executor.meta
        code_dict = {
            'wrapper_name': 'wrap_executor',
            'executor_arg': "%s %s" % (exec_meta['ctype_exec'],
                                       exec_meta['name_param_exec']),
            'wrapper_args': ', '.join(arg.c_wrapper_arg() for arg in self._args),
            'wrapper_decs': _reindent(
                ';\n'.join(arg.c_wrapper_dec() for arg in self._args), 1),
            'rank': ", %s %s" % (exec_meta['ctype_rank'], exec_meta['rank']),
            'region_flag': ", %s %s" % (exec_meta['ctype_region_flag'],
                                        exec_meta['region_flag']),
        }

        # 2) Construct the kernel invocations, one loop of the chain at a time
        loop_bodies, user_codes, ssind_decls = [], [], []
        chain = zip(self._all_kernels, self._all_itspaces, self._all_args)
        for loop_id, (kernel, it_space, args) in enumerate(chain):
            # Bind the Executor's arguments to this kernel's arguments: the
            # first Executor argument sharing both data and map is the target
            bound = []
            for a1 in args:
                match = next((a2 for a2 in self._args
                              if a1.data is a2.data and a1.map is a2.map), None)
                if match is not None:
                    a1.ref_arg = match
                bound.append(a1.c_arg_bindto())
            binding = ";\n".join(bound)

            # Obtain the /code_dict/ as if the loop were not part of an
            # Executor, since bits of code generation can be reused
            loop_code_dict = sequential.JITModule(kernel, it_space, *args,
                                                  delay=True).generate_code()

            # Does the scatter use global or local maps ?
            if self._use_glb_maps:
                glb_direct = self._executor.gtl_maps[loop_id]['DIRECT']
                loop_code_dict['index_expr'] = '%s[n]' % glb_direct
                prefetch_var = 'int p = %s[n + %d]' % (
                    self._executor.gtl_maps[loop_id]['DIRECT'],
                    self._use_prefetch)
            else:
                prefetch_var = 'int p = n + %d' % self._use_prefetch

            # Add prefetch intrinsics, if requested
            if not self._use_prefetch:
                prefetch_maps, prefetch_vecs = '', ''
            else:
                map_entries = [a.c_map_entry('p') for a in args if a._is_indirect]
                # can save some instructions since prefetching targets chunks
                # of 32 bytes
                every_other = flatten([j for j in pm if pm.index(j) % 2 == 0]
                                      for pm in map_entries)
                # Drop duplicates while keeping first-seen order
                unique_entries = list(OrderedDict.fromkeys(every_other))
                prefetch_maps = ';\n'.join(
                    [prefetch_var] +
                    [_prefetch('&(%s)' % entry) for entry in unique_entries])
                vec_entries = flatten(a.c_vec_entry('p', True) for a in args
                                      if a._is_indirect)
                prefetch_vecs = ';\n'.join(_prefetch(v) for v in vec_entries)
            loop_code_dict['prefetch_maps'] = prefetch_maps
            loop_code_dict['prefetch_vecs'] = prefetch_vecs

            # Build the subset indirection array, if necessary; this
            # overrides any /index_expr/ set above
            if loop_code_dict['ssinds_arg']:
                ssind_name = 'ssinds_%d' % loop_id
                ssind_decl = 'int* %s' % ssind_name
                loop_code_dict['index_expr'] = '%s[n]' % ssind_name
            else:
                ssind_name, ssind_decl = '', ''

            # Finish building up the /code_dict/; the kernel name is the one
            # the kernel has within *this* specific loop chain
            loop_code_dict.update({
                'kernel_name': kernel._function_names[self._kernel.cache_key],
                'args_binding': binding,
                'tile_init': self._executor.c_loop_init[loop_id],
                'tile_finish': self._executor.c_loop_end[loop_id],
                'tile_start': slope.Executor.meta['tile_start'],
                'tile_end': slope.Executor.meta['tile_end'],
            })
            tile_iter = '%s[n]' % self._executor.gtl_maps[loop_id]['DIRECT']
            if ssind_name:
                tile_iter = '%s[%s]' % (ssind_name, tile_iter)
            loop_code_dict['tile_iter'] = tile_iter

            # Collect the rest, i.e., body, user code, ...
            loop_bodies.append(strip(TilingJITModule._kernel_wrapper % loop_code_dict))
            user_codes.append(kernel._user_code)
            ssind_decls.append(ssind_decl)

        chain_body = _reindent("\n\n".join(loop_bodies), 2)
        code_dict['user_code'] = _reindent("\n".join(user_codes), 1)
        code_dict['ssinds_arg'] = "".join("%s," % decl for decl in ssind_decls if decl)
        code_dict['executor_code'] = _reindent(self._executor.c_code(chain_body), 1)

        return code_dict
Esempio n. 3
0
    def compile(self):
        """Assemble the C source for this module, compile it and load it.

        The kernel code and the generated wrapper are merged into a single
        source string which is dumped via ``self._dump_generated_code`` and
        then handed to ``compilation.load``.  The loaded function is stored
        on ``self._fun``.  When ``configuration["debug"]`` is set, the
        generated source is also kept on ``self._wrapper_code``.

        Once loading has succeeded, ``_args``, ``_kernel``, ``_itspace`` and
        ``_direct`` are deleted from the instance.

        :returns: the object returned by ``compilation.load``.
        :raises RuntimeError: if the instance has no ``_args`` attribute.
        """
        # If we weren't in the cache we /must/ have arguments
        if not hasattr(self, '_args'):
            raise RuntimeError("JITModule has no args associated with it, should never happen")

        compiler = coffee.system.compiler
        externc_open = '' if not self._kernel._cpp else 'extern "C" {'
        externc_close = '' if not self._kernel._cpp else '}'
        headers = "\n".join([compiler.get('vect_header', "")])

        # SoA arguments need the OP2_STRIDE macro around the kernel body
        if any(arg._is_soa for arg in self._args):
            kernel_template = """
            #define OP2_STRIDE(a, idx) a[idx]
            %(header)s
            %(code)s
            #undef OP2_STRIDE
            """
        else:
            kernel_template = """
            %(header)s
            %(code)s
            """
        kernel_code = kernel_template % {'code': self._kernel.code(),
                                         'header': headers}
        wrapper_code = strip(dedent(self._wrapper) % self.generate_code())

        code_to_compile = """
        #include <petsc.h>
        #include <stdbool.h>
        #include <math.h>
        %(sys_headers)s

        %(kernel)s

        %(externc_open)s
        %(wrapper)s
        %(externc_close)s
        """ % {'kernel': kernel_code,
               'wrapper': wrapper_code,
               'externc_open': externc_open,
               'externc_close': externc_close,
               'sys_headers': '\n'.join(self._kernel._headers + self._system_headers)}

        self._dump_generated_code(code_to_compile)
        if configuration["debug"]:
            self._wrapper_code = code_to_compile

        extension = self._extension
        # Work on a copy: extending ``self._cppargs`` in place would make the
        # flags below pile up on the stored list (which may be shared between
        # instances if it is defined at class level) on every compilation.
        cppargs = list(self._cppargs)
        cppargs += ["-I%s/include" % d for d in get_petsc_dir()] + \
                   ["-I%s" % d for d in self._kernel._include_dirs] + \
                   ["-I%s" % os.path.abspath(os.path.dirname(__file__))]
        if compiler:
            # Compiler flag selecting the instruction set in use
            cppargs += [compiler[coffee.system.isa['inst_set']]]
        ldargs = ["-L%s/lib" % d for d in get_petsc_dir()] + \
                 ["-Wl,-rpath,%s/lib" % d for d in get_petsc_dir()] + \
                 ["-lpetsc", "-lm"] + self._libraries
        ldargs += self._kernel._ldargs

        # C++ kernels override the default source file extension
        if self._kernel._cpp:
            extension = "cpp"
        self._fun = compilation.load(code_to_compile,
                                     extension,
                                     self._wrapper_name,
                                     cppargs=cppargs,
                                     ldargs=ldargs,
                                     argtypes=self._argtypes,
                                     restype=None,
                                     compiler=compiler.get('name'),
                                     comm=self.comm)
        # Blow away everything we don't need any more
        del self._args
        del self._kernel
        del self._itspace
        del self._direct
        return self._fun