def _arg_check(self, args, size, interval):
    """
    Check the runtime values of this Dimension's bounds.

    Raises
    ------
    InvalidArgument
        If any of the ``self``-related runtime arguments in ``args``
        will cause an out-of-bounds access.
    """
    # Lower bound: must be provided, and must not underflow the data region
    if self.min_name not in args:
        raise InvalidArgument("No runtime value for %s" % self.min_name)
    lo = args[self.min_name]
    if interval.is_Defined and lo + interval.lower < 0:
        raise InvalidArgument("OOB detected due to %s=%d" % (self.min_name, lo))

    # Upper bound: must be provided, and must not overflow the data region
    if self.max_name not in args:
        raise InvalidArgument("No runtime value for %s" % self.max_name)
    hi = args[self.max_name]
    if interval.is_Defined and hi + interval.upper >= size:
        raise InvalidArgument("OOB detected due to %s=%d" % (self.max_name, hi))

    # max=min-1 is tolerated: it simply disables the loop
    if hi < lo - 1:
        raise InvalidArgument("Illegal max=%s < min=%s" % (hi, lo))
    elif hi == lo - 1:
        debug("%s=%d and %s=%d might cause no iterations along Dimension %s",
              self.min_name, lo, self.max_name, hi, self.name)
def make(loc, args):
    """
    Invoke the ``make`` command from within ``loc`` with arguments ``args``.

    Parameters
    ----------
    loc : str
        The directory in which ``make`` is executed.
    args : list of str
        The arguments passed on to ``make``.

    Raises
    ------
    CompilationError
        If ``make`` exits with a non-zero status.
    """
    # Log/err files are keyed on (loc, args), so re-running the same build
    # overwrites, rather than accumulates, log files
    hash_key = sha1((loc + str(args)).encode()).hexdigest()
    logfile = path.join(get_jit_dir(), "%s.log" % hash_key)
    errfile = path.join(get_jit_dir(), "%s.err" % hash_key)

    tic = time()
    with change_directory(loc):
        with open(logfile, "w") as lf:
            with open(errfile, "w") as ef:
                command = ['make'] + args
                lf.write("Compilation command:\n")
                lf.write(" ".join(command))
                lf.write("\n\n")
                try:
                    check_call(command, stderr=ef, stdout=lf)
                except CalledProcessError as e:
                    # Chain the original exception (`from e`) so the
                    # underlying CalledProcessError isn't discarded
                    raise CompilationError('Command "%s" return error status %d. '
                                           'Unable to compile code.\n'
                                           'Compile log in %s\n'
                                           'Compile errors in %s\n' %
                                           (e.cmd, e.returncode, logfile,
                                            errfile)) from e
    toc = time()

    debug("Make <%s>: run in [%.2f s]" % (" ".join(args), toc - tic))
def _arg_check(self, args, size, interval):
    """
    :raises InvalidArgument: If any of the ``self``-related runtime arguments
                             in ``args`` will cause an out-of-bounds access.
    """
    # The lower bound must be provided at runtime...
    if self.min_name not in args:
        raise InvalidArgument("No runtime value for %s" % self.min_name)
    # ... and must not underflow the data region
    if interval.is_Defined and args[self.min_name] + interval.lower < 0:
        raise InvalidArgument("OOB detected due to %s=%d" %
                              (self.min_name, args[self.min_name]))

    # Likewise for the upper bound, which must not overflow `size`
    if self.max_name not in args:
        raise InvalidArgument("No runtime value for %s" % self.max_name)
    if interval.is_Defined and args[self.max_name] + interval.upper >= size:
        raise InvalidArgument("OOB detected due to %s=%d" %
                              (self.max_name, args[self.max_name]))

    # Allow the specific case of max=min-1, which disables the loop
    if args[self.max_name] < args[self.min_name] - 1:
        raise InvalidArgument("Illegal max=%s < min=%s" %
                              (args[self.max_name], args[self.min_name]))
    elif args[self.max_name] == args[self.min_name] - 1:
        debug(
            "%s=%d and %s=%d might cause no iterations along Dimension %s",
            self.min_name, args[self.min_name],
            self.max_name, args[self.max_name], self.name)
def make(loc, args):
    """Invoke the ``make`` command from within ``loc`` with arguments ``args``."""
    # One log/err file pair per unique (loc, args) combination
    hash_key = sha1((loc + str(args)).encode()).hexdigest()
    logfile = path.join(get_jit_dir(), "%s.log" % hash_key)
    errfile = path.join(get_jit_dir(), "%s.err" % hash_key)

    command = ['make'] + args

    tic = time()
    with change_directory(loc), open(logfile, "w") as log, open(errfile, "w") as err:
        log.write("Compilation command:\n")
        log.write(" ".join(command))
        log.write("\n\n")
        try:
            check_call(command, stderr=err, stdout=log)
        except CalledProcessError as e:
            raise CompilationError('Command "%s" return error status %d. '
                                   'Unable to compile code.\n'
                                   'Compile log in %s\n'
                                   'Compile errors in %s\n' %
                                   (e.cmd, e.returncode, logfile, errfile))
    toc = time()

    debug("Make <%s>: run in [%.2f s]" % (" ".join(args), toc - tic))
def _allocate_memory(self):
    """Allocate memory in terms of numpy ndarrays."""
    debug("Allocating memory for %s (%s)" % (self.name, str(self.shape)))
    self._data_object = CMemory(self.shape, dtype=self.dtype)
    # Initialize the fresh allocation: either via NUMA-aware first-touch,
    # or with a plain zero-fill
    if not self._first_touch:
        self.data.fill(0)
    else:
        first_touch(self)
def log_args(arguments):
    """
    Log, at DEBUG level, a summary of the runtime ``arguments``.

    Array-like values (anything exposing a ``shape``) are summarized via
    their shape and L2 norm, rather than dumping the raw data.
    """
    arg_str = []
    for k, v in arguments.items():
        if hasattr(v, 'shape'):
            # Use %f, not %d: the L2 norm is a float, and %d would silently
            # truncate the fractional part in the log output
            arg_str.append('(%s, shape=%s, L2 Norm=%f)' %
                           (k, str(v.shape), np.linalg.norm(v.view())))
        else:
            arg_str.append('(%s, value=%s)' % (k, str(v)))
    debug("Passing Arguments: " + ", ".join(arg_str))
def wrapper(self):
    # Lazy allocation: the grid data is only materialized on first access
    if self._data is None:
        debug("Allocating memory for %s (%s)" % (self.name, self.shape))
        self._data = Data(self.shape, self.indices, self.dtype)
        # Initialize the freshly allocated memory
        if not self._first_touch:
            self.data.fill(0)
        else:
            first_touch(self)
    return func(self)
def _allocate_memory(self):
    """
    Function to allocate memory in terms of numpy ndarrays.

    Note: memmap is a subclass of ndarray.
    """
    if self.memmap:
        # Disk-backed storage; mode='w+' creates (or overwrites) the file
        self._data = np.memmap(filename=self.f, dtype=self.dtype, mode='w+',
                               shape=self.shape, order='C')
    else:
        debug("Allocating memory for %s (%s)" % (self.name, str(self.shape)))
        self._data, self.internal_pointer = malloc_aligned(
            self.shape, dtype=self.dtype)
        first_touch(self)
def pre_apply(self, toshare): """ Set up the YaskKernel before it's called from within an Operator. :param toshare: Mapper from functions to :class:`Data`s for sharing grid storage. """ # Sanity check grids = {i.grid for i in toshare if i.is_TensorFunction and i.grid is not None} assert len(grids) == 1 grid = grids.pop() # Set the domain size, apply grid sharing, more sanity checks for k, v in zip(self.space_dimensions, grid.shape): self.soln.set_rank_domain_size(k, int(v)) for k, v in toshare.items(): target = self.grids.get(k.name) if target is not None: v._give_storage(target) assert all(not i.is_storage_allocated() for i in self.local_grids.values()) assert all(v.is_storage_allocated() for k, v in self.grids.items() if k not in self.local_grids) # Debug info debug("%s<%s,%s>" % (self.name, self.step_dimension, self.space_dimensions)) for i in list(self.grids.values()) + list(self.local_grids.values()): if i.get_num_dims() == 0: debug(" Scalar: %s", i.get_name()) elif not i.is_storage_allocated(): size = [i.get_rank_domain_size(j) for j in self.space_dimensions] debug(" LocalGrid: %s%s, size=%s" % (i.get_name(), str(i.get_dim_names()), size)) else: size = [] lpad, rpad = [], [] for j in i.get_dim_names(): if j in self.space_dimensions: size.append(i.get_rank_domain_size(j)) lpad.append(i.get_left_pad_size(j)) rpad.append(i.get_right_pad_size(j)) else: size.append(i.get_alloc_size(j)) lpad.append(0) rpad.append(0) debug(" Grid: %s%s, size=%s, left_pad=%s, right_pad=%s" % (i.get_name(), str(i.get_dim_names()), size, lpad, rpad)) # Set up the block shape for loop blocking for i, j in zip(self.space_dimensions, configuration.yask['blockshape']): self.soln.set_block_size(i, j) # This, amongst other things, allocates storage for the temporary grids self.soln.prepare_solution() # Set up auto-tuning if configuration['autotuning'] is False: self.soln.reset_auto_tuner(False) elif configuration['autotuning'] == 'preemptive': self.soln.run_auto_tuner_now()
def save(soname, binary, compiler):
    """
    Store a binary into a file within a temporary directory.

    :param soname: Name of the .so file (w/o the suffix).
    :param binary: The binary data.
    :param compiler: The toolchain used for compilation.
    """
    sofile = get_jit_dir().joinpath(soname).with_suffix(compiler.so_ext)
    if not sofile.is_file():
        # Dump the binary, then report success
        with open(str(sofile), 'wb') as f:
            f.write(binary)
        debug("%s: `%s` successfully saved in `%s`"
              % (compiler, sofile.name, get_jit_dir()))
    else:
        debug("%s: `%s` was not saved in `%s` as it already exists"
              % (compiler, sofile.name, get_jit_dir()))
def jit_compile(soname, code, compiler):
    """
    JIT compile the given C/C++ ``code``.

    This function relies upon codepy's ``compile_from_string``, which performs
    caching of compilation units and avoids potential race conditions due to
    multiple processing trying to compile the same object.

    :param soname: A unique name for the jit-compiled shared object.
    :param code: String of C source code.
    :param compiler: The toolchain used for compilation.
    """
    target = str(get_jit_dir().joinpath(soname))
    src_file = "%s.%s" % (target, compiler.src_ext)

    # This makes a suite of cache directories based on the soname
    cache_dir = get_codepy_dir().joinpath(soname[:7])
    cache_dir.mkdir(parents=True, exist_ok=True)

    # `catch_warnings` suppresses codepy complaining that it's taking
    # too long to acquire the cache lock. This warning can only appear
    # in a multiprocess session, typically (but not necessarily) when
    # many processes are frequently attempting jit-compilation (e.g.,
    # when running the test suite in parallel)
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        tic = time()
        # Spinlock in case of MPI
        sleep_delay = 0 if configuration['mpi'] else 1
        _, _, _, recompiled = compile_from_string(
            compiler, target, code, src_file, cache_dir=cache_dir,
            debug=configuration['debug-compiler'], sleep_delay=sleep_delay)
        toc = time()

    # Log whether this was a fresh compilation or a cache hit
    if recompiled:
        debug("%s: compiled `%s` [%.2f s]" % (compiler, src_file, toc - tic))
    else:
        debug("%s: cache hit `%s` [%.2f s]" % (compiler, src_file, toc - tic))
def wrapper(self):
    # Lazy allocation: data is only materialized upon first access
    if self._data is None:
        debug("Allocating memory for %s%s" % (self.name, self.shape_allocated))
        self._data = Data(self.shape_allocated, self.dtype,
                          modulo=self._mask_modulo, allocator=self._allocator)
        if self._first_touch:
            assign(self, 0)
        if callable(self._initializer):
            # A user-provided initializer takes precedence over zero-fill;
            # combined with first-touch, the data gets written twice
            if self._first_touch:
                warning("`first touch` together with `initializer` causing "
                        "redundant data initialization")
            try:
                self._initializer(self.data_with_halo)
            except ValueError:
                # Perhaps user only wants to initialise the physical domain
                self._initializer(self.data)
        else:
            self.data_with_halo.fill(0)
    return func(self)
def save(self, soname, binary):
    """
    Store a binary into a file within a temporary directory.

    Parameters
    ----------
    soname : str
        Name of the .so file (w/o the suffix).
    binary : obj
        The binary data.
    """
    sofile = self.get_jit_dir().joinpath(soname).with_suffix(self.so_ext)
    if not sofile.is_file():
        # Dump the binary, then report success
        with open(str(sofile), 'wb') as f:
            f.write(binary)
        debug("%s: `%s` successfully saved in `%s`"
              % (self, sofile.name, self.get_jit_dir()))
    else:
        debug("%s: `%s` was not saved in `%s` as it already exists"
              % (self, sofile.name, self.get_jit_dir()))
def save(soname, binary, compiler):
    """
    Store a binary into a file within a temporary directory.

    Parameters
    ----------
    soname : str
        Name of the .so file (w/o the suffix).
    binary : obj
        The binary data.
    compiler : Compiler
        The toolchain used for JIT compilation.
    """
    sofile = get_jit_dir().joinpath(soname).with_suffix(compiler.so_ext)
    # Guard clause: never overwrite an existing shared object
    if sofile.is_file():
        debug("%s: `%s` was not saved in `%s` as it already exists"
              % (compiler, sofile.name, get_jit_dir()))
        return
    with open(str(sofile), 'wb') as f:
        f.write(binary)
    debug("%s: `%s` successfully saved in `%s`"
          % (compiler, sofile.name, get_jit_dir()))
def _arg_check(self, args, size, interval):
    """
    Raises
    ------
    InvalidArgument
        If any of the ``self``-related runtime arguments in ``args``
        will cause an out-of-bounds access.
    """
    # The lower bound must be provided and must not underflow the data region
    if self.min_name not in args:
        raise InvalidArgument("No runtime value for %s" % self.min_name)
    if interval.is_Defined and args[self.min_name] + interval.lower < 0:
        raise InvalidArgument("OOB detected due to %s=%d" %
                              (self.min_name, args[self.min_name]))

    # Likewise for the upper bound, which must not overflow `size`
    if self.max_name not in args:
        raise InvalidArgument("No runtime value for %s" % self.max_name)
    if interval.is_Defined:
        if is_integer(interval.upper):
            upper = interval.upper
        else:
            # Autopadding causes non-integer upper limit
            upper = interval.upper.subs(args)
        if args[self.max_name] + upper >= size:
            raise InvalidArgument("OOB detected due to %s=%d" %
                                  (self.max_name, args[self.max_name]))

    # Allow the specific case of max=min-1, which disables the loop
    if args[self.max_name] < args[self.min_name] - 1:
        raise InvalidArgument("Illegal %s=%d < %s=%d"
                              % (self.max_name, args[self.max_name],
                                 self.min_name, args[self.min_name]))
    elif args[self.max_name] == args[self.min_name] - 1:
        debug(
            "%s=%d and %s=%d might cause no iterations along Dimension %s",
            self.min_name, args[self.min_name],
            self.max_name, args[self.max_name], self.name)
def print_defaults():
    """
    Print, for each environment variable accepted by Devito, its default
    value as well as all of the accepted values.
    """
    for env_var, key in env_vars_mapper.items():
        debug('%s: %s. Default: %s' % (env_var, accepted[key], defaults[key]))
def __init__(self, name, yc_soln, local_grids=None):
    """
    Write out a YASK kernel, compile it using the YASK's Makefiles,
    import the corresponding SWIG-generated Python module, and finally
    create a YASK kernel solution object.

    Parameters
    ----------
    name : str
        Unique name of this YaskKernel.
    yc_soln
        The YaskCompiler solution.
    local_grids : list of Array, optional
        A local grid is necessary to run the YaskKernel, but it can be
        deallocated upon returning to Python-land. For example, local
        grids could be used to implement the temporary arrays introduced
        by the DSE. This parameter tells which of the ``yc_soln``'s grids
        are local.
    """
    self.name = name

    # Shared object name
    self.soname = "%s.devito.%s" % (name, configuration['platform'])

    if os.path.exists(os.path.join(namespace['yask-pylib'], '%s.py' % name)):
        # Nothing to do -- the YASK solution was compiled in a previous session
        yk = import_module(name)
        debug("cache hit, `%s` imported w/o jitting" % name)
    else:
        # We create and JIT compile a fresh YASK solution

        # The lock manager prevents race conditions
        # `lock_m` is used only to keep the lock manager alive
        with warnings.catch_warnings():
            cleanup_m = CleanupManager()
            lock_m = CacheLockManager(cleanup_m, namespace['yask-output-dir'])  # noqa

        # The directory in which the YASK-generated code (.hpp) will be placed
        yk_codegen = namespace['yask-codegen'](name, 'devito',
                                               configuration['platform'])
        if not os.path.exists(yk_codegen):
            os.makedirs(yk_codegen)

        # Write out the stencil file
        yk_codegen_file = os.path.join(yk_codegen, namespace['yask-codegen-file'])
        yc_soln.format(configuration['platform'].isa,
                       ofac.new_file_output(yk_codegen_file))

        # JIT-compile it
        compiler = configuration.yask['compiler']
        if configuration['develop-mode']:
            if yc_soln.get_num_equations() == 0:
                # YASK will compile more quickly, and no price has to be paid
                # in terms of performance, as this is a void kernel
                opt_level = 0
            else:
                opt_level = 1
        else:
            opt_level = 3
        args = [
            '-j',
            'YK_CXX=%s' % compiler.cc,
            'YK_CXXOPT=-O%d' % opt_level,
            # No MPI support at the moment
            'mpi=0',
            # To locate the YASK compiler
            'YC_EXEC=%s' % os.path.join(namespace['path'], 'bin'),
            # Error out if a grid not explicitly defined in the compiler is created
            'allow_new_grid_types=0',
            # To give a unique name to the generated Python modules, rather
            # than creating `yask_kernel.py`
            'YK_BASE=%s' % name,
            # `stencil` and `arch` should always be provided
            'stencil=%s' % 'devito',
            'arch=%s' % configuration['platform'],
            # The root directory of generated code files, shared libs, Python modules
            'YASK_OUTPUT_DIR=%s' % namespace['yask-output-dir'],
            # Pick the YASK kernel Makefile, i.e. the one under `yask/src/kernel`
            '-C', namespace['kernel-path'],
            # Make target
            'api'
        ]
        if configuration['develop-mode']:
            args.append('check=1')  # Activate internal YASK asserts
            args.append('trace=1')  # Print out verbose progress msgs w/-trace knob
            args.append('trace_mem=0')  # Print out verbose mem-access msgs
        make(namespace['path'], args)

        # Now we must be able to import the SWIG-generated Python module
        invalidate_caches()
        yk = import_module(name)

        # Release the lock manager
        cleanup_m.clean_up()

    # Create the YASK solution object
    kfac = yk.yk_factory()
    self.env = kfac.new_env()
    self.soln = kfac.new_solution(self.env)

    # Allow step indices to wrap-around
    self.soln.set_step_wrap(True)

    # Apply any user-provided options, if any.
    # These are applied here instead of just before prepare_solution()
    # so that applicable options will apply to all API calls
    self.soln.apply_command_line_options(configuration.yask['options'] or '')

    # MPI setup: simple rank configuration in 1st dim only.
    # TODO: in production runs, the ranks would be distributed along all
    # domain dimensions
    self.soln.set_num_ranks(self.space_dimensions[0], self.env.get_num_ranks())

    # Redirect stdout to a string or file
    if configuration.yask['dump']:
        filename = 'yk_dump.%s.%s.%s.txt' % (name, configuration['platform'],
                                             configuration['platform'].isa)
        filename = os.path.join(configuration.yask['dump'], filename)
        self.output = yk.yask_output_factory().new_file_output(filename)
    else:
        self.output = yk.yask_output_factory().new_string_output()
    self.soln.set_debug_output(self.output)

    # Users may want to run the same Operator (same domain etc.) with
    # different grids
    self.grids = {i.get_name(): i for i in self.soln.get_grids()}
    self.local_grids = {i.name: self.grids[i.name] for i in (local_grids or [])}
def pre_apply(self, toshare): """ Set up the YaskKernel before it's called from within an Operator. Parameters ---------- toshare : dict Mapper ``Function -> Data`` for grid-storage sharing. """ # Sanity check grids = {i.grid for i in toshare if i.is_DiscreteFunction and i.grid is not None} assert len(grids) == 1 grid = grids.pop() # Set the domain size, apply grid sharing, more sanity checks for k, v in toshare.items(): target = self.grids.get(k.name) if target is not None: v._give_storage(target) for k, v in zip(self.space_dimensions, grid.shape): self.soln.set_rank_domain_size(k, int(v)) assert all(not i.is_storage_allocated() for i in self.local_grids.values()) assert all(v.is_storage_allocated() for k, v in self.grids.items() if k not in self.local_grids) # Debug info debug("%s<%s,%s>" % (self.name, self.step_dimension, self.space_dimensions)) for i in list(self.grids.values()) + list(self.local_grids.values()): if i.get_num_dims() == 0: debug(" Scalar: %s", i.get_name()) elif not i.is_storage_allocated(): size = [i.get_rank_domain_size(j) for j in self.space_dimensions] debug(" LocalGrid: %s%s, size=%s" % (i.get_name(), str(i.get_dim_names()), size)) else: size = [] lpad, rpad = [], [] for j in i.get_dim_names(): if j in self.space_dimensions: size.append(i.get_rank_domain_size(j)) lpad.append(i.get_left_pad_size(j)) rpad.append(i.get_right_pad_size(j)) else: size.append(i.get_alloc_size(j)) lpad.append(0) rpad.append(0) debug(" Grid: %s%s, size=%s, left_pad=%s, right_pad=%s" % (i.get_name(), str(i.get_dim_names()), size, lpad, rpad)) # Set up the block shape for loop blocking for i, j in zip(self.space_dimensions, configuration.yask['blockshape']): self.soln.set_block_size(i, j) # This, amongst other things, allocates storage for the temporary grids self.soln.prepare_solution() # Set up auto-tuning if configuration['autotuning'].level is False: self.soln.reset_auto_tuner(False) elif configuration['autotuning'].mode == 'preemptive': 
self.soln.run_auto_tuner_now()
def jit_compile(soname, code, compiler):
    """
    JIT compile some source code given as a string.

    This function relies upon codepy's ``compile_from_string``, which performs
    caching of compilation units and avoids potential race conditions due to
    multiple processing trying to compile the same object.

    Parameters
    ----------
    soname : str
        Name of the .so file (w/o the suffix).
    code : str
        The source code to be JIT compiled.
    compiler : Compiler
        The toolchain used for JIT compilation.
    """
    target = str(get_jit_dir().joinpath(soname))
    src_file = "%s.%s" % (target, compiler.src_ext)

    cache_dir = get_codepy_dir().joinpath(soname[:7])
    if configuration['jit-backdoor'] is False:
        # Typically we end up here
        # Make a suite of cache directories based on the soname
        cache_dir.mkdir(parents=True, exist_ok=True)
    else:
        # Warning: dropping `code` on the floor in favor to whatever is written
        # within `src_file`
        try:
            with open(src_file, 'r') as f:
                code = f.read()
            # Bypass the devito JIT cache
            # Note: can't simply use Python's `mkdtemp()` as, with MPI, different
            # ranks would end up creating different cache dirs
            cache_dir = cache_dir.joinpath('jit-backdoor')
            cache_dir.mkdir(parents=True, exist_ok=True)
        except FileNotFoundError:
            raise ValueError("Trying to use the JIT backdoor for `%s`, but "
                             "the file isn't present" % src_file)

    # `catch_warnings` suppresses codepy complaining that it's taking
    # too long to acquire the cache lock. This warning can only appear
    # in a multiprocess session, typically (but not necessarily) when
    # many processes are frequently attempting jit-compilation (e.g.,
    # when running the test suite in parallel)
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        tic = time()
        # Spinlock in case of MPI
        sleep_delay = 0 if configuration['mpi'] else 1
        _, _, _, recompiled = compile_from_string(
            compiler, target, code, src_file, cache_dir=cache_dir,
            debug=configuration['debug-compiler'], sleep_delay=sleep_delay)
        toc = time()

    # Log whether this was a fresh compilation or a cache hit
    if recompiled:
        debug("%s: compiled `%s` [%.2f s]" % (compiler, src_file, toc-tic))
    else:
        debug("%s: cache hit `%s` [%.2f s]" % (compiler, src_file, toc-tic))
def print_state():
    """Print the current configuration state, one `key: value` pair per line."""
    for key, value in configuration.items():
        debug('%s: %s' % (key, value))
def run(self, cfunction, arguments, toshare):
    """
    Run the YaskKernel through a JIT-compiled function.

    :param cfunction: The JIT-compiler function, of type :class:`ctypes.FuncPtr`
    :param arguments: Mapper from function/dimension/... names to run-time values
                      to be passed to ``cfunction``.
    :param toshare: Mapper from functions to :class:`Data`s for sharing
                    grid storage.
    """
    # Sanity check: all shared functions must live on the same grid
    grids = {i.grid for i in toshare if i.is_TensorFunction}
    assert len(grids) == 1
    grid = grids.pop()

    # Set the domain size, apply grid sharing, more sanity checks
    for k, v in zip(self.space_dimensions, grid.shape):
        self.soln.set_rank_domain_size(k, int(v))
    for k, v in toshare.items():
        target = self.grids.get(k.name)
        if target is not None:
            v._give_storage(target)
    # Local grids get storage later (via prepare_solution); shared ones now
    assert all(not i.is_storage_allocated() for i in self.local_grids.values())
    assert all(v.is_storage_allocated() for k, v in self.grids.items()
               if k not in self.local_grids)

    # Debug info
    debug("%s<%s,%s>" % (self.name, self.time_dimension, self.space_dimensions))
    for i in list(self.grids.values()) + list(self.local_grids.values()):
        if i.get_num_dims() == 0:
            debug(" Scalar: %s", i.get_name())
        elif not i.is_storage_allocated():
            size = [i.get_rank_domain_size(j) for j in self.space_dimensions]
            debug(" LocalGrid: %s%s, size=%s" %
                  (i.get_name(), str(i.get_dim_names()), size))
        else:
            size = [i.get_rank_domain_size(j) for j in self.space_dimensions]
            pad = [i.get_pad_size(j) for j in self.space_dimensions]
            debug(" Grid: %s%s, size=%s, pad=%s" %
                  (i.get_name(), str(i.get_dim_names()), size, pad))

    # Apply any user-provided option, if any
    self.soln.apply_command_line_options(configuration.yask['options'] or '')
    # Set up the block shape for loop blocking
    for i, j in zip(self.space_dimensions, configuration.yask['blockshape']):
        self.soln.set_block_size(i, j)

    # This, amongst other things, allocates storage for the temporary grids
    self.soln.prepare_solution()

    # Set up auto-tuning
    if configuration.yask['autotuning'] == 'off':
        self.soln.reset_auto_tuner(False)
    elif configuration.yask['autotuning'] == 'preemptive':
        self.soln.run_auto_tuner_now()

    # Run the kernel
    cfunction(*list(arguments.values()))

    # Release grid storage. Note: this *will not* cause deallocation, as these
    # grids are actually shared with the hook solution
    for i in self.grids.values():
        i.release_storage()
    # Release local grid storage. This *will* cause deallocation
    for i in self.local_grids.values():
        i.release_storage()

    # Dump performance data
    self.soln.get_stats()
def __init__(self, name, yc_soln, local_vars=None):
    """
    Write out a YASK kernel, compile it using the YASK's Makefiles,
    import the corresponding SWIG-generated Python module, and finally
    create a YASK kernel solution object.

    Parameters
    ----------
    name : str
        Unique name of this YaskKernel.
    yc_soln
        The YaskCompiler solution.
    local_vars : list of Array, optional
        A local var is necessary to run the YaskKernel, but it can be
        deallocated upon returning to Python-land. For example, local
        vars could be used to implement the temporary arrays introduced
        by the DSE. This parameter tells which of the ``yc_soln``'s vars
        are local.
    """
    self.name = name

    # Shared object name
    self.soname = "%s.devito.%s" % (name, configuration['platform'])

    if os.path.exists(os.path.join(namespace['yask-pylib'], '%s.py' % name)):
        # Nothing to do -- the YASK solution was compiled in a previous session
        yk = import_module(name)
        debug("cache hit, `%s` imported w/o jitting" % name)
    else:
        # We create and JIT compile a fresh YASK solution

        # The lock manager prevents race conditions
        # `lock_m` is used only to keep the lock manager alive
        with warnings.catch_warnings():
            cleanup_m = CleanupManager()
            lock_m = CacheLockManager(cleanup_m, namespace['yask-output-dir'])  # noqa

        # The directory in which the YASK-generated code (.hpp) will be placed
        yk_codegen = namespace['yask-codegen'](name, 'devito',
                                               configuration['platform'])
        if not os.path.exists(yk_codegen):
            os.makedirs(yk_codegen)

        # Write out the stencil file
        yk_codegen_file = os.path.join(yk_codegen, namespace['yask-codegen-file'])
        yc_soln.output_solution(ofac.new_file_output(yk_codegen_file))

        # JIT-compile it
        compiler = configuration.yask['compiler']
        if configuration['develop-mode']:
            if yc_soln.get_num_equations() == 0:
                # YASK will compile more quickly, and no price has to be paid
                # in terms of performance, as this is a void kernel
                opt_level = 0
            else:
                opt_level = 1
        else:
            opt_level = 3
        args = [
            '-j',
            'YK_CXX=%s' % compiler.cc,
            'YK_CXXOPT=-O%d' % opt_level,
            # No MPI support at the moment
            'mpi=0',
            # To locate the YASK compiler
            'YC_EXEC=%s' % os.path.join(namespace['path'], 'bin'),
            # Error out if a var not explicitly defined in the compiler is created
            'allow_new_var_types=0',
            # To give a unique name to the generated Python modules, rather
            # than creating `yask_kernel.py`
            'YK_BASE=%s' % name,
            # `stencil` and `arch` should always be provided
            'stencil=%s' % 'devito',
            'arch=%s' % configuration['platform'],
            # The root directory of generated code files, shared libs, Python modules
            'YASK_OUTPUT_DIR=%s' % namespace['yask-output-dir'],
            # Pick the YASK kernel Makefile, i.e. the one under `yask/src/kernel`
            '-C', namespace['kernel-path'],
            # Make target
            'api'
        ]
        if configuration['develop-mode']:
            # Activate internal YASK asserts
            args.append('check=1')
            # Enable verbose progress msgs w/-trace knob
            args.append('trace=1')
            # Enable verbose mem-access msgs w/-trace knob
            args.append('trace_mem=0')
        compiler.make(namespace['path'], args)

        # Import the SWIG-generated Python module
        invalidate_caches()
        yk = import_module(name)

        # Release the lock manager
        cleanup_m.clean_up()

    # Create the YASK solution object
    kfac = yk.yk_factory()
    self.env = kfac.new_env()
    self.soln = kfac.new_solution(self.env)

    # Allow step indices to wrap-around
    self.soln.set_step_wrap(True)

    # Apply any user-provided options, if any.
    # These are applied here instead of just before prepare_solution()
    # so that applicable options will apply to all API calls
    self.soln.apply_command_line_options(configuration.yask['options'] or '')

    # MPI setup: simple rank configuration in 1st dim only.
    # TODO: in production runs, the ranks would be distributed along all
    # domain dimensions
    self.soln.set_num_ranks(self.space_dimensions[0], self.env.get_num_ranks())

    # Redirect stdout to a string or file
    if configuration.yask['dump']:
        filename = 'yk_dump.%s.%s.%s.txt' % (
            name, configuration['platform'], configuration['platform'].isa)
        filename = os.path.join(configuration.yask['dump'], filename)
        self.output = yk.yask_output_factory().new_file_output(filename)
    else:
        self.output = yk.yask_output_factory().new_string_output()
    self.soln.set_debug_output(self.output)

    # Users may want to run the same Operator (same domain etc.) with
    # different vars
    self.vars = {i.get_name(): i for i in self.soln.get_vars()}
    self.local_vars = {i.name: self.vars[i.name] for i in (local_vars or [])}