def putdefault(self, grid):
    """
    Derive a unique key ``K`` from a Grid; if ``K`` is in ``self``,
    return the pre-existing YaskContext ``self[K]``, otherwise create
    a new context ``C``, set ``self[K] = C`` and return ``C``.

    :param grid: The Grid from which the key is derived; must not be None.
                 Its ``dtype`` attribute is part of the key.
    :returns: The YaskContext associated with ``grid`` (either the cached
              one or the newly created one).
    """
    assert grid is not None

    key = self._getkey(grid, grid.dtype)

    # Does a YaskContext exist already corresponding to this key?
    if key in self:
        return self[key]

    # Functions declared with explicit dimensions (i.e., with no Grid) must be
    # able to retrieve the right context
    partial_keys = [self._getkey(None, grid.dtype, i) for i in powerset(key[-1])]
    if any(i in self._partial_map for i in partial_keys if i[2]):
        warning("Non-unique Dimensions found in different contexts; dumping "
                "all known contexts. Perhaps you're attempting to use multiple "
                "Grids, and some of them share identical Dimensions? ")
        self.dump()

    # Create a new YaskContext
    context = YaskContext('ctx%d' % self._ncontexts, grid)
    self._ncontexts += 1

    self[key] = context
    self._partial_map.update({i: context for i in partial_keys})

    log("Context successfully created!")

    # Honour the documented contract: the creation path used to fall off
    # the end of the function and implicitly return None
    return context
def wrapper(self):
    """
    Allocate the data of ``self`` on first use, then call the wrapped ``func``.

    Allocation only happens while ``self._data`` is None; it also refreshes
    ``self._padding`` from the values reported by the YASK grid.
    """
    if self._data is None:
        log("Allocating memory for %s%s" % (self.name, self.shape_allocated))

        # Fetch the appropriate context
        ctx = contexts.fetch(self.dimensions, self.dtype)

        # Create a YASK grid; this allocates memory
        yk_grid = ctx.make_grid(self)

        # /self._padding/ must be updated as (from the YASK docs):
        # "The value may be slightly larger [...] due to rounding"
        # Only space dimensions carry padding; time and misc dimensions get (0, 0)
        self._padding = tuple(
            (yk_grid.get_left_extra_pad_size(d.name),
             yk_grid.get_right_extra_pad_size(d.name)) if d.is_Space else (0, 0)
            for d in self.dimensions
        )

        self._data = Data(yk_grid, self.shape_allocated, self.indices, self.dtype)
        self._data.reset()
    return func(self)
def make(loc, args):
    """
    Invoke ``make`` command from within ``loc`` with arguments ``args``.

    Standard output and standard error of the command are redirected to a
    ``.log`` and a ``.err`` file in the JIT directory, named after a SHA1
    digest of ``loc`` and ``args``. A non-zero exit status is reported by
    raising ``CompilationError``.
    """
    digest = sha1((loc + str(args)).encode()).hexdigest()
    logfile = path.join(get_jit_dir(), "%s.log" % digest)
    errfile = path.join(get_jit_dir(), "%s.err" % digest)

    tic = time()
    with change_directory(loc), open(logfile, "w") as lf, open(errfile, "w") as ef:
        command = ['make'] + args
        lf.write("Compilation command:\n")
        lf.write(" ".join(command))
        lf.write("\n\n")
        try:
            check_call(command, stderr=ef, stdout=lf)
        except CalledProcessError as e:
            msg = ('Command "%s" return error status %d. '
                   'Unable to compile code.\n'
                   'Compile log in %s\n'
                   'Compile errors in %s\n' %
                   (e.cmd, e.returncode, logfile, errfile))
            raise CompilationError(msg)
    toc = time()

    log("Make <%s>: run in [%.2f s]" % (" ".join(args), toc - tic))
def execute_devito(ui, spacing=0.01, a=0.5, timesteps=500):
    """
    Execute diffusion stencil using the devito Operator API.

    Returns a pair ``(u.data[1, :], runtime)`` where ``runtime`` is the
    wall-clock time, in seconds, spent in ``op.apply()``.
    """
    nx, ny = ui.shape
    dx2 = dy2 = spacing**2
    dt = dx2 * dy2 / (2 * a * (dx2 + dy2))

    # Allocate the grid and set initial condition
    # Note: This should be made simpler through the use of defaults
    u = TimeData(name='u', shape=(nx, ny), time_order=1, space_order=2)
    u.data[0, :] = ui[:]

    # Derive the stencil according to devito conventions
    eqn = Eq(u.dt, a * (u.dx2 + u.dy2))
    stencil = solve(eqn, u.forward)[0]
    update = Eq(u.forward, stencil)
    op = Operator(stencils=update, subs={h: spacing, s: dt}, nt=timesteps,
                  shape=(nx, ny), spc_border=1, time_order=1)

    # Execute the generated Devito stencil operator
    tstart = time.time()
    op.apply()
    runtime = time.time() - tstart

    log("Devito: Diffusion with dx=%0.4f, dy=%0.4f, executed %d timesteps in %f seconds"
        % (spacing, spacing, timesteps, runtime))
    return u.data[1, :], runtime
def wrapper(self):
    """
    Allocate the data of ``self`` on first use, then call the wrapped ``func``.

    Allocation only happens while ``self._data`` is None; it also refreshes
    ``self._padding`` from the values reported by the YASK var and drops the
    cached ``shape_allocated`` so that it is recomputed.
    """
    if self._data is None:
        log("Allocating memory for %s%s" % (self.name, self.shape_allocated))

        # Free memory carried by stale symbolic objects
        # TODO: see issue #944
        # CacheManager.clear(dump_contexts=False, force=False)

        # Fetch the appropriate context
        ctx = contexts.fetch(self.dimensions, self.dtype)

        # Create a YASK var; this allocates memory
        yk_var = ctx.make_var(self)

        # `self._padding` must be updated as (from the YASK docs):
        # "The value may be slightly larger [...] due to rounding"
        # Only space dimensions carry padding; time and misc dimensions get (0, 0)
        self._padding = tuple(
            (yk_var.get_left_extra_pad_size(d.name),
             yk_var.get_right_extra_pad_size(d.name)) if d.is_Space else (0, 0)
            for d in self.dimensions
        )

        del self.shape_allocated  # Invalidate cached_property

        self._data = Data(yk_var, self.shape_allocated, self.indices, self.dtype)
        self._data.reset()
    return func(self)
def execute_lambdify(ui, spacing=0.01, a=0.5, timesteps=500):
    """
    Execute diffusion stencil using vectorised numpy array accesses.

    :param ui: 2D array with the initial condition.
    :param spacing: Grid spacing, used for both dimensions.
    :param a: Diffusion coefficient.
    :param timesteps: Number of timesteps to execute.
    :returns: A pair ``(buffer, runtime)``; ``runtime`` is the wall-clock time
              of the timestepping loop in seconds. With ``timesteps <= 0`` the
              untouched initial buffer is returned.
    """
    nx, ny = ui.shape
    dx2, dy2 = spacing**2, spacing**2
    dt = dx2 * dy2 / (2 * a * (dx2 + dy2))
    u = np.concatenate((ui, np.zeros_like(ui))).reshape((2, nx, ny))

    def diffusion_stencil():
        """Create stencil and substitutions for the diffusion equation"""
        p = sympy.Function('p')
        x, y, t, h, s = sympy.symbols('x y t h s')
        dx2 = p(x, y, t).diff(x, x).as_finite_difference([x - h, x, x + h])
        dy2 = p(x, y, t).diff(y, y).as_finite_difference([y - h, y, y + h])
        dt = p(x, y, t).diff(t).as_finite_difference([t, t + s])
        eqn = Eq(dt, a * (dx2 + dy2))
        stencil = solve(eqn, p(x, y, t + s))
        return stencil, (p(x, y, t), p(x + h, y, t), p(x - h, y, t),
                         p(x, y + h, t), p(x, y - h, t), s, h)

    stencil, subs = diffusion_stencil()
    kernel = sympy.lambdify(subs, stencil, 'numpy')

    # Execute timestepping loop with alternating buffers
    tstart = time.time()
    for ti in range(timesteps):
        t0 = ti % 2
        t1 = (ti + 1) % 2
        u[t1, 1:-1, 1:-1] = kernel(u[t0, 1:-1, 1:-1], u[t0, 2:, 1:-1],
                                   u[t0, :-2, 1:-1], u[t0, 1:-1, 2:],
                                   u[t0, 1:-1, :-2], dt, spacing)
    runtime = time.time() - tstart

    log("Lambdify: Diffusion with dx=%0.4f, dy=%0.4f, executed %d timesteps in %f seconds"
        % (spacing, spacing, timesteps, runtime))

    # The loop variable is unbound when the loop never runs, so derive the
    # buffer index from `timesteps`; for `timesteps > 0` this equals `ti % 2`.
    # NOTE(review): this is the buffer *read* by the final step, not the one
    # written by it -- confirm that is the intended result.
    last = (timesteps - 1) % 2 if timesteps > 0 else 0
    return u[last, :, :], runtime
def fetch(self, dimensions, shape, dtype):
    """
    Fetch the :class:`YaskContext` in ``self`` uniquely identified by
    ``dimensions``, ``shape``, and ``dtype``. Create a new (empty)
    :class:`YaskContext` on miss.

    :param dimensions: Iterable of dimensions; compared through their ``str``.
    :param shape: Sizes, one per entry of ``dimensions``.
    :param dtype: Data type, part of the context key.
    """
    # Sanity checks
    assert len(dimensions) == len(shape)
    names = [str(d) for d in dimensions]
    if set(names) < {'x', 'y', 'z'}:
        exit("Need a Function[x,y,z] for initialization")

    # The time dimension is dropped as implicit to the context
    domain = OrderedDict()
    for name, size in zip(names, shape):
        if name != namespace['time-dim']:
            domain[name] = size

    # A unique key for this context.
    key = tuple([configuration['isa'], dtype] + list(domain.items()))

    # Fetch or create a YaskContext
    if key not in self:
        self[key] = YaskContext('ctx%d' % self.ncontexts, domain, dtype)
        self.ncontexts += 1
        log("Context successfully created!")
    else:
        log("Fetched existing context from cache")
    return self[key]
def wrapper(self):
    """
    Allocate the data of ``self`` on first use, then call the wrapped ``func``.

    Allocation only happens while ``self._data`` is None; it also refreshes
    ``self._padding`` from the values reported by the YASK grid and drops the
    cached ``shape_allocated`` so that it is recomputed.
    """
    if self._data is None:
        log("Allocating memory for %s%s" % (self.name, self.shape_allocated))

        # Fetch the appropriate context
        ctx = contexts.fetch(self.dimensions, self.dtype)

        # Create a YASK grid; this allocates memory
        yk_grid = ctx.make_grid(self)

        # `self._padding` must be updated as (from the YASK docs):
        # "The value may be slightly larger [...] due to rounding"
        # Only space dimensions carry padding; time and misc dimensions get (0, 0)
        self._padding = tuple(
            (yk_grid.get_left_extra_pad_size(d.name),
             yk_grid.get_right_extra_pad_size(d.name)) if d.is_Space else (0, 0)
            for d in self.dimensions
        )

        del self.shape_allocated  # Invalidate cached_property

        self._data = Data(yk_grid, self.shape_allocated, self.indices, self.dtype)
        self._data.reset()
    return func(self)
def jit_compile(soname, code, compiler):
    """
    JIT compile the given C/C++ ``code``.

    This function relies upon codepy's ``compile_from_string``, which performs
    caching of compilation units and avoids potential race conditions due to
    multiple processing trying to compile the same object.

    :param soname: A unique name for the jit-compiled shared object.
    :param code: String of C source code.
    :param compiler: The toolchain used for compilation.
    """
    target = str(get_jit_dir().joinpath(soname))
    src_file = "%s.%s" % (target, compiler.src_ext)

    # Suppress codepy complaining that it's taking too long to acquire the
    # cache lock. This warning can only appear in a multiprocess session,
    # typically (but not necessarily) when many processes are frequently
    # attempting jit-compilation (e.g., when running the test suite in
    # parallel)
    with warnings.catch_warnings():
        # `catch_warnings` on its own only saves/restores the filter state;
        # an explicit filter is required to actually silence the warnings
        warnings.simplefilter('ignore')

        tic = time()
        _, _, _, recompiled = compile_from_string(
            compiler, target, code, src_file,
            cache_dir=get_codepy_dir(),
            debug=configuration['debug_compiler'])
        toc = time()

    if recompiled:
        log("%s: compiled `%s` [%.2f s]" % (compiler, src_file, toc - tic))
    else:
        log("%s: cache hit `%s` [%.2f s]" % (compiler, src_file, toc - tic))
def jit_compile(ccode, compiler=GNUCompiler):
    """JIT compile the given ccode.

    :param ccode: String of C source code.
    :param compiler: The toolchain used for compilation. GNUCompiler by default.
    :return: The name of the compilation unit.
    :raises NotImplementedError: If the shared-library suffix for the current
                                 platform is unknown.
    """
    hash_key = sha1(str(ccode).encode()).hexdigest()
    basename = path.join(get_tmp_dir(), hash_key)
    src_file = "%s.%s" % (basename, compiler.src_ext)

    # Pick the shared-library suffix for the running platform. Previously an
    # unrecognised platform left `lib_file` unbound and failed later with an
    # unrelated-looking UnboundLocalError.
    if platform.startswith("linux"):
        lib_file = "%s.so" % basename
    elif platform == "darwin":
        lib_file = "%s.dylib" % basename
    elif platform in ("win32", "win64"):
        lib_file = "%s.dll" % basename
    else:
        raise NotImplementedError("Unsupported platform `%s` for JIT compilation"
                                  % platform)

    tic = time()
    extension_file_from_string(toolchain=compiler, ext_file=lib_file,
                               source_string=ccode, source_name=src_file)
    toc = time()
    log("%s: compiled %s [%.2f s]" % (compiler, src_file, toc-tic))

    return basename
def execute_lambdify(ui, spacing=0.01, a=0.5, timesteps=500):
    """
    Execute diffusion stencil using vectorised numpy array accesses.

    :param ui: 2D array with the initial condition.
    :param spacing: Grid spacing, used for both dimensions.
    :param a: Diffusion coefficient.
    :param timesteps: Number of timesteps to execute.
    :returns: A pair ``(buffer, runtime)``; ``runtime`` is the wall-clock time
              of the timestepping loop in seconds. With ``timesteps <= 0`` the
              untouched initial buffer is returned.
    """
    nx, ny = ui.shape
    dx2, dy2 = spacing**2, spacing**2
    dt = dx2 * dy2 / (2 * a * (dx2 + dy2))
    u = np.concatenate((ui, np.zeros_like(ui))).reshape((2, nx, ny))

    def diffusion_stencil():
        """Create stencil and substitutions for the diffusion equation"""
        p = Function('p')
        dx2 = as_finite_diff(p(x, y, t).diff(x, x), [x - h, x, x + h])
        dy2 = as_finite_diff(p(x, y, t).diff(y, y), [y - h, y, y + h])
        dt = as_finite_diff(p(x, y, t).diff(t), [t, t + s])
        eqn = Eq(dt, a * (dx2 + dy2))
        stencil = solve(eqn, p(x, y, t + s))[0]
        return stencil, (p(x, y, t), p(x + h, y, t), p(x - h, y, t),
                         p(x, y + h, t), p(x, y - h, t), s, h)

    stencil, subs = diffusion_stencil()
    kernel = lambdify(subs, stencil, 'numpy')

    # Execute timestepping loop with alternating buffers
    tstart = time.time()
    for ti in range(timesteps):
        t0 = ti % 2
        t1 = (ti + 1) % 2
        u[t1, 1:-1, 1:-1] = kernel(u[t0, 1:-1, 1:-1], u[t0, 2:, 1:-1],
                                   u[t0, :-2, 1:-1], u[t0, 1:-1, 2:],
                                   u[t0, 1:-1, :-2], dt, spacing)
    runtime = time.time() - tstart

    log("Lambdify: Diffusion with dx=%0.4f, dy=%0.4f, executed %d timesteps in %f seconds"
        % (spacing, spacing, timesteps, runtime))

    # The loop variable is unbound when the loop never runs, so derive the
    # buffer index from `timesteps`; for `timesteps > 0` this equals `ti % 2`.
    # NOTE(review): this is the buffer *read* by the final step, not the one
    # written by it -- confirm that is the intended result.
    last = (timesteps - 1) % 2 if timesteps > 0 else 0
    return u[last, :, :], runtime
def putdefault(self, grid):
    """
    Derive a key ``K`` from the :class:`Grid` ``grid``; if ``K`` in ``self``,
    return the existing :class:`YaskContext` ``self[K]``, otherwise create a
    new context ``C``, set ``self[K] = C`` and return ``C``.

    :param grid: The :class:`Grid` from which the key is derived; must not be
                 None. Its ``dtype`` attribute is part of the key.
    :returns: The :class:`YaskContext` associated with ``grid`` (either the
              cached one or the newly created one).
    """
    assert grid is not None

    key = self._getkey(grid, grid.dtype)

    # Does a YaskContext exist already corresponding to this key?
    if key in self:
        return self[key]

    # Functions declared with explicit dimensions (i.e., with no Grid) must be
    # able to retrieve the right context
    partial_keys = [
        self._getkey(None, grid.dtype, i) for i in powerset(key[-1])
    ]
    if any(i in self._partial_map for i in partial_keys if i[2]):
        warning(
            "Non-unique Dimensions found in different contexts; dumping "
            "all known contexts. Perhaps you're attempting to use multiple "
            "Grids, and some of them share identical Dimensions? ")
        self.dump()

    # Create a new YaskContext
    context = YaskContext('ctx%d' % self._ncontexts, grid)
    self._ncontexts += 1

    self[key] = context
    self._partial_map.update({i: context for i in partial_keys})

    log("Context successfully created!")

    # Honour the documented contract: the creation path used to fall off
    # the end of the function and implicitly return None
    return context
def _specialize_iet(self, nodes):
    """
    Transform the Iteration/Expression tree to offload the computation of
    one or more loop nests onto YASK. This involves calling the YASK compiler
    to generate YASK code. Such YASK code is then called from within the
    transformed Iteration/Expression tree.

    :param nodes: The Iteration/Expression tree to be specialized.
    :returns: The (possibly transformed) Iteration/Expression tree.

    At most one offloadable tree is supported; finding more than one is
    reported through ``exit``. A failure while offloading the candidate tree
    is logged and the Operator falls back to the null context/kernel set at
    the beginning of this method.
    """
    log("Specializing a Devito Operator for YASK...")

    self.context = YaskNullContext()
    self.yk_soln = YaskNullKernel()

    offloadable = find_offloadable_trees(nodes)
    if len(offloadable) == 0:
        log("No offloadable trees found")
    elif len(offloadable) == 1:
        tree, grid, dtype = offloadable[0]
        self.context = contexts.fetch(grid, dtype)

        # Create a YASK compiler solution for this Operator
        yc_soln = self.context.make_yc_solution(namespace['jit-yc-soln'])

        transform = sympy2yask(self.context, yc_soln)
        try:
            for i in tree[-1].nodes:
                transform(i.expr)

            funcall = make_sharedptr_funcall(namespace['code-soln-run'], ['time'],
                                             namespace['code-soln-name'])
            funcall = Element(c.Statement(ccode(funcall)))
            nodes = Transformer({tree[1]: funcall}).visit(nodes)

            # Track /funcall/ as an external function call
            self.func_table[namespace['code-soln-run']] = MetaCall(None, False)

            # JIT-compile the newly-created YASK kernel
            local_grids = [i for i in transform.mapper if i.is_Array]
            self.yk_soln = self.context.make_yk_solution(namespace['jit-yk-soln'],
                                                         yc_soln, local_grids)

            # Print some useful information about the newly constructed solution
            log("Solution '%s' contains %d grid(s) and %d equation(s)." %
                (yc_soln.get_name(), yc_soln.get_num_grids(),
                 yc_soln.get_num_equations()))
        except Exception:
            # Offloading is best-effort, so ordinary errors are tolerated.
            # A bare `except:` would also swallow KeyboardInterrupt and
            # SystemExit, which must propagate.
            log("Unable to offload a candidate tree.")
    else:
        exit("Found more than one offloadable trees in a single Operator")

    # Some Iteration/Expression trees are not offloaded to YASK and may
    # require further processing to be executed in YASK, due to the differences
    # in storage layout employed by Devito and YASK
    nodes = make_grid_accesses(nodes)

    log("Specialization successfully performed!")

    return nodes
def __init__(self, *args, **kwargs):
    """
    Set up a clang-based toolchain.

    All arguments are forwarded to the parent constructor. OpenMP, if
    requested, is switched off after logging a warning.
    """
    super(ClangCompiler, self).__init__(*args, **kwargs)

    # clang acts as both compiler and linker
    self.cc = self.ld = 'clang'
    self.cflags = ['-O3', '-g', '-fPIC', '-Wall']
    self.ldflags = ['-shared']

    if not self.openmp:
        return
    log("WARNING: Disabling OpenMP because clang does not support it.")
    self.openmp = False
def __init__(self, *args, **kwargs):
    """
    Set up an icc-based toolchain with the ``-xMIC-AVX512`` flag.

    All arguments are forwarded to the parent constructor. The OpenMP linker
    flag is added when ``self.openmp`` is set; otherwise a warning is logged.
    """
    super(IntelKNLCompiler, self).__init__(*args, **kwargs)

    # icc acts as both compiler and linker
    self.cc = self.ld = 'icc'
    self.cflags = ['-O3', '-g', '-fPIC', '-Wall', '-std=c99', "-xMIC-AVX512"]
    self.ldflags = ['-shared']

    if not self.openmp:
        log("WARNING: Running on Intel KNL without OpenMP is highly discouraged")
    else:
        self.ldflags += ['-qopenmp']
def apply(self, **kwargs):
    """
    Run the YASK Operator.

    The keyword arguments are turned into kernel arguments by
    ``self.arguments``; the value returned is that of
    ``self._profile_output``.
    """
    # Build the arguments list to invoke the kernel function
    kernel_args, shared = self.arguments(**kwargs)

    log("Running YASK Operator through Devito...")
    self.yk_soln.run(self.cfunction, kernel_args, shared)
    log("YASK Operator successfully run!")

    # Output summary of performance achieved
    summary = self._profile_output(kernel_args)
    return summary
def __getitem__(self, index):
    """
    Read either a single entry or a block of entries from the grid.

    ``index`` is translated by ``self._convert_index`` into a ``start``
    point, a ``stop`` point and the ``shape`` of the selection; a falsy
    ``shape`` denotes a single entry.
    """
    start, stop, shape = self._convert_index(index)
    if shape:
        log("Data: Getting full-array/block via index [%s]" % str(index))
        # Destination buffer, filled in place by the grid
        block = np.empty(shape, self.dtype, 'C')
        self.grid.get_elements_in_slice(block.data, start, stop)
        return block

    log("Data: Getting single entry %s" % str(start))
    assert start == stop
    return self.grid.get_element(start)
def __getitem__(self, index):
    """
    Read either a single entry or a block of entries from the grid.

    ``index`` is translated by ``self._convert_index`` into a ``start``
    point, a ``stop`` point and the ``shape`` of the selection; a falsy
    ``shape`` denotes a single entry.
    """
    start, stop, shape = self._convert_index(index)
    if shape:
        log("Data: Getting full-array/block via index [%s]" % str(index))
        # Destination buffer, filled in place by the grid
        block = np.empty(shape, self.dtype, 'C')
        self.grid.get_elements_in_slice(block.data, start, stop)
        return block

    log("Data: Getting single entry %s" % str(start))
    assert start == stop
    return self.grid.get_element(start)
def test_tti_staggered(shape):
    """
    Run the staggered TTI kernel with the 'advanced' and the 'basic' DLE
    modes and check that the two wavefields match.
    """
    spacing = [10. for _ in shape]

    # Model
    model = demo_model('constant-tti', shape=shape, spacing=spacing)

    # Define seismic data and parameters
    f0 = .010
    dt = model.critical_dt
    t0, tn = 0.0, 250.0
    time_range = TimeAxis(start=t0, stop=tn, step=dt)
    nt = time_range.num

    last = (nt - 1) % 2

    # Generate a wavefield as initial condition
    source = RickerSource(name='src', grid=model.grid, f0=f0, time_range=time_range)
    source.coordinates.data[0, :] = np.array(model.domain_size) * .5

    receiver = Receiver(name='rec', grid=model.grid, time_range=time_range, npoint=1)

    # Solvers
    solver_tti = AnisotropicWaveSolver(model, source=source, receiver=receiver,
                                       time_order=2, space_order=8)
    solver_tti2 = AnisotropicWaveSolver(model, source=source, receiver=receiver,
                                        time_order=2, space_order=8)

    # Solve
    configuration['dse'] = 'aggressive'
    configuration['dle'] = 'advanced'
    rec1, u1, v1, _ = solver_tti.forward(kernel='staggered')
    configuration['dle'] = 'basic'
    rec2, u2, v2, _ = solver_tti2.forward(kernel='staggered')

    # Compare the sum of the two fields at the `last` time slot
    u_staggered1 = u1.data[last, :] + v1.data[last, :]
    u_staggered2 = u2.data[last, :] + v2.data[last, :]
    diff = u_staggered1.reshape(-1) - u_staggered2.reshape(-1)

    res = np.linalg.norm(diff)
    log("DSE/DLE introduces error %2.4e in %d dimensions" % (res, len(shape)))
    assert np.isclose(res, 0.0, atol=1e-8)
def fetch(self, grid, dtype, dimensions=None):
    """
    Fetch the :class:`YaskContext` in ``self`` uniquely identified by
    ``grid`` and ``dtype``.

    The lookup first tries ``self`` and then falls back to
    ``self._partial_map``; a miss is reported through ``exit``.
    """
    key = self._getkey(grid, dtype, dimensions)
    fallback = self._partial_map.get(key)
    context = self.get(key, fallback)

    if context is None:
        exit("Couldn't find context for grid %s" % grid)
    else:
        log("Fetched existing context from cache")
        return context
def __init__(self, *args, **kwargs):
    """
    Set up an icc-based toolchain with the ``-mmic`` flag.

    All arguments are forwarded to the parent constructor. The OpenMP linker
    flag is added when ``configuration['openmp']`` is set; otherwise a
    warning is logged. The ``pymic`` module is imported and stored in
    ``self._mic``.
    """
    super(IntelMICCompiler, self).__init__(*args, **kwargs)

    # icc acts as both compiler and linker
    self.cc = self.ld = 'icc'
    self.cflags = ['-O3', '-g', '-fPIC', '-Wall', '-std=c99', "-mmic"]
    self.ldflags = ['-shared']

    if not configuration['openmp']:
        log("WARNING: Running on Intel MIC without OpenMP is highly discouraged")
    else:
        self.ldflags += ['-qopenmp']

    self._mic = __import__('pymic')
def fetch(self, dimensions, dtype):
    """
    Fetch the YaskContext in ``self`` uniquely identified by ``dimensions``
    and ``dtype``.

    The lookup first tries ``self`` and then falls back to
    ``self._partial_map``; a miss is reported through ``exit``.
    """
    key = self._getkey(None, dtype, dimensions)
    fallback = self._partial_map.get(key)
    context = self.get(key, fallback)

    if context is None:
        exit("Couldn't find YaskContext for key=`%s`" % str(key))
    else:
        log("Fetched existing YaskContext from cache")
        return context
def fetch(self, dimensions, dtype):
    """
    Fetch the :class:`YaskContext` in ``self`` uniquely identified by
    ``dimensions`` and ``dtype``.

    The lookup first tries ``self`` and then falls back to
    ``self._partial_map``; a miss is reported through ``exit``.
    """
    key = self._getkey(None, dtype, dimensions)
    fallback = self._partial_map.get(key)
    context = self.get(key, fallback)

    if context is None:
        exit("Couldn't find YaskContext for key=`%s`" % str(key))
    else:
        log("Fetched existing YaskContext from cache")
        return context
def __setitem__(self, index, val):
    """
    Write ``val`` into the grid at ``index``.

    ``index`` is translated by ``self._convert_index`` into a ``start``
    point, a ``stop`` point and the ``shape`` of the selection. Depending on
    the selection and on the type of ``val``, a single entry, an array block,
    the whole array, or a scalar-filled block is written.

    :raises ValueError: If ``val`` is an array with more dimensions than the
                        selection.
    """
    start, stop, shape = self._convert_index(index, 'set')

    if all(extent == 1 for extent in shape):
        log("Data: Setting single entry %s" % str(start))
        assert start == stop
        self.grid.set_element(val, start)
        return

    if isinstance(val, np.ndarray):
        log("Data: Setting full-array/block via index [%s]" % str(index))
        if val.shape == shape:
            self.grid.set_elements_in_slice(val, start, stop)
            return
        if len(val.shape) > len(shape):
            raise ValueError(
                "Data: could not broadcast input array from shape "
                "%s into shape %s" % (val.shape, shape))
        # Emulate NumPy broadcasting
        expanded = np.empty(shape=shape, dtype=val.dtype)
        expanded[:] = val
        self.grid.set_elements_in_slice(expanded, start, stop)
        return

    if all(i == j - 1 for i, j in zip(shape, self.shape)):
        log("Data: Setting full-array to given scalar via single grid sweep")
        self.grid.set_all_elements_same(val)
        return

    log("Data: Setting block to given scalar via index [%s]" % str(index))
    self.grid.set_elements_in_slice_same(val, start, stop, True)
def make_grid(self, obj):
    """
    Create and return a new :class:`YaskGrid`, a YASK grid wrapper. Memory
    is allocated.

    The wrapper is also recorded in ``self.grids`` under the generated name.

    :param obj: The symbolic data object for which a YASK grid is allocated.
    """
    # The indices of `obj` must not be a proper subset of the space dimensions
    if set(obj.indices) < set(self.space_dimensions):
        exit("Need a Function[x,y,z] to create a YASK grid.")

    grid_name = 'devito_%s_%d' % (obj.name, contexts.ngrids)

    log("Allocating YaskGrid for %s (%s)" % (obj.name, str(obj.shape)))
    yk_grid = self.yk_hook.new_grid(grid_name, obj)

    wrapped = YaskGrid(yk_grid, obj.shape, obj.space_order, obj.dtype)
    self.grids[grid_name] = wrapped
    return wrapped
def fetch(self, grid, dtype):
    """
    Fetch the :class:`YaskContext` in ``self`` uniquely identified by
    ``grid`` and ``dtype``. Create a new (empty) :class:`YaskContext` on miss.
    """
    # A unique key for this context.
    key = (configuration['isa'], dtype, grid.dimensions,
           grid.time_dim, grid.stepping_dim)

    # Fetch or create a YaskContext
    if key not in self:
        self[key] = YaskContext('ctx%d' % self.ncontexts, grid, dtype)
        self.ncontexts += 1
        log("Context successfully created!")
    else:
        log("Fetched existing context from cache")
    return self[key]
def save(soname, binary, compiler):
    """
    Store a binary into a file within a temporary directory.

    An already existing file is left untouched.

    :param soname: Name of the .so file (w/o the suffix).
    :param binary: The binary data.
    :param compiler: The toolchain used for compilation.
    """
    sofile = get_jit_dir().joinpath(soname).with_suffix(compiler.so_ext)
    if sofile.is_file():
        log("%s: `%s` was not saved in `%s` as it already exists"
            % (compiler, sofile.name, get_jit_dir()))
    else:
        # Write to the computed target; this used to open `str(path)`, which
        # does not refer to the shared object file at all
        with open(str(sofile), 'wb') as f:
            f.write(binary)
        log("%s: `%s` successfully saved in `%s`"
            % (compiler, sofile.name, get_jit_dir()))
def test_tti_staggered(shape):
    """
    Run the staggered TTI kernel with the 'advanced' and the 'basic' DLE
    modes and check that both wavefields match.
    """
    spacing = [10. for _ in shape]
    nrec = 1
    ndim = len(spacing)

    # Model
    model = demo_model('layers-tti', shape=shape, spacing=spacing)

    # Source and receiver geometries
    depth = model.origin[-1] + 2 * spacing[-1]

    src_coordinates = np.empty((1, ndim))
    src_coordinates[0, :] = np.array(model.domain_size) * .5
    src_coordinates[0, -1] = depth

    rec_coordinates = np.empty((nrec, ndim))
    rec_coordinates[:, 0] = np.linspace(0., model.domain_size[0], num=nrec)
    rec_coordinates[:, -1] = depth

    geometry = AcquisitionGeometry(model, rec_coordinates, src_coordinates,
                                   t0=0.0, tn=250., src_type='Ricker', f0=0.010)

    # Solvers
    solver_tti = AnisotropicWaveSolver(model, geometry, time_order=2, space_order=8)
    solver_tti2 = AnisotropicWaveSolver(model, geometry, time_order=2, space_order=8)

    # Solve
    configuration['dse'] = 'aggressive'
    configuration['dle'] = 'advanced'
    rec1, u1, v1, _ = solver_tti.forward(kernel='staggered')
    configuration['dle'] = 'basic'
    rec2, u2, v2, _ = solver_tti2.forward(kernel='staggered')

    # Compare the two runs field by field
    res1 = np.linalg.norm(u1.data.reshape(-1) - u2.data.reshape(-1))
    res2 = np.linalg.norm(v1.data.reshape(-1) - v2.data.reshape(-1))
    log("DSE/DLE introduces error %2.4e, %2.4e in %d dimensions" %
        (res1, res2, len(shape)))
    assert np.isclose(res1, 0.0, atol=1e-8)
    assert np.isclose(res2, 0.0, atol=1e-8)
def jit_compile(ccode, basename, compiler=GNUCompiler):
    """JIT compiles the given ccode and returns the lib filepath.

    The source file is named ``<basename>.cpp`` and the library
    ``<basename>.so``.

    :param ccode: String of C source code.
    :param basename: The string used to name various files for this compilation.
    :param compiler: The toolchain used for compilation. GNUCompiler by default.
    :return: Path to compiled lib
    """
    source_path = "%s.cpp" % basename
    library_path = "%s.so" % basename

    log("%s: Compiling %s" % (compiler, source_path))
    extension_file_from_string(
        toolchain=compiler,
        ext_file=library_path,
        source_string=ccode,
        source_name=source_path,
    )

    return library_path
def print_profiling(state):
    """
    Print a summary of the applied transformations.

    With ``configuration['profiling']`` set to 'basic' or 'advanced', one
    row per entry of ``state.timings`` is logged, followed by the total;
    otherwise a single-line summary is logged.
    """
    timings = state.timings
    total = sum(timings.values())

    if configuration['profiling'] not in ['basic', 'advanced']:
        # Shorter summary
        passes = ",".join(name[1:] for name in timings)
        log("passes: %s (elapsed %.2f s)" % (passes, total))
        return

    row = "%s (elapsed: %.2f s)"
    lines = []
    for name, elapsed in timings.items():
        # Drop the first character and any digit from the entry name
        label = "".join(filter(lambda c: not c.isdigit(), name[1:]))
        lines.append(row % (label, elapsed))
    out = "\n ".join(lines)
    log("%s\n [Total elapsed: %.2f s]" % (out, total))
def apply(self, **kwargs):
    """
    Run the YASK Operator.

    The keyword arguments are turned into kernel arguments by
    ``self.arguments``; the value returned is that of
    ``self._profile_output``.
    """
    # Build the arguments list to invoke the kernel function
    arguments = self.arguments(**kwargs)

    # Map default Functions to runtime Functions; will be used for "grid sharing"
    toshare = {}
    for default in self.input:
        runtime = kwargs.get(default.name, default)
        if np.isscalar(runtime):
            toshare[default] = DataScalar(runtime)
            continue
        if default.from_YASK and (default.is_Constant or default.is_Function):
            toshare[runtime] = runtime.data

    log("Running YASK Operator through Devito...")
    # The kernel takes the argument values in the order of `self.parameters`
    values = [arguments[p.name] for p in self.parameters]
    self.yk_soln.run(self.cfunction, values, toshare)
    log("YASK Operator successfully run!")

    # Output summary of performance achieved
    return self._profile_output(arguments)
def execute_numpy(ui, spacing=0.01, a=0.5, timesteps=500):
    """
    Execute diffusion stencil using vectorised numpy array accesses.

    :param ui: 2D array with the initial condition.
    :param spacing: Grid spacing, used for both dimensions.
    :param a: Diffusion coefficient.
    :param timesteps: Number of timesteps to execute.
    :returns: A pair ``(buffer, runtime)``; ``runtime`` is the wall-clock time
              of the timestepping loop in seconds. With ``timesteps <= 0`` the
              untouched initial buffer is returned.
    """
    nx, ny = ui.shape
    dx2, dy2 = spacing**2, spacing**2
    dt = dx2 * dy2 / (2 * a * (dx2 + dy2))
    u = np.concatenate((ui, np.zeros_like(ui))).reshape((2, nx, ny))

    # Execute timestepping loop with alternating buffers
    tstart = time.time()
    for ti in range(timesteps):
        t0 = ti % 2
        t1 = (ti + 1) % 2

        uxx = (u[t0, 2:, 1:-1] - 2*u[t0, 1:-1, 1:-1] + u[t0, :-2, 1:-1]) / dx2
        uyy = (u[t0, 1:-1, 2:] - 2*u[t0, 1:-1, 1:-1] + u[t0, 1:-1, :-2]) / dy2
        u[t1, 1:-1, 1:-1] = u[t0, 1:-1, 1:-1] + a * dt * (uxx + uyy)
    runtime = time.time() - tstart

    log("Numpy: Diffusion with dx=%0.4f, dy=%0.4f, executed %d timesteps in %f seconds"
        % (spacing, spacing, timesteps, runtime))

    # The loop variable is unbound when the loop never runs, so derive the
    # buffer index from `timesteps`; for `timesteps > 0` this equals `ti % 2`.
    # NOTE(review): this is the buffer *read* by the final step, not the one
    # written by it -- confirm that is the intended result.
    last = (timesteps - 1) % 2 if timesteps > 0 else 0
    return u[last, :, :], runtime
def execute_python(ui, spacing=0.01, a=0.5, timesteps=500):
    """
    Execute diffusion stencil using pure Python list indexing.

    :param ui: 2D array with the initial condition.
    :param spacing: Grid spacing, used for both dimensions.
    :param a: Diffusion coefficient.
    :param timesteps: Number of timesteps to execute.
    :returns: A pair ``(buffer, runtime)``; ``runtime`` is the wall-clock time
              of the timestepping loop in seconds. With ``timesteps <= 0`` the
              untouched initial buffer is returned.
    """
    nx, ny = ui.shape
    dx2, dy2 = spacing**2, spacing**2
    dt = dx2 * dy2 / (2 * a * (dx2 + dy2))
    u = np.concatenate((ui, np.zeros_like(ui))).reshape((2, nx, ny))

    # Execute timestepping loop with alternating buffers
    tstart = time.time()
    for ti in range(timesteps):
        t0 = ti % 2
        t1 = (ti + 1) % 2

        # Only interior points are updated; the boundary is left untouched
        for i in range(1, nx-1):
            for j in range(1, ny-1):
                uxx = (u[t0, i+1, j] - 2*u[t0, i, j] + u[t0, i-1, j]) / dx2
                uyy = (u[t0, i, j+1] - 2*u[t0, i, j] + u[t0, i, j-1]) / dy2
                u[t1, i, j] = u[t0, i, j] + dt * a * (uxx + uyy)
    runtime = time.time() - tstart

    log("Python: Diffusion with dx=%0.4f, dy=%0.4f, executed %d timesteps in %f seconds"
        % (spacing, spacing, timesteps, runtime))

    # The loop variable is unbound when the loop never runs, so derive the
    # buffer index from `timesteps`; for `timesteps > 0` this equals `ti % 2`.
    # NOTE(review): this is the buffer *read* by the final step, not the one
    # written by it -- confirm that is the intended result.
    last = (timesteps - 1) % 2 if timesteps > 0 else 0
    return u[last, :, :], runtime
def wrapper(self):
    """
    Allocate the data of ``self`` on first use, then call the wrapped ``func``.

    Allocation only happens while ``self._data`` is None; it also refreshes
    ``self._padding`` (a list with one pair per index) from the values
    reported by the YASK grid.
    """
    if self._data is None:
        log("Allocating memory for %s%s" % (self.name, self.shape_allocated))

        # Fetch the appropriate context
        ctx = contexts.fetch(self.grid, self.dtype)

        # TODO : the following will fail if not using a SteppingDimension,
        # eg with save=True one gets /time/ instead /t/
        yk_grid = ctx.make_grid(self)

        # /self._padding/ must be updated as (from the YASK docs):
        # "The value may be slightly larger [...] due to rounding"
        pad = []
        for d in self.indices:
            if d.is_Time:
                pad.append((0, 0))
            else:
                pad.append((yk_grid.get_left_extra_pad_size(d.name),
                            yk_grid.get_right_extra_pad_size(d.name)))
        self._padding = pad

        self._data = Data(yk_grid, self.shape_allocated, self.indices, self.dtype)
        self._data.reset()
    return func(self)
def print_profiling(states):
    """
    Print a summary of the applied transformations.

    States with a falsy ``ops`` attribute are ignored. With
    ``configuration['profiling']`` set to 'advanced', every timing entry of
    every state is logged; otherwise one row per state is logged, showing
    the ops of its first and last timing entries.
    """
    # Drop unprofiled clusters/states
    profiled = [state for state in states if state.ops]

    tot_elapsed = 0.
    if configuration['profiling'] == 'advanced':
        row = "%s [flops: %d, elapsed: %.2f s]"
        for state in profiled:
            entries = []
            for name, elapsed in state.timings.items():
                # Drop the first character and any digit from the entry name
                label = "".join(filter(lambda c: not c.isdigit(), name[1:]))
                entries.append(row % (label, state.ops[name], elapsed))
            log(" >>\n ".join(entries))
            tot_elapsed += sum(state.timings.values())
        log("[Total elapsed: %.2f s]" % tot_elapsed)
    else:
        # Shorter summary
        row = "flops: %d >> %d (elapsed %.2f s)"
        rows = []
        for state in profiled:
            elapsed = sum(state.timings.values())
            tot_elapsed += elapsed
            keys = list(state.timings)
            first, final = keys[0], keys[-1]
            rows.append(row % (state.ops[first], state.ops[final], elapsed))
        log("%s\n [Total elapsed: %.2f s]" % ("\n ".join(rows), tot_elapsed))
def execute_devito(ui, spacing=0.01, a=0.5, timesteps=500):
    """
    Execute diffusion stencil using the devito Operator API.

    Returns a pair ``(u.data[1, :], runtime)`` where ``runtime`` is the
    wall-clock time, in seconds, spent in ``op.apply``.
    """
    nx, ny = ui.shape
    dx2 = dy2 = spacing**2
    dt = dx2 * dy2 / (2 * a * (dx2 + dy2))

    # Allocate the grid and set initial condition
    # Note: This should be made simpler through the use of defaults
    grid = Grid(shape=(nx, ny))
    u = TimeFunction(name='u', grid=grid, time_order=1, space_order=2)
    u.data[0, :] = ui[:]

    # Derive the stencil according to devito conventions
    eqn = Eq(u.dt, a * (u.dx2 + u.dy2))
    update = Eq(u.forward, solve(eqn, u.forward))
    op = Operator(update)

    # Execute the generated Devito stencil operator
    tstart = time.time()
    op.apply(u=u, t=timesteps, dt=dt)
    runtime = time.time() - tstart

    log("Devito: Diffusion with dx=%0.4f, dy=%0.4f, executed %d timesteps in %f seconds"
        % (spacing, spacing, timesteps, runtime))
    return u.data[1, :], runtime
def __setitem__(self, index, val):
    """
    Write ``val`` into the grid at ``index``.

    ``index`` is translated by ``self._convert_index`` into a ``start``
    point, a ``stop`` point and the ``shape`` of the selection. Depending on
    the selection and on the type of ``val``, a single entry, an array block,
    the whole array, or a scalar-filled block is written.

    :raises ValueError: If ``val`` is an array with more dimensions than the
                        selection.
    """
    start, stop, shape = self._convert_index(index, 'set')

    if all(extent == 1 for extent in shape):
        log("Data: Setting single entry %s" % str(start))
        assert start == stop
        self.grid.set_element(val, start)
        return

    if isinstance(val, np.ndarray):
        log("Data: Setting full-array/block via index [%s]" % str(index))
        if val.shape == shape:
            self.grid.set_elements_in_slice(val, start, stop)
            return
        if len(val.shape) > len(shape):
            raise ValueError("Data: could not broadcast input array from shape "
                             "%s into shape %s" % (val.shape, shape))
        # Emulate NumPy broadcasting
        expanded = np.empty(shape=shape, dtype=val.dtype)
        expanded[:] = val
        self.grid.set_elements_in_slice(expanded, start, stop)
        return

    if all(i == j-1 for i, j in zip(shape, self.shape)):
        log("Data: Setting full-array to given scalar via single grid sweep")
        self.grid.set_all_elements_same(val)
        return

    log("Data: Setting block to given scalar via index [%s]" % str(index))
    self.grid.set_elements_in_slice_same(val, start, stop, True)
def clear(cls):
    """
    Dump all known contexts, then defer to the parent class' ``clear``.
    """
    log("Dumping contexts and symbol caches")

    # Contexts first, then the caches handled by the parent class
    contexts.dump()
    parent = super(CacheManager, cls)
    parent.clear()
def test_tti(shape, space_order, kernel):
    """
    This first test compares the solution of the acoustic wave-equation and
    the TTI wave-equation with all anisotropy parameters set to 0. The two
    solutions should be the same.
    """
    if kernel == 'shifted':
        space_order *= 2
    to = 2
    so = space_order
    nbpml = 10
    ndim = len(shape)
    origin = [0. for _ in shape]
    spacing = [10. for _ in shape]
    vp = 1.5 * np.ones(shape)
    nrec = shape[0]

    # Constant model for true velocity
    model = Model(origin=origin, shape=shape, vp=vp,
                  spacing=spacing, nbpml=nbpml, space_order=space_order,
                  epsilon=np.zeros(shape), delta=np.zeros(shape),
                  theta=np.zeros(shape), phi=np.zeros(shape))

    # Source and receiver geometries
    depth = model.origin[-1] + 2 * spacing[-1]

    src_coordinates = np.empty((1, ndim))
    src_coordinates[0, :] = np.array(model.domain_size) * .5
    src_coordinates[0, -1] = depth

    rec_coordinates = np.empty((nrec, ndim))
    rec_coordinates[:, 0] = np.linspace(0., model.domain_size[0], num=nrec)
    rec_coordinates[:, 1] = np.array(model.domain_size)[1] * .5
    rec_coordinates[:, -1] = depth

    geometry = AcquisitionGeometry(model, rec_coordinates, src_coordinates,
                                   t0=0.0, tn=350., src_type='Ricker', f0=0.010)

    acoustic = AcousticWaveSolver(model, geometry, time_order=2, space_order=so)
    rec, u1, _ = acoustic.forward(save=False)

    # Solvers
    solver_tti = AnisotropicWaveSolver(model, geometry, time_order=2,
                                       space_order=space_order)

    # zero src
    src = geometry.src
    src.data.fill(0.)

    # last time index
    nt = geometry.nt
    last = (nt - 2) % 3
    indlast = [(last + 1) % 3, last % 3, (last-1) % 3]

    # Create new wavefield object restart forward computation
    u = TimeFunction(name='u', grid=model.grid, time_order=2, space_order=so)
    u.data[0:3, :] = u1.data[indlast, :]
    acoustic.forward(save=False, u=u, time_M=10, src=src)

    # Both TTI fields restart from the same acoustic time slots
    utti = TimeFunction(name='u', grid=model.grid, time_order=to, space_order=so)
    vtti = TimeFunction(name='v', grid=model.grid, time_order=to, space_order=so)

    utti.data[0:to+1, :] = u1.data[indlast[:to+1], :]
    vtti.data[0:to+1, :] = u1.data[indlast[:to+1], :]

    solver_tti.forward(u=utti, v=vtti, kernel=kernel, time_M=10, src=src)

    normal_u = u.data[:]
    normal_utti = .5 * utti.data[:]
    normal_vtti = .5 * vtti.data[:]

    # Squared norm of the difference, relative to the acoustic field
    res = linalg.norm((normal_u - normal_utti - normal_vtti).reshape(-1))**2
    res /= np.linalg.norm(normal_u.reshape(-1))**2

    log("Difference between acoustic and TTI with all coefficients to 0 %2.4e" % res)
    assert np.isclose(res, 0.0, atol=1e-4)
from devito.logger import yask as log from devito.parameters import Parameters, configuration, add_sub_configuration from devito.tools import make_tempdir from devito.yask.dle import YaskRewriter from devito.yask.utils import namespace def exit(emsg): """ Handle fatal errors. """ raise InvalidOperator("YASK Error [%s]. Exiting..." % emsg) log("Backend initialization...") # Not all devito `platform`s are supported by YASK if isinstance(configuration['platform'], (Arm, Power)): raise ValueError("The YASK backend doesn't support platform `%s`" % configuration['platform']) # Some of the supported devito `platform`s (e.g., AMDs) may still be run through # YASK -- as they are x86-64 just like Intels -- but a proper `Platform` must be used if configuration['platform'] is CPU64: configuration['platform'] = 'intel64' try: import yask as yc # YASK compiler factories cfac = yc.yc_factory() nfac = yc.yc_node_factory()
def _specialize_iet(self, iet, **kwargs):
    """
    Transform the Iteration/Expression tree to offload the computation of
    one or more loop nests onto YASK. This involves calling the YASK compiler
    to generate YASK code. Such YASK code is then called from within the
    transformed Iteration/Expression tree.

    The created kernel solutions are stored in ``self.yk_solns``. Candidate
    trees raising ``NotImplementedError`` are not offloaded; the reason is
    logged. The resulting tree is finally handed to the parent class'
    ``_specialize_iet``.
    """
    mapper = {}
    self.yk_solns = OrderedDict()
    for n, (_section, trees) in enumerate(find_affine_trees(iet).items()):
        dimensions = tuple(filter_ordered(i.dim.root for i in flatten(trees)))
        context = contexts.fetch(dimensions, self._dtype)

        # A unique name for the 'real' compiler and kernel solutions
        roots = [i.root for i in trees]
        name = namespace['jit-soln'](Signer._digest(configuration, *roots))

        # Create a YASK compiler solution for this Operator
        yc_soln = context.make_yc_solution(name)

        try:
            # Generate YASK grids and populate `yc_soln` with equations
            local_grids = yaskit(trees, yc_soln)

            # Build the new IET nodes
            yk_soln_obj = YaskSolnObject(namespace['code-soln-name'](n))
            funcall = make_sharedptr_funcall(namespace['code-soln-run'],
                                             ['time'], yk_soln_obj)
            funcall = Offloaded(funcall, self._dtype)
            mapper[trees[0].root] = funcall
            # Roots not yet in `mapper` are mapped to None (drop trees)
            mapper.update({i.root: mapper.get(i.root) for i in trees})

            # Mark `funcall` as an external function call
            self._func_table[namespace['code-soln-run']] = MetaCall(None, False)

            # JIT-compile the newly-created YASK kernel
            yk_soln = context.make_yk_solution(name, yc_soln, local_grids)
            self.yk_solns[(dimensions, yk_soln_obj)] = yk_soln

            # Print some useful information about the newly constructed solution
            log("Solution '%s' contains %d grid(s) and %d equation(s)." %
                (yc_soln.get_name(), yc_soln.get_num_grids(),
                 yc_soln.get_num_equations()))
        except NotImplementedError as e:
            log("Unable to offload a candidate tree. Reason: [%s]" % str(e))
    iet = Transformer(mapper).visit(iet)

    if not self.yk_solns:
        log("No offloadable trees found")

    # Some Iteration/Expression trees are not offloaded to YASK and may
    # require further processing to be executed in YASK, due to the differences
    # in storage layout employed by Devito and YASK
    yk_grid_objs = {}
    for i in self._input:
        if i.from_YASK:
            yk_grid_objs[i.name] = YaskGridObject(i.name)
    yk_grid_objs.update({i: YaskGridObject(i) for i in self._local_grids})
    iet = make_grid_accesses(iet, yk_grid_objs)

    # Finally optimize all non-yaskized loops
    iet = super(OperatorYASK, self)._specialize_iet(iet, **kwargs)

    return iet