def hs_classify(scope):
    """
    Classify the halo exchanges required by the functions read in ``scope``.

    Parameters
    ----------
    scope : Scope
        Object whose ``reads`` mapper (function -> accesses) is inspected.

    Returns
    -------
    dict
        A mapper ``Function -> {Dimension -> HaloLabel}``. Only the functions
        having at least one Dimension labelled ``STENCIL`` or ``FULL`` are kept.

    Raises
    ------
    HaloSchemeException
        If the labels collected for a function along a Dimension cannot be
        reduced to a single label.
    """
    collected = {}
    for func, accesses in scope.reads.items():
        # Only tensor functions defined over a grid are classified
        # TODO: improve me (functions without a grid are currently skipped)
        if not func.is_TensorFunction or func.grid is None:
            continue

        labels = collected.setdefault(func, {})
        for access in accesses:
            for dim in access.findices:
                if access.affine(dim):
                    if not func.grid.is_distributed(dim):
                        label = NONE
                    elif access.touch_halo(dim):
                        label = STENCIL
                    else:
                        label = IDENTITY
                elif access.is_increment:
                    # A read used for a distributed local-reduction. Users are
                    # expected to deal with this data access pattern by
                    # themselves, for example by resorting to common techniques
                    # such as redundant computation
                    label = UNSUPPORTED
                elif access.irregular(dim) and func.grid.is_distributed(dim):
                    label = FULL
                else:
                    # Nothing to record for this access along `dim`
                    continue
                labels.setdefault(dim, []).append(label)

    # Sanity check and reductions: collapse each list of labels to one label
    for func, labels in collected.items():
        for dim, found in list(labels.items()):
            distinct = set(found)
            if distinct == {STENCIL, IDENTITY}:
                labels[dim] = STENCIL
            elif len(distinct) == 1:
                labels[dim] = distinct.pop()
            else:
                raise HaloSchemeException(
                    "Inconsistency found while building a halo "
                    "scheme for `%s` along Dimension `%s`" % (func, dim))

    # Drop functions needing no halo exchange
    collected = {func: labels for func, labels in collected.items()
                 if any(label in [STENCIL, FULL] for label in labels.values())}

    # Emit a summary warning
    for func, labels in collected.items():
        unsupported = [dim for dim, label in labels.items()
                       if label is UNSUPPORTED]
        if configuration['mpi'] and unsupported:
            warning("Distributed local-reductions over `%s` along "
                    "Dimensions `%s` detected." % (func, unsupported))

    return collected
def __setstate__(self, state):
    """
    Restore the object from a pickled ``state`` dictionary.

    The ``_soname`` and ``binary`` entries are removed from ``state``; all
    remaining entries are set as attributes. If a ``_soname`` was pickled,
    the binary is saved through ``self._compiler`` and loaded back as
    ``self._lib``.
    """
    pickled_soname = state.pop('_soname', None)
    binary = state.pop('binary', None)
    for attr, value in state.items():
        setattr(self, attr, value)

    if pickled_soname is None:
        return

    # If the `sonames` don't match, there *might* be a hidden bug, as the
    # unpickled Operator might be generating code that differs from that
    # generated by the pickled Operator (for example, a bug we once had to
    # fix was due to rebuilding SymPy expressions which weren't automatically
    # getting the flag `evaluate=False`, thus producing x+2 on the unpickler
    # instead of x+1+1). However, different `sonames` don't necessarily mean
    # there's a bug: if the unpickler and the pickler are two distinct
    # processes and the unpickler runs with a different `configuration`
    # dictionary, then the `sonames` might indeed be different, depending on
    # which entries in `configuration` differ.
    if pickled_soname != self._soname:
        warning(
            "The pickled and unpickled Operators have different .sonames; "
            "this might be a bug, or simply a harmless difference in "
            "`configuration`. You may check they produce the same code."
        )
    self._compiler.save(self._soname, binary)
    self._lib = self._compiler.load(self._soname)
    self._lib.name = self._soname
def __init__(self, obj, r, gridpoints_data, coefficients_data):
    """
    Attach precomputed grid points and interpolation coefficients to ``obj``.

    Parameters
    ----------
    obj : object
        The function being decorated; two SubFunctions are created with
        ``obj`` as parent and stored as ``obj._gridpoints`` and
        ``obj._interpolation_coeffs``.
    r : int
        Strictly positive integer, used as the extent of the last axis of the
        coefficients SubFunction.
    gridpoints_data : array_like
        Values copied into the grid points SubFunction; must not be None.
    coefficients_data : array_like
        Values copied into the coefficients SubFunction; must not be None.

    Raises
    ------
    TypeError
        If ``r`` is not an int.
    ValueError
        If ``r`` is not positive.
    """
    if not isinstance(r, int):
        raise TypeError('Need `r` int argument')
    if r <= 0:
        raise ValueError('`r` must be > 0')

    self.r = r
    self.obj = obj
    self._npoint = obj._npoint

    # Grid points: one row per point, one column per grid dimension
    gridpoints = SubFunction(
        name="%s_gridpoints" % self.obj.name,
        dtype=np.int32,
        dimensions=(self.obj.indices[-1], Dimension(name='d')),
        shape=(self._npoint, self.obj.grid.dim),
        space_order=0,
        parent=self.obj,
    )
    assert gridpoints_data is not None
    gridpoints.data[:] = gridpoints_data[:]
    self.obj._gridpoints = gridpoints

    # Interpolation coefficients: `r` values per point and grid dimension
    coeffs = SubFunction(
        name="%s_interpolation_coeffs" % self.obj.name,
        dimensions=(self.obj.indices[-1], Dimension(name='d'),
                    Dimension(name='i')),
        shape=(self.obj.npoint, self.obj.grid.dim, self.r),
        dtype=self.obj.dtype,
        space_order=0,
        parent=self.obj,
    )
    assert coefficients_data is not None
    coeffs.data[:] = coefficients_data[:]
    self.obj._interpolation_coeffs = coeffs

    warning("Ensure that the provided interpolation coefficient and grid point " +
            "values are computed on the final grid that will be used for other " +
            "computations.")
def _specialize_iet(self, iet, **kwargs):
    """
    Replace the affine trees found in ``iet`` with OPS parallel-loop calls.

    For each group of trees returned by ``find_affine_trees``, ``opsit``
    produces a kernel (appended to ``self._header_functions``), a list of
    declarations and a call block that takes the place of the first tree.
    The transformed IET is wrapped between ``ops_init`` and
    ``ops_timing_output``/``ops_exit`` calls and returned as a ``List``.
    """
    self._includes.append('ops_seq.h')

    init_call = Call("ops_init", [0, 0, 2])
    timing_call = Call("ops_timing_output", [FunctionPointer("stdout")])
    exit_call = Call("ops_exit")

    replacements = {}
    declarations = []
    dims = None
    for n, (_, trees) in enumerate(find_affine_trees(iet).items()):
        kernel, kernel_declarations, call_block, dims = opsit(trees, n)
        declarations.extend(kernel_declarations)
        self._header_functions.append(kernel)

        replacements[trees[0].root] = call_block
        # Drop trees: roots not mapped yet end up mapped to None
        for tree in trees:
            replacements[tree.root] = replacements.get(tree.root)

    # `dims` is whatever the last `opsit` call returned (None if no trees)
    self._headers.append('#define OPS_%sD' % dims)
    warning("The OPS backend is still work-in-progress")

    declarations.append(Transformer(replacements).visit(iet))

    return List(body=[init_call, *declarations, timing_call, exit_call])
def wrapper(self, key, value=None):
    """
    Call the wrapped ``func`` after translating a deprecated ``key``.

    If ``key`` appears in ``self._deprecated``, a warning is emitted and the
    mapped replacement key is forwarded to ``func`` instead.
    """
    if key not in self._deprecated:
        return func(self, key, value)

    replacement = self._deprecated[key]
    warning(
        "Trying to access deprecated config `%s`. Using `%s` instead"
        % (key, replacement))
    return func(self, replacement, value)
def __init__(self, *args, **kwargs):
    """
    Initialise the time-related attributes of a TimeFunction.

    Nothing is done if ``self._cached()`` is true. Otherwise the parent
    initialiser is invoked and the following keyword arguments are processed:

    time_dim : optional
        Stored as ``self.time_dim`` (defaults to None).
    save : int, optional
        Number of timesteps to be saved. If given, it becomes
        ``self.time_size``; otherwise ``self.time_size`` is
        ``time_order + 1`` and it is also used as modulo of the first index.
    time_order : int or 3-tuple, optional
        Either an int, or a tuple ``(time_order, left_points, right_points)``.
        Defaults to 1. It determines the halo prepended to ``self._halo``.
    time_padding : tuple, optional
        Padding prepended to ``self._padding``. Defaults to ``(0, 0)``.

    Raises
    ------
    ValueError
        If ``time_order`` is neither an int nor a 3-tuple, or if ``save`` is
        provided but is not an int.
    """
    if not self._cached():
        super(TimeFunction, self).__init__(*args, **kwargs)
        self.time_dim = kwargs.get('time_dim', None)
        self.save = kwargs.get('save', None)

        time_order = kwargs.get('time_order', 1)
        if isinstance(time_order, int):
            self.time_order = time_order
            self._halo = ((time_order, 0),) + self._halo
        elif isinstance(time_order, tuple) and len(time_order) == 3:
            time_order, left_points, right_points = time_order
            self.time_order = time_order
            self._halo = ((left_points, right_points),) + self._halo
        else:
            # The offending argument is `time_order` (the message used to
            # blame `space_order`)
            raise ValueError("'time_order' must be int or 3-tuple of ints")

        self._padding = (kwargs.get('time_padding', (0, 0)),) + self._padding

        if self.save is not None:
            if not isinstance(self.save, int):
                raise ValueError("save must be an int indicating the number of " +
                                 "timesteps to be saved (is %s)" % type(self.save))
            # Warn if the saved timesteps alone would exceed the memory
            # currently reported as available
            available_mem = virtual_memory().available
            if np.dtype(self.dtype).itemsize * self.save > available_mem:
                warning("Trying to allocate more memory for symbol %s " % self.name +
                        "than available on physical device, this will start swapping")
            self.time_size = self.save
        else:
            self.time_size = self.time_order + 1
            self.indices[0].modulo = self.time_size
def _indices(cls, **kwargs):
    """
    Return the default dimension indices for a given data shape.

    :param grid: :class:`Grid` that defines the spatial domain.
    :param dimensions: Optional, list of :class:`Dimension` objects that
                       defines data layout.
    :return: Dimension indices used for each axis.

    .. note::

       Only one of :param grid: or :param dimensions: is required. If both
       are given, ``dimensions`` is ignored (with a warning).
    """
    grid = kwargs.get('grid', None)
    dimensions = kwargs.get('dimensions', None)

    if grid is not None:
        if dimensions is not None:
            warning(
                "Creating Function with 'grid' and 'dimensions' "
                "argument; ignoring the 'dimensions' and using 'grid'.")
        return grid.dimensions

    if dimensions is None:
        error("Creating a Function object requries either "
              "a 'grid' or the 'dimensions' argument.")
        raise ValueError("Unknown symbol dimensions or shape")

    return dimensions
def __init__(self, *args, **kwargs):
    """
    Set up the compilation and linking flags for the Intel compiler.

    ``language`` and ``platform`` are popped from ``kwargs``, falling back to
    the corresponding ``configuration`` entries. If ``kwargs['mpi']`` is
    truthy, the output of ``<MPICC> --version`` is inspected and a warning is
    emitted when it does not start with ``icc``.
    """
    super(IntelCompiler, self).__init__(*args, **kwargs)

    self.cflags += ["-xhost"]

    lang = kwargs.pop('language', configuration['language'])
    target = kwargs.pop('platform', configuration['platform'])

    if target is SKX:
        # Systematically use 512-bit vectors on skylake
        self.cflags += ["-qopt-zmm-usage=high"]

    try:
        if self.version >= version.StrictVersion("15.0.0"):
            # Append the OpenMP flag regardless of configuration['language'],
            # since icc15 and later versions implement OpenMP 4.0, hence
            # they support `#pragma omp simd`
            self.ldflags += ['-qopenmp']
    except (TypeError, ValueError):
        # The version comparison failed
        if lang == 'openmp':
            # Note: fopenmp, not qopenmp, is what is needed by icc versions < 15.0
            self.ldflags += ['-fopenmp']

    # Make sure the MPI compiler uses `icc` underneath -- whatever the MPI distro is
    if kwargs.get('mpi'):
        banner = check_output([self.MPICC, "--version"]).decode("utf-8")
        if not banner.startswith("icc"):
            warning("The MPI compiler `%s` doesn't use the Intel "
                    "C/C++ compiler underneath" % self.MPICC)
def is_on_device(maybe_symbol, gpu_fit, only_writes=False):
    """
    True if all given Functions are allocated in the device memory, False
    otherwise.

    Only TimeFunctions with an integer ``save`` are actually checked against
    ``gpu_fit``; any other Function does not affect the result.

    Parameters
    ----------
    maybe_symbol : Indexed or Function or Node
        The inspected object. May be a single Indexed or Function, or even an
        entire piece of IET.
    gpu_fit : list of Function
        The Functions which are known to definitely fit in the device memory.
        This information is given directly by the user through the compiler
        option `gpu-fit` and is propagated down here through the various
        stages of lowering. If it contains the string ``'all-fallback'``, any
        candidate found is assumed to fit (with a warning).
    only_writes : bool, optional
        Only makes sense if `maybe_symbol` is an IET. If True, ignore all
        Functions that do not appear on the LHS of at least one Expression.
        Defaults to False.
    """
    try:
        candidates = (maybe_symbol.function,)
    except AttributeError:
        # No `function` attribute: it must be a piece of IET
        assert maybe_symbol.is_Node
        iet = maybe_symbol
        candidates = set(FindSymbols().visit(iet))
        if only_writes:
            written = {e.write for e in FindNodes(Expression).visit(iet)}
            candidates &= written

    saved = [f for f in candidates
             if f.is_TimeFunction and is_integer(f.save)]

    if 'all-fallback' in gpu_fit and saved:
        warning("TimeFunction %s assumed to fit the GPU memory" % saved)
        return True

    for f in saved:
        if f not in gpu_fit:
            return False
    return True
def putdefault(self, grid):
    """
    Derive a unique key ``K`` from a Grid; if ``K`` is in ``self``, return
    the pre-existing YaskContext ``self[K]``, otherwise create a new context
    ``C``, set ``self[K] = C`` and return ``C``.

    Parameters
    ----------
    grid : Grid
        The grid from which the key is derived; must not be None.

    Returns
    -------
    YaskContext
        The context associated with ``grid``.
    """
    assert grid is not None

    key = self._getkey(grid, grid.dtype)

    # Does a YaskContext exist already corresponding to this key?
    if key in self:
        return self[key]

    # Functions declared with explicit dimensions (i.e., with no Grid) must be
    # able to retrieve the right context
    partial_keys = [self._getkey(None, grid.dtype, i) for i in powerset(key[-1])]
    if any(i in self._partial_map for i in partial_keys if i[2]):
        warning("Non-unique Dimensions found in different contexts; dumping "
                "all known contexts. Perhaps you're attempting to use multiple "
                "Grids, and some of them share identical Dimensions? ")
        self.dump()

    # Create a new YaskContext
    context = YaskContext('ctx%d' % self._ncontexts, grid)
    self._ncontexts += 1

    self[key] = context
    self._partial_map.update({i: context for i in partial_keys})

    log("Context successfully created!")

    # Hand the freshly created context back to the caller, as documented
    return context
def _make_parallel_tree(self, root, candidates):
    """
    Build a mapper ``{root: root'}`` where ``root'`` carries the parallel
    pragma produced by ``self.lang['for']``.

    If ``root.is_ParallelAtomic``, the increments found within ``root`` are
    first rewritten as calls to ``namespace['code-grid-add']``. If one of the
    increments cannot be split into its components, a warning is emitted and
    an empty mapper is returned.
    """
    pragma = self.lang['for'](self._ncollapse(root, candidates))
    yask_add = namespace['code-grid-add']

    # Introduce the `omp for` pragma
    mapper = OrderedDict()

    if not root.is_ParallelAtomic:
        mapper[root] = root._rebuild(pragmas=root.pragmas + (pragma,))
        return mapper

    # Turn increments into atomic increments
    subs = {}
    for node in FindNodes(Expression).visit(root):
        if not node.is_Increment:
            continue
        # Try getting the increment components
        try:
            target, value, indices = split_increment(node.expr)
        except (AttributeError, ValueError):
            warning(
                "Found a parallelizable tree, but couldn't ompize it "
                "because couldn't understand the increment %s" % node.expr)
            return mapper
        # All good, can atomicize the increment
        atomic = node.expr.func(yask_add, target, (value, indices))
        subs[node] = node._rebuild(expr=atomic)

    handle = Transformer(subs).visit(root)
    mapper[root] = handle._rebuild(pragmas=root.pragmas + (pragma,))
    return mapper
def putdefault(self, grid):
    """
    Derive a key ``K`` from the :class:`Grid` ``grid``; if ``K`` in ``self``,
    return the existing :class:`YaskContext` ``self[K]``, otherwise create a
    new context ``C``, set ``self[K] = C`` and return ``C``.

    Parameters
    ----------
    grid : Grid
        The grid from which the key is derived; must not be None.

    Returns
    -------
    YaskContext
        The context associated with ``grid``.
    """
    assert grid is not None

    key = self._getkey(grid, grid.dtype)

    # Does a YaskContext exist already corresponding to this key?
    if key in self:
        return self[key]

    # Functions declared with explicit dimensions (i.e., with no Grid) must be
    # able to retrieve the right context
    partial_keys = [
        self._getkey(None, grid.dtype, i) for i in powerset(key[-1])
    ]
    if any(i in self._partial_map for i in partial_keys if i[2]):
        warning(
            "Non-unique Dimensions found in different contexts; dumping "
            "all known contexts. Perhaps you're attempting to use multiple "
            "Grids, and some of them share identical Dimensions? ")
        self.dump()

    # Create a new YaskContext
    context = YaskContext('ctx%d' % self._ncontexts, grid)
    self._ncontexts += 1

    self[key] = context
    self._partial_map.update({i: context for i in partial_keys})

    log("Context successfully created!")

    # The docstring promises the new context; previously None was returned
    return context
def _arg_values(self, args, interval, grid, **kwargs):
    """
    Produce a map of argument values after evaluating user input. If no user
    input is provided, get a known value in ``args`` and adjust it so that no
    out-of-bounds memory accesses will be performed. The adjustment exploits
    the information in ``interval``, an Interval describing the Dimension
    data space. If no value is available in ``args``, use a default value.

    Parameters
    ----------
    args : dict
        Known argument values.
    interval : Interval
        Description of the Dimension data space.
    grid : Grid
        Used for spacing overriding and MPI execution; if ``self`` is a
        distributed Dimension, then ``grid`` is used to translate user input
        into rank-local indices.
    **kwargs
        Dictionary of user-provided argument overrides.
    """
    # Fetch user input; `self.name` acts as a fallback for the maximum and is
    # popped unconditionally
    glb_minv = kwargs.pop(self.min_name, None)
    fallback_maxv = kwargs.pop(self.name, None)
    glb_maxv = kwargs.pop(self.max_name, fallback_maxv)

    # Convert into rank-local values
    if grid is not None and grid.is_distributed(self):
        loc_minv, loc_maxv = grid.distributor.glb_to_loc(
            self, (glb_minv, glb_maxv))
    else:
        loc_minv, loc_maxv = glb_minv, glb_maxv

    # If no user-override provided, use a suitable default value
    defaults = self._arg_defaults()

    if glb_minv is None:
        loc_minv = args.get(self.min_name, defaults[self.min_name])
        try:
            loc_minv -= min(interval.lower, 0)
        except (AttributeError, TypeError):
            # No usable `interval`, or a value that cannot be adjusted
            pass

    if glb_maxv is None:
        loc_maxv = args.get(self.max_name, defaults[self.max_name])
        try:
            loc_maxv -= max(interval.upper, 0)
        except (AttributeError, TypeError):
            # No usable `interval`, or a value that cannot be adjusted
            pass

    values = {self.min_name: loc_minv, self.max_name: loc_maxv}

    # Maybe override spacing
    if grid is not None:
        try:
            by_name = {k.name: v for k, v in grid.spacing_map.items()}
            values[self.spacing.name] = by_name[self.spacing.name]
        except KeyError:
            pass
        except AttributeError:
            # See issue #1524
            warning("Unable to override spacing")

    return values
def grid(self):
    """
    Return a grid found among the ``grid`` attributes of ``self._args_diff``.

    A warning is emitted if more than one distinct grid is found.

    Raises
    ------
    ValueError
        If no grid is found.
    """
    grids = {getattr(i, 'grid', None) for i in self._args_diff} - {None}

    if len(grids) > 1:
        warning("Expression contains multiple grids, returning first found")

    try:
        found = grids.pop()
    except KeyError:
        raise ValueError("No grid found")
    return found
def run(expr):
    """
    Recursively translate the expression ``expr`` into YASK nodes built
    through ``nfac``.

    Grids are created on demand via ``self.yc_soln.new_grid`` and cached in
    ``self.mapper``. For an equation whose LHS is a plain Symbol, the
    translated RHS is stored in ``self.mapper`` and None is returned.

    Raises
    ------
    NotImplementedError
        If ``expr`` is of an unsupported type, including powers that are not
        reciprocals.
    """
    if expr.is_Integer:
        return nfac.new_const_number_node(int(expr))
    elif expr.is_Float:
        return nfac.new_const_number_node(float(expr))
    elif expr.is_Symbol:
        function = expr.base.function
        if function.is_Constant:
            if function not in self.mapper:
                self.mapper[function] = self.yc_soln.new_grid(
                    function.name, [])
            return self.mapper[function].new_relative_grid_point([])
        else:
            # A DSE-generated temporary, which must have already been
            # encountered as a LHS of a previous expression
            assert function in self.mapper
            return self.mapper[function]
    elif isinstance(expr, Indexed):
        function = expr.base.function
        if function not in self.mapper:
            if function.is_TimeFunction:
                # The first index is the stepping one, the others are domain ones
                dimensions = [
                    nfac.new_step_index(function.indices[0].name)
                ]
                dimensions += [
                    nfac.new_domain_index(i.name)
                    for i in function.indices[1:]
                ]
            else:
                dimensions = [
                    nfac.new_domain_index(i.name) for i in function.indices
                ]
            self.mapper[function] = self.yc_soln.new_grid(
                function.name, dimensions)
        # Offsets of the access w.r.t. the function indices
        indices = [
            int((i.origin if isinstance(i, LoweredDimension) else i) - j)
            for i, j in zip(expr.indices, function.indices)
        ]
        return self.mapper[function].new_relative_grid_point(indices)
    elif expr.is_Add:
        return nary2binary(expr.args, nfac.new_add_node)
    elif expr.is_Mul:
        return nary2binary(expr.args, nfac.new_multiply_node)
    elif expr.is_Pow:
        num, den = expr.as_numer_denom()
        if num == 1:
            return nfac.new_divide_node(run(num), run(den))
        # Only reciprocals can be translated. Fail loudly rather than
        # silently returning None, which callers could not tell apart from
        # the "temporary registered" case below
        warning("Missing handler in Devito-YASK translation")
        raise NotImplementedError(
            "Cannot translate power expression `%s`" % str(expr))
    elif expr.is_Equality:
        if expr.lhs.is_Symbol:
            function = expr.lhs.base.function
            assert function not in self.mapper
            self.mapper[function] = run(expr.rhs)
        else:
            return nfac.new_equation_node(*[run(i) for i in expr.args])
    else:
        warning("Missing handler in Devito-YASK translation")
        raise NotImplementedError
def __init__(self, *args, **kwargs):
    """
    Set up the Intel compiler for KNL, adding the ``-xMIC-AVX512`` flag.

    A warning is emitted if OpenMP is disabled, either through the ``openmp``
    keyword argument or, if not provided, through ``configuration['openmp']``.
    """
    super(IntelKNLCompiler, self).__init__(*args, **kwargs)

    self.cflags += ["-xMIC-AVX512"]

    if not kwargs.pop('openmp', configuration['openmp']):
        warning("Running on Intel KNL without OpenMP is highly discouraged")
def rewrite(clusters, mode='advanced'):
    """
    Given a sequence of N Clusters, produce a sequence of M Clusters with
    reduced operation count, with M >= N.

    Parameters
    ----------
    clusters : list of Cluster
        The Clusters to be transformed.
    mode : str, optional
        The aggressiveness of the rewrite. Accepted:
        - ``noop``: Do nothing.
        - ``basic``: Apply common sub-expressions elimination.
        - ``advanced``: Apply all transformations that will reduce the
                        operation count w/ minimum increase to the memory
                        pressure, namely 'basic', factorization, and
                        cross-iteration redundancy elimination ("CIRE") for
                        time-invariants only.
        - ``aggressive``: Like 'advanced', but apply CIRE to time-varying
                          sub-expressions too. Further, seek and drop
                          cross-cluster redundancies (this is the only pass
                          that attempts to optimize *across* Clusters, rather
                          than within a Cluster). The 'aggressive' mode may
                          substantially increase the symbolic processing time;
                          it may or may not reduce the JIT-compilation time;
                          it may or may not improve the overall runtime
                          performance.

        ``None`` is treated like ``noop``. An unknown mode triggers a warning
        and ``clusters`` is returned untouched.

    Raises
    ------
    ValueError
        If ``mode`` is neither None nor a string.
    """
    if mode is not None and not isinstance(mode, str):
        raise ValueError("Parameter 'mode' should be a string, not %s." % type(mode))

    if mode is None or mode == 'noop':
        return clusters
    if mode not in dse_registry:
        warning("Unknown rewrite mode(s) %s" % mode)
        return clusters

    # We use separate rewriters for dense and sparse clusters; sparse clusters
    # have non-affine index functions, thus making it basically impossible,
    # in general, to apply the more advanced DSE passes.
    # Note: the sparse rewriter uses the same template for temporaries as
    # the dense rewriter, thus temporaries are globally unique
    dense_rewriter = modes[mode]()
    sparse_rewriter = BasicRewriter(False, dense_rewriter.template)

    states = []
    for cluster in clusters:
        chosen = dense_rewriter if cluster.is_dense else sparse_rewriter
        states.append(chosen.run(cluster))

    # Print out the profiling data
    print_profiling(states)

    # Different clusters may have created new (smaller) clusters which are
    # potentially groupable within a single cluster
    regrouped = groupby(ClusterGroup(flatten([s.clusters for s in states])))

    return regrouped.finalize()
def run(shape=(50, 50, 50), spacing=(20.0, 20.0, 20.0), tn=250.0,
        autotune=False, time_order=2, space_order=4, nbpml=10,
        kernel='centered', **kwargs):
    """
    Set up a TTI solver through ``tti_setup`` and run its forward operator.

    A warning is emitted if ``space_order`` is not a multiple of 4. Note that
    ``time_order`` is accepted but not used within this function.

    Returns
    -------
    tuple
        ``(gflopss, oi, timings, [rec, u, v])``, the first three taken from
        the summary returned by ``solver.forward``.
    """
    solver = tti_setup(shape, spacing, tn, space_order, nbpml, **kwargs)

    if space_order % 4 != 0:
        warning('WARNING: TTI requires a space_order that is a multiple of 4!')

    rec, u, v, summary = solver.forward(autotune=autotune, kernel=kernel)
    fields = [rec, u, v]

    return summary.gflopss, summary.oi, summary.timings, fields
def _specialize_iet(self, iet, **kwargs):
    """
    Wrap ``iet`` with the calls and declarations required by OPS.

    The symbols appearing in the affine trees of ``iet`` (except Constants)
    are turned into OPS datasets through ``create_ops_dat``; one kernel per
    group of trees is produced by ``opsit`` and appended to
    ``self._ops_kernels``. The returned ``List`` contains, in order,
    ``ops_init``, the block declaration, the pre-time-loop statements,
    ``ops_partition``, the original ``iet`` and ``ops_exit``.
    """
    warning("The OPS backend is still work-in-progress")

    ops_init = Call(namespace['ops_init'], [0, 0, 2])
    ops_partition = Call(namespace['ops_partition'], Literal('""'))
    ops_exit = Call(namespace['ops_exit'])

    ops_block = OpsBlock('block')

    # Extract all symbols that need to be converted to ops_dat
    dims = []
    to_dat = set()
    for section, trees in find_affine_trees(iet).items():
        dims.append(len(trees[0].dimensions))
        symbols = set(FindSymbols('symbolics').visit(trees[0].root))
        symbols -= set(FindSymbols('defines').visit(trees[0].root))
        to_dat |= symbols

    # To ensure deterministic code generation we order the datasets to
    # be generated (since a set is an unordered collection)
    to_dat = filter_sorted(to_dat)

    name_to_ops_dat = {}
    pre_time_loop = []
    for f in to_dat:
        if f.is_Constant:
            continue
        pre_time_loop.extend(create_ops_dat(f, name_to_ops_dat, ops_block))

    for n, (section, trees) in enumerate(find_affine_trees(iet).items()):
        pre_loop, ops_kernel = opsit(trees, n)
        pre_time_loop.extend(pre_loop)
        self._ops_kernels.append(ops_kernel)

    # NOTE: `all(...)` is required here; asserting the bare generator
    # expression is always true, so the check would never fire
    assert all(d == dims[0] for d in dims), \
        ("The OPS backend currently assumes that all kernels "
         "have the same number of dimensions")

    ops_block_init = Expression(
        ClusterizedEq(
            Eq(ops_block,
               namespace['ops_decl_block'](dims[0], Literal('"block"')))))

    self._headers.append(namespace['ops_define_dimension'](dims[0]))
    self._includes.append('stdio.h')

    body = [
        ops_init, ops_block_init, *pre_time_loop, ops_partition, iet, ops_exit
    ]

    return List(body=body)
def locate_intel_advisor():
    """
    Return the Intel Advisor directory pointed to by ``ADVISOR_HOME``.

    Returns
    -------
    Path or None
        The directory, if it contains the ``bin64/advixe-cl`` file; None (plus
        a warning) if the variable is unset or the file is missing.
    """
    home = os.environ.get('ADVISOR_HOME')
    if home is None:
        warning("Requested `advisor` profiler, but ADVISOR_HOME isn't set")
        return None

    path = Path(home)
    # Little hack: assuming a 64bit system
    if (path / 'bin64' / 'advixe-cl').is_file():
        return path

    warning("Requested `advisor` profiler, but couldn't locate executable")
    return None
def _build(cls, expressions, **kwargs):
    """
    Validate the passes requested via ``kwargs['mode']`` and delegate to the
    parent ``_build``.

    A pass listed in ``cls._known_passes_disabled`` only triggers a warning.

    Raises
    ------
    InvalidOperator
        If a pass is neither known nor known-but-disabled.
    """
    # Sanity check
    for requested in as_tuple(kwargs['mode']):
        if requested in cls._known_passes:
            continue
        if requested not in cls._known_passes_disabled:
            raise InvalidOperator("Unknown pass `%s`" % requested)
        warning("Got explicit pass `%s`, but it's unsupported on an "
                "Operator of type `%s`" % (requested, str(cls)))

    return super(DeviceOpenMPCustomOperator, cls)._build(expressions, **kwargs)
def locate_intel_advisor():
    """
    Look up Intel Advisor through the ``ADVISOR_HOME`` environment variable.

    Returns
    -------
    Path or None
        ``Path(ADVISOR_HOME)`` if ``bin64/advixe-cl`` exists underneath it,
        None otherwise. A warning is emitted in the latter case.
    """
    try:
        advisor_home = os.environ['ADVISOR_HOME']
    except KeyError:
        warning("Requested `advisor` profiler, but ADVISOR_HOME isn't set")
        return None

    path = Path(advisor_home)
    # Little hack: assuming a 64bit system
    executable = path.joinpath('bin64', 'advixe-cl')
    if not executable.is_file():
        warning("Requested `advisor` profiler, but couldn't locate executable")
        return None
    return path
def _remove_memmap_file():
    """
    Clean up the memmap files tracked in ``MemmapManager._created_data``.

    Files whose entry is truthy are removed from disk; a failure to remove
    is reported with a warning. Files whose entry is falsy are left in place
    (also with a warning).
    """
    registry = MemmapManager._created_data
    for filename in registry:
        if not registry[filename]:
            warning("file %s has been left", filename)
            continue
        try:
            os.remove(filename)
        except OSError:
            warning(
                "error removing %s it may be already removed, skipping", filename)
def homogenise_gpus(gpu_infos):
    """
    Collapse a list of per-card descriptions into a single description.

    Returns
    -------
    dict or None
        The first entry of ``gpu_infos``, updated in place with the number of
        cards under ``'ncards'``, if all entries are equal; None (plus a
        warning) if the list is empty or the entries differ.
    """
    if gpu_infos == []:
        warning('No graphics cards detected')
        return None

    if not all_equal(gpu_infos):
        warning('Different models of graphics cards detected')
        return None

    summary = gpu_infos[0]
    summary['ncards'] = len(gpu_infos)
    return summary
def _at_callback(val):  # noqa
    """
    Turn the user-provided auto-tuning setting into an ``at_setup`` result.

    ``val`` is either a string (the level, with the mode taken from
    ``at_default_mode`` for the current backend) or a ``(level, mode)`` pair.
    The level ``'off'`` is converted to False. With the ``core`` backend the
    ``runtime`` mode is unsupported and replaced by ``preemptive``.
    """
    backend = configuration['backend']

    if isinstance(val, str):
        level = val
        mode = at_default_mode[backend]
    else:
        level, mode = val

    if level == 'off':
        level = False

    if configuration['backend'] == 'core' and mode == 'runtime':
        warning("Unsupported auto-tuning mode `runtime` with backend `core`")
        mode = 'preemptive'

    return at_setup(level, mode)
def plot_field(field, xmin=0., xmax=2., ymin=0., ymax=2., zmin=None, zmax=None,
               view=None, linewidth=0):
    """
    Utility plotting routine for 2D data, rendered as a 3D surface.

    Parameters
    ----------
    field : array_like
        Field data to plot.
    xmin, xmax : float, optional
        Extent of the x-axis. Default to 0 and 2.
    ymin, ymax : float, optional
        Extent of the y-axis. Default to 0 and 2.
    zmin, zmax : float, optional
        Extent of the z-axis. If not given, the minimum and maximum of
        ``field`` are used.
    view : tuple, optional
        View point to initialise; unpacked into ``ax.view_init``.
    linewidth : float, optional
        Line width passed to ``plot_surface``. Defaults to 0.

    Raises
    ------
    ValueError
        If a minimum is larger than the corresponding maximum.
    """
    if xmin > xmax or ymin > ymax:
        raise ValueError("Dimension min cannot be larger than dimension max.")
    if (zmin is not None and zmax is not None):
        if zmin > zmax:
            raise ValueError("Dimension min cannot be larger than dimension max.")
    elif (zmin is None and zmax is not None):
        if np.min(field) >= zmax:
            warning("zmax is less than field's minima. Figure deceptive.")
    elif (zmin is not None and zmax is None):
        if np.max(field) <= zmin:
            warning("zmin is larger than field's maxima. Figure deceptive.")

    x_coord = np.linspace(xmin, xmax, field.shape[0])
    y_coord = np.linspace(ymin, ymax, field.shape[1])
    fig = pyplot.figure(figsize=(11, 7), dpi=100)
    # `Figure.gca` no longer accepts keyword arguments in recent Matplotlib;
    # `add_subplot` creates the 3D axes in old and new versions alike
    ax = fig.add_subplot(111, projection='3d')
    X, Y = np.meshgrid(x_coord, y_coord, indexing='ij')
    ax.plot_surface(X, Y, field[:], cmap=cm.viridis, rstride=1, cstride=1,
                    linewidth=linewidth, antialiased=False)

    # Enforce axis measures and set view if given
    ax.set_xlim(xmin, xmax)
    ax.set_ylim(ymin, ymax)
    if zmin is None:
        zmin = np.min(field)
    if zmax is None:
        zmax = np.max(field)
    ax.set_zlim(zmin, zmax)

    if view is not None:
        ax.view_init(*view)

    # Label axis
    ax.set_xlabel('$x$')
    ax.set_ylabel('$y$')

    pyplot.show()
def load(self):
    """
    Load timing results from individually keyed files.

    For each parameter set produced by ``self.sweep()``, the JSON file
    ``<name>_<param_string>.json`` is read from ``self.resultsdir`` and its
    ``'timings'`` and ``'meta'`` entries are stored in ``self.timings`` and
    ``self.meta``, keyed by the tuple of the parameter items. Files that
    cannot be read, parsed, or that lack the expected entries are skipped
    with a warning.
    """
    for params in self.sweep():
        key = tuple(params.items())
        filename = '%s_%s.json' % (self.name, self.param_string(params.items()))
        try:
            with open(path.join(self.resultsdir, filename), 'r') as f:
                datadict = json.load(f)
            # Fetch both entries before storing, so that a malformed file
            # never leaves a half-loaded result behind
            timings, meta = datadict['timings'], datadict['meta']
        except (OSError, ValueError, KeyError, TypeError):
            # Missing/unreadable file, invalid JSON, or unexpected layout.
            # A bare `except` would also swallow KeyboardInterrupt/SystemExit
            warning("Could not load file: %s" % filename)
        else:
            self.timings[key] = timings
            self.meta[key] = meta
def _data_buffer(self):
    """
    Return a NumPy array, shaped as ``self.shape_allocated``, built on top
    of the raw storage buffer of ``self._data.grid``.

    A warning is emitted if the number of storage elements reported by the
    grid differs from the product of the allocated shape.
    """
    num_elements = self._data.grid.get_num_storage_elements()
    shape = self.shape_allocated
    expected = reduce(mul, shape)

    # 1D ctypes array type spanning the whole allocated shape
    ctype_1d = dtype_to_ctype(self.dtype) * expected

    if num_elements != expected:
        warning("num_storage_elements(%d) != reduce(mul, %s)",
                num_elements, str(shape))

    address = int(self._data.grid.get_raw_storage_buffer())
    buf = ctypes.cast(address, ctypes.POINTER(ctype_1d)).contents

    return np.frombuffer(buf, dtype=self.dtype).reshape(shape)
def __call__(cls, *args, **kwargs):
    """
    Create an instance of the requested class for the current backend.

    The class is looked up by name in ``cls._backend.__dict__``; if missing,
    a warning is emitted and the ``KeyError`` is re-raised.
    """
    backend = cls._backend

    # Try the selected backend first
    try:
        implementation = backend.__dict__[cls.__name__]
    except KeyError:
        warning('Backend %s does not appear to implement class %s'
                % (backend.__name__, cls.__name__))
        raise

    # Invoke the constructor with the arguments given
    return implementation(*args, **kwargs)
def __call__(cls, *args, **kwargs):
    """
    Instantiate, for the current backend, the class named after ``cls``.

    Raises
    ------
    KeyError
        If ``cls._backend`` does not provide a class with that name (a
        warning is emitted before re-raising).
    """
    name = cls.__name__

    # Try the selected backend first
    try:
        target = cls._backend.__dict__[name]
    except KeyError as err:
        warning('Backend %s does not appear to implement class %s'
                % (cls._backend.__name__, name))
        raise err

    # Invoke the constructor with the arguments given
    return target(*args, **kwargs)
def _arg_check(self, args, intervals):
    """
    Check that ``args`` contains legal runtime values bound to ``self``.

    A warning is emitted if the runtime value carries a ``dtype`` differing
    from ``self.dtype``.

    Raises
    ------
    InvalidArgument
        If ``args`` has no entry for ``self.name``.
    """
    if self.name not in args:
        raise InvalidArgument("No runtime value for %s" % self.name)

    value = args[self.name]
    try:
        mismatch = value.dtype != self.dtype
    except AttributeError:
        # Might be a plain number, w/o a dtype field
        return

    if mismatch:
        warning("Data type %s of runtime value `%s` does not match the "
                "Constant data type %s" % (value.dtype, self.name, self.dtype))
def _select_point_color(self, usercolor):
    """
    Pick the color to be used for a point.

    Parameters
    ----------
    usercolor : object or None
        The color requested by the user.

    Returns
    -------
    object
        ``self.color[0]`` if no color was requested; ``usercolor`` itself if
        fancy colors are disabled; otherwise the fancy color associated with
        ``usercolor``, assigning a new one from the available pool on first
        use. If the pool is exhausted, a warning is emitted and ``usercolor``
        is returned as a fallback.
    """
    if usercolor is None:
        return self.color[0]
    elif not self.fancycolor:
        return usercolor
    elif usercolor not in self.fancycolor.mapper:
        try:
            fancycolor = self.fancycolor.available.pop(0)
        except IndexError:
            # Pool exhausted: fall back to the user's color, instead of
            # failing on an unbound local variable
            warning("No more fancycolor available")
            return usercolor
        self.fancycolor.mapper[usercolor] = fancycolor
        return fancycolor
    else:
        return self.fancycolor.mapper[usercolor]
def _data_buffer(self):
    """
    Expose the raw storage of ``self._data.grid`` as a NumPy array having
    shape ``self.shape_allocated`` and dtype ``self.dtype``.

    A mismatch between the grid's storage element count and the product of
    the allocated shape is reported with a warning.
    """
    grid = self._data.grid
    num_elements = grid.get_num_storage_elements()
    shape = self.shape_allocated
    size = reduce(mul, shape)

    flat_ctype = dtype_to_ctype(self.dtype) * size
    if num_elements != size:
        warning("num_storage_elements(%d) != reduce(mul, %s)",
                num_elements, str(shape))

    pointer = ctypes.cast(int(grid.get_raw_storage_buffer()),
                          ctypes.POINTER(flat_ctype))
    return np.frombuffer(pointer.contents, dtype=self.dtype).reshape(shape)
def _arg_check(self, args, intervals):
    """
    Check that ``args`` contains legal runtime values bound to ``self``.

    Raises
    ------
    InvalidArgument
        If no runtime value is available for ``self.name``.
    """
    name = self.name
    if name not in args:
        raise InvalidArgument("No runtime value for %s" % name)

    runtime_value = args[name]
    try:
        # Might be a plain number, w/o a dtype field
        if runtime_value.dtype == self.dtype:
            return
        warning("Data type %s of runtime value `%s` does not match the "
                "Constant data type %s"
                % (runtime_value.dtype, self.name, self.dtype))
    except AttributeError:
        pass
def create_profile(name):
    """
    Create a new :class:`Profiler`.

    The profiler type is selected by ``configuration['profiling']``; if it
    fails to initialize, the ``basic`` profiler is used instead.
    """
    level = configuration['profiling']
    profiler = profiler_registry[level](name)

    if not profiler.initialized:
        warning("Couldn't set up `%s` profiler; reverting to `basic`" % level)
        profiler = profiler_registry['basic'](name)
        # We expect the `basic` profiler to always initialize successfully
        assert profiler.initialized

    return profiler
def locate_intel_advisor():
    """
    Detect if Intel Advisor is installed on the machine and return its
    location if it is.

    The directory is taken from the ``DEVITO_ADVISOR_DIR`` environment
    variable if set. Otherwise, ``advixe-cl --version`` is run to check that
    Advisor is reachable, and the ``PATH`` entries are scanned for an Advisor
    directory (the last matching entry wins).

    Returns
    -------
    Path or None
        The Advisor directory, if it contains ``bin64/advixe-cl``; None
        otherwise (an error or a warning is logged).
    """
    path = None

    try:
        # Check if the directory to Intel Advisor is specified
        path = Path(os.environ['DEVITO_ADVISOR_DIR'])
    except KeyError:
        # Otherwise, 'sniff' the location of Advisor's directory
        error_msg = ('Intel Advisor cannot be found on your system, consider if you'
                     ' have sourced its environment variables correctly. Information can'
                     ' be found at https://software.intel.com/content/www/us/en/develop/'
                     'documentation/advisor-user-guide/top/launch-the-intel-advisor/'
                     'intel-advisor-cli/setting-and-using-intel-advisor-environment'
                     '-variables.html')
        try:
            res = run(["advixe-cl", "--version"], stdout=PIPE, stderr=DEVNULL)
            ver = res.stdout.decode("utf-8")
            if not ver:
                error(error_msg)
                return None
        except (UnicodeDecodeError, FileNotFoundError):
            error(error_msg)
            return None

        # An unset PATH simply yields no candidate directories
        env_path = os.environ.get("PATH", "")
        env_path_dirs = env_path.split(":")

        for env_path_dir in env_path_dirs:
            # intel/advisor is the advisor directory for Intel Parallel Studio,
            # intel/oneapi/advisor is the directory for Intel oneAPI
            if "intel/advisor" in env_path_dir or "intel/oneapi/advisor" in env_path_dir:
                path = Path(env_path_dir)
                if path.name.startswith('bin'):
                    path = path.parent

        if not path:
            error(error_msg)
            return None

    if path.joinpath('bin64').joinpath('advixe-cl').is_file():
        return path
    else:
        # NOTE: the separating space between the two literals used to be
        # missing, producing "executablein advisor directory"
        warning("Requested `advisor` profiler, but couldn't locate executable "
                "in advisor directory")
        return None
def __init__(self, *args, **kwargs):
    """
    Initialise the time-related attributes of a TimeFunction.

    Nothing is done if ``self._cached()`` is true. Otherwise ``time_dim`` and
    ``time_order`` are read from ``kwargs`` before invoking the parent
    initialiser, and ``save`` afterwards. A warning is emitted if the symbol
    requires more memory than currently reported as available.

    Raises
    ------
    TypeError
        If ``self.time_order`` is not an int.
    """
    if self._cached():
        return

    default_time_dim = self.indices[self._time_position]
    self.time_dim = kwargs.get('time_dim', default_time_dim)
    self._time_order = kwargs.get('time_order', 1)

    super(TimeFunction, self).__init__(*args, **kwargs)

    # Check we won't allocate too much memory for the system
    available_mem = virtual_memory().available
    required_mem = np.dtype(self.dtype).itemsize * self.size
    if required_mem > available_mem:
        warning("Trying to allocate more memory for symbol %s " % self.name +
                "than available on physical device, this will start swapping")

    if not isinstance(self.time_order, int):
        raise TypeError("`time_order` must be int")

    self.save = kwargs.get('save')
def create_profile(name):
    """
    Create a new :class:`Profiler`.

    The profiler type is ``advanced`` when the log level is ``DEBUG``,
    otherwise it is selected by ``configuration['profiling']``. If the chosen
    profiler fails to initialize, the ``basic`` profiler is used instead.
    """
    if configuration['log-level'] == 'DEBUG':
        # Enforce performance profiling in DEBUG mode
        level = 'advanced'
    else:
        level = configuration['profiling']
    profiler = profiler_registry[level](name)
    if profiler.initialized:
        return profiler
    else:
        # The fallback actually instantiated below is `basic`; the message
        # and comment used to (wrongly) mention `advanced`
        warning("Couldn't set up `%s` profiler; reverting to `basic`" % level)
        profiler = profiler_registry['basic'](name)
        # We expect the `basic` profiler to always initialize successfully
        assert profiler.initialized
        return profiler
def estimate_cost(expr, estimate_functions=False):
    """
    Estimate the operation count of an expression.

    Parameters
    ----------
    expr : expr-like or list of expr-like
        One or more expressions for which the operation count is calculated.
        Dicts (and lists of dicts) of expressions are accepted too, in which
        case their values are inspected.
    estimate_functions : bool, optional
        If True, the functions in the internal table (``sin`` and ``cos``)
        count as 50 operations each, while any other function counts as 1.
        If False (default), every function counts as 1.

    Returns
    -------
    int or None
        The estimated operation count, or None (plus a warning) if the
        estimation failed.
    """
    external_functions = {sin: 50, cos: 50}
    try:
        # Is it a plain SymPy object ?
        iter(expr)
    except TypeError:
        expr = [expr]
    try:
        # Is it a dict ?
        expr = expr.values()
    except AttributeError:
        try:
            # Must be a list of dicts then
            expr = flatten([i.values() for i in expr])
        except AttributeError:
            pass
    try:
        # At this point it must be a list of SymPy objects
        # We don't use SymPy's count_ops because we do not count integer arithmetic
        # (e.g., array index functions such as i+1 in A[i+1])
        # Also, the routine below is *much* faster than count_ops
        expr = [i.rhs if i.is_Equality else i for i in expr]
        operations = flatten(retrieve_ops(i) for i in expr)
        flops = 0
        for op in operations:
            if op.is_Function:
                if estimate_functions:
                    flops += external_functions.get(op.__class__, 1)
                else:
                    flops += 1
            else:
                # n operands -> n-1 operations, not counting integer operands
                flops += len(op.args) - (1 + sum(True for i in op.args if i.is_Integer))
        return flops
    except Exception:
        # Best-effort estimate; unlike a bare `except`, this lets
        # KeyboardInterrupt and SystemExit propagate
        warning("Cannot estimate cost of %s" % str(expr))
        return None
def set_backend(backend):
    """
    Set the Devito backend.

    The module ``devito.<backend>`` is imported, recorded in ``backends`` and
    installed as ``_BackendSelector._backend``. A warning is emitted if a
    backend other than ``void`` was already selected.

    Raises
    ------
    ImportError
        If the backend module cannot be imported.
    """
    if _BackendSelector._backend != void:
        warning("WARNING: Switching backend to %s" % backend)

    module_name = 'devito.%s' % backend
    try:
        # We need to pass a non-empty fromlist so that __import__
        # returns the submodule (i.e. the backend) rather than the
        # package.
        module = __import__(module_name, fromlist=['None'])
    except ImportError:
        warning('Unable to import backend %s' % backend)
        raise

    backends[backend] = module
    _BackendSelector._backend = module
def wrapper(self):
    """
    Make sure ``self._data`` is allocated and initialised, then call ``func``.

    Allocation happens only the first time, that is while ``self._data`` is
    still None.
    """
    if self._data is not None:
        # Already allocated, nothing to set up
        return func(self)

    debug("Allocating memory for %s%s" % (self.name, self.shape_allocated))
    self._data = Data(self.shape_allocated, self.dtype,
                      modulo=self._mask_modulo, allocator=self._allocator)
    if self._first_touch:
        assign(self, 0)

    if not callable(self._initializer):
        # No user-provided initializer: zero everything, halo included
        self.data_with_halo.fill(0)
        return func(self)

    if self._first_touch:
        warning("`first touch` together with `initializer` causing "
                "redundant data initialization")
    try:
        self._initializer(self.data_with_halo)
    except ValueError:
        # Perhaps user only wants to initialise the physical domain
        self._initializer(self.data)
    return func(self)
def _arg_values(self, args, interval, grid, **kwargs):
    """
    Produce the runtime value of the block size ``self.step``.

    Parameters
    ----------
    args : dict-like
        Runtime arguments; must provide the values bound to
        ``self.root.max_name`` and ``self.root.min_name``.
    interval, grid
        Unused here.
    **kwargs
        May carry a user-provided block size under the key ``self.step.name``.

    Returns
    -------
    dict
        A single-entry mapper ``{self.step.name: value}``. The value is the
        requested (or default) block size if it fits in the iteration range,
        otherwise 1.

    Raises
    ------
    ValueError
        If the user-provided block size is not strictly positive.
    """
    name = self.step.name

    if name in kwargs:
        value = kwargs.pop(name)
        # Validate *before* comparing against the iteration range: a
        # non-positive value is trivially `<=` the range size, so checking it
        # afterwards would let illegal block sizes through
        if value <= 0:
            raise ValueError("Illegal block size `%s=%d` (it should be > 0)"
                             % (name, value))
        if value <= args[self.root.max_name] - args[self.root.min_name] + 1:
            return {name: value}
        # Avoid OOB
        warning("The specified block size `%s=%d` is bigger than the "
                "iteration range; shrinking it to `%s=1`."
                % (name, value, name))
        return {name: 1}

    value = self._arg_defaults()[name]
    if value <= args[self.root.max_name] - args[self.root.min_name] + 1:
        return {name: value}
    # Avoid OOB
    return {name: 1}
def __setstate__(self, state):
    """
    Restore the object from a pickled ``state``.

    The ``_soname`` and ``binary`` entries are removed from ``state``; every
    remaining entry becomes an attribute of ``self``. If a ``_soname`` was
    pickled, the binary is saved under the current ``self._soname``.
    """
    pickled_soname = state.pop('_soname', None)
    binary = state.pop('binary', None)
    for attr, value in state.items():
        setattr(self, attr, value)

    if pickled_soname is None:
        return

    # Mismatching sonames *might* reveal a hidden bug: the unpickled Operator
    # could be generating code that differs from the one generated by the
    # pickled Operator. A past example: SymPy expressions were rebuilt without
    # `evaluate=False`, giving x+2 on the unpickler instead of x+1+1.
    # A mismatch does not necessarily mean there is a bug, though: if pickler
    # and unpickler are distinct processes running with different
    # `configuration` dictionaries, the sonames may legitimately differ,
    # depending on which `configuration` entries differ.
    if pickled_soname != self._soname:
        warning("The pickled and unpickled Operators have different .sonames; "
                "this might be a bug, or simply a harmless difference in "
                "`configuration`. You may check they produce the same code.")
    save(self._soname, binary, self._compiler)
def _arg_check(self, args, intervals):
    """
    Validate the runtime value bound to ``self`` in ``args``.

    A dtype mismatch only triggers a warning. After the checks on ``self``,
    each Dimension in ``self.indices`` is checked against the corresponding
    extent of the runtime value and its entry in ``intervals``.

    Raises
    ------
    InvalidArgument
        If ``args`` has no entry for ``self.name``, or if the number of
        dimensions of the runtime value differs from ``self.ndim``. Checks
        delegated to the Dimensions may raise as well.
    """
    if self.name not in args:
        raise InvalidArgument("No runtime value for `%s`" % self.name)
    key = args[self.name]

    rank_matches = len(key.shape) == self.ndim
    if not rank_matches:
        raise InvalidArgument("Shape %s of runtime value `%s` does not match "
                              "dimensions %s" % (key.shape, self.name, self.indices))

    dtype_matches = key.dtype == self.dtype
    if not dtype_matches:
        warning("Data type %s of runtime value `%s` does not match the "
                "Function data type %s" % (key.dtype, self.name, self.dtype))

    # Delegate the per-Dimension checks, pairing each index with its extent
    for dim, size in zip(self.indices, key.shape):
        dim._arg_check(args, size, intervals[dim])
def __init__(self, *args, **kwargs):
    """
    Set up the Intel compiler flags on top of the base compiler setup.

    A warning is emitted if MPI was requested but the ``--version`` output
    of ``self.MPICC`` does not start with ``icc``.
    """
    super(IntelCompiler, self).__init__(*args, **kwargs)

    self.cflags += ["-xhost"]
    on_skylake = configuration['platform'] is SKX
    if on_skylake:
        # Systematically use 512-bit vectors on skylake
        self.cflags += ["-qopt-zmm-usage=high"]

    try:
        has_openmp4 = self.version >= version.StrictVersion("15.0.0")
        if has_openmp4:
            # icc15 and later implement OpenMP 4.0, hence they support
            # `#pragma omp simd`; the flag is therefore appended regardless
            # of configuration['openmp']
            self.ldflags += ['-qopenmp']
    except (TypeError, ValueError):
        if configuration['openmp']:
            # Note: fopenmp, not qopenmp, is what is needed by icc versions < 15.0
            self.ldflags += ['-fopenmp']

    if not kwargs.get('mpi'):
        return

    # Make sure the MPI compiler uses `icc` underneath -- whatever the MPI distro is
    raw_output = check_output([self.MPICC, "--version"])
    banner = raw_output.decode("utf-8")
    if not banner.startswith("icc"):
        warning("The MPI compiler `%s` doesn't use the Intel "
                "C/C++ compiler underneath" % self.MPICC)
def __init__(self, *args, **kwargs):
    """
    Initialise the object, unless it is already cached.

    Expected keyword arguments, besides those consumed by the parent class:

    * ``r``: a strictly positive int, the number of grid points per sparse
      point;
    * ``gridpoints``: the data copied into the ``<name>_gridpoints``
      SubFunction;
    * ``interpolation_coeffs``: the data copied into the
      ``<name>_interpolation_coeffs`` SubFunction.

    Raises
    ------
    TypeError
        If ``r`` is not an int.
    ValueError
        If ``r`` is not strictly positive.
    """
    if self._cached():
        return

    super(PrecomputedSparseFunction, self).__init__(*args, **kwargs)

    # Grid points per sparse point (2 in the case of bilinear and trilinear)
    radius = kwargs.get('r')
    if not isinstance(radius, int):
        raise TypeError('Need `r` int argument')
    if radius <= 0:
        raise ValueError('`r` must be > 0')
    self.r = radius

    # SubFunction carrying the grid points
    gp_function = SubFunction(
        name="%s_gridpoints" % self.name,
        dtype=np.int32,
        dimensions=(self.indices[-1], Dimension(name='d')),
        shape=(self.npoint, self.grid.dim),
        space_order=0,
        parent=self,
    )
    gp_values = kwargs.get('gridpoints')
    assert gp_values is not None
    gp_function.data[:] = gp_values[:]
    self._gridpoints = gp_function

    # SubFunction carrying the interpolation coefficients
    coeffs_function = SubFunction(
        name="%s_interpolation_coeffs" % self.name,
        dimensions=(self.indices[-1], Dimension(name='d'), Dimension(name='i')),
        shape=(self.npoint, self.grid.dim, self.r),
        dtype=self.dtype,
        space_order=0,
        parent=self,
    )
    coeffs_values = kwargs.get('interpolation_coeffs')
    assert coeffs_values is not None
    coeffs_function.data[:] = coeffs_values[:]
    self._interpolation_coeffs = coeffs_function

    warning("Ensure that the provided interpolation coefficient and grid point "
            "values are computed on the final grid that will be used for other "
            "computations.")
def __init__(self, *args, **kwargs):
    """
    Extend the parent compiler setup with the ``-xMIC-AVX512`` flag.

    A warning is emitted if ``configuration['openmp']`` is falsy.
    """
    super(IntelKNLCompiler, self).__init__(*args, **kwargs)
    self.cflags += ["-xMIC-AVX512"]

    openmp_enabled = configuration['openmp']
    if not openmp_enabled:
        warning("Running on Intel KNL without OpenMP is highly discouraged")
def make_yask_ast(expr, yc_soln, mapper=None):
    """
    Recursively translate ``expr`` into YASK AST nodes.

    Nodes are created through the module-level ``nfac`` factory, while YASK
    grids are created through ``yc_soln.new_grid``.

    Parameters
    ----------
    expr : expr-like
        The expression to translate. Handled kinds: Integer, Float, Rational,
        Symbol, Indexed, Add, Mul, Pow (integer exponents only), IntDiv and
        Equality.
    yc_soln
        The object used to create new YASK grids (via ``new_grid``).
    mapper : dict, optional
        Cache of already-translated objects. It maps functions to YASK grids
        (or, for Symbols appearing as LHS, to the translated RHS) and
        Dimension min/max symbols to YASK first/last domain indices. It is
        updated in place; a new dict is created if None.

    Returns
    -------
    The YASK object corresponding to ``expr``. No value is explicitly returned
    (i.e., None) for an Equality whose LHS is a Symbol: in that case only
    ``mapper`` is updated.

    Raises
    ------
    NotImplementedError
        If ``expr`` is a power with non-integer exponent, or if there is no
        handler for the type of ``expr``.
    """

    def nary2binary(args, op):
        # Turn an n-ary operation into a right-nested chain of binary `op` nodes
        r = make_yask_ast(args[0], yc_soln, mapper)
        return r if len(args) == 1 else op(r, nary2binary(args[1:], op))

    if mapper is None:
        mapper = {}

    if expr.is_Integer:
        return nfac.new_const_number_node(int(expr))
    elif expr.is_Float:
        return nfac.new_const_number_node(float(expr))
    elif expr.is_Rational:
        # Rationals are lowered to a floating point constant
        a, b = expr.as_numer_denom()
        return nfac.new_const_number_node(float(a)/float(b))
    elif expr.is_Symbol:
        function = expr.function
        if function.is_Constant:
            # Create a YASK grid if it's the first time we encounter the embedded Function
            if function not in mapper:
                mapper[function] = yc_soln.new_grid(function.name, [])
                # Allow number of time-steps to be set in YASK kernel.
                mapper[function].set_dynamic_step_alloc(True)
            return mapper[function].new_grid_point([])
        elif function.is_Dimension:
            if expr.is_Time:
                return nfac.new_step_index(expr.name)
            elif expr.is_Space:
                # `expr.root` instead of `expr` because YASK wants the SubDimension
                # information to be provided as if-conditions, and this is handled
                # a-posteriori directly by `yaskit`
                return nfac.new_domain_index(expr.root.name)
            else:
                return nfac.new_misc_index(expr.name)
        else:
            # E.g., A DSE-generated temporary, which must have already been
            # encountered as a LHS of a previous expression
            assert function in mapper
            return mapper[function]
    elif expr.is_Indexed:
        function = expr.function
        # Create a YASK grid if it's the first time we encounter the embedded Function
        if function not in mapper:
            dimensions = [make_yask_ast(i.root, yc_soln, mapper)
                          for i in function.indices]
            mapper[function] = yc_soln.new_grid(function.name, dimensions)
            # Allow number of time-steps to be set in YASK kernel.
            mapper[function].set_dynamic_step_alloc(True)
            # We also get to know some relevant Dimension-related symbols
            # For example, the min point of the `x` Dimension, `x_m`, should
            # be mapped to YASK's `FIRST(x)`
            for d in function.indices:
                node = nfac.new_domain_index(d.name)
                mapper[d.symbolic_min] = nfac.new_first_domain_index(node)
                mapper[d.symbolic_max] = nfac.new_last_domain_index(node)
        indices = [make_yask_ast(i, yc_soln, mapper) for i in expr.indices]
        return mapper[function].new_grid_point(indices)
    elif expr.is_Add:
        return nary2binary(expr.args, nfac.new_add_node)
    elif expr.is_Mul:
        return nary2binary(expr.args, nfac.new_multiply_node)
    elif expr.is_Pow:
        base, exp = expr.as_base_exp()
        if not exp.is_integer:
            raise NotImplementedError("Non-integer powers unsupported in "
                                      "Devito-YASK translation")
        if int(exp) < 0:
            # Negative power: emit numerator/denominator as a division
            num, den = expr.as_numer_denom()
            return nfac.new_divide_node(make_yask_ast(num, yc_soln, mapper),
                                        make_yask_ast(den, yc_soln, mapper))
        elif int(exp) >= 1:
            # Positive power: unroll into repeated multiplications
            return nary2binary([base] * exp, nfac.new_multiply_node)
        else:
            warning("0-power found in Devito-YASK translation? setting to 1")
            return nfac.new_const_number_node(1)
    elif isinstance(expr, IntDiv):
        return nfac.new_divide_node(make_yask_ast(expr.lhs, yc_soln, mapper),
                                    make_yask_ast(expr.rhs, yc_soln, mapper))
    elif expr.is_Equality:
        if expr.lhs.is_Symbol:
            function = expr.lhs.function
            # The IETs are always in SSA form, so the only situation in
            # which `function` may already appear in `mapper` is when we've
            # already processed it as part of a different set of
            # boundary conditions. For example consider `expr = a[x]*2`:
            # first time, expr executed iff `x == FIRST_INDEX(x) + 7`
            # second time, expr executed iff `x == FIRST_INDEX(x) + 6`
            if function not in mapper:
                mapper[function] = make_yask_ast(expr.rhs, yc_soln, mapper)
            # Nothing is returned on this path; only `mapper` gets updated
        else:
            return nfac.new_equation_node(*[make_yask_ast(i, yc_soln, mapper)
                                            for i in expr.args])
    else:
        raise NotImplementedError("Missing handler in Devito-YASK translation")
def _specialize_iet(self, iet, **kwargs):
    """
    Return ``iet`` unchanged.

    A warning is emitted to signal that the OPS backend is not complete yet.
    """
    message = "The OPS backend is still work-in-progress"
    warning(message)
    return iet