def visit_Conditional(self, o):
    """Build a cgen ``If`` for a Conditional node, with optional else block."""
    true_branch = c.Block(self._visit(o.then_body))
    if not o.else_body:
        return c.If(ccode(o.condition), true_branch)
    false_branch = c.Block(self._visit(o.else_body))
    return c.If(ccode(o.condition), true_branch, false_branch)
def __repr__(self):
    """Return ``<[cond] ? [then] : [else]>``, or ``<[cond] ? [then]>`` when
    there is no else body."""
    if self.else_body:
        return "<[%s] ? [%s] : [%s]>" % (ccode(self.condition),
                                         repr(self.then_body),
                                         repr(self.else_body))
    else:
        # Bug fix: the closing '>' was missing in the no-else form, making
        # the repr inconsistent with the ternary form above
        return "<[%s] ? [%s]>" % (ccode(self.condition), repr(self.then_body))
def visit_Conditional(self, o):
    """Generate a cgen ``If`` statement; the else branch is emitted only
    when present on the node."""
    then_block = c.Block(self.visit(o.then_body))
    if o.else_body:
        return c.If(ccode(o.condition), then_block,
                    c.Block(self.visit(o.else_body)))
    return c.If(ccode(o.condition), then_block)
def _specialize_iet(self, nodes):
    """Transform the Iteration/Expression tree to offload the computation of
    one or more loop nests onto YASK. This involves calling the YASK compiler
    to generate YASK code. Such YASK code is then called from within the
    transformed Iteration/Expression tree."""
    log("Specializing a Devito Operator for YASK...")
    # Default to no-op context/kernel; replaced below on successful offload
    self.context = YaskNullContext()
    self.yk_soln = YaskNullKernel()
    offloadable = find_offloadable_trees(nodes)
    if len(offloadable) == 0:
        log("No offloadable trees found")
    elif len(offloadable) == 1:
        tree, grid, dtype = offloadable[0]
        self.context = contexts.fetch(grid, dtype)
        # Create a YASK compiler solution for this Operator
        yc_soln = self.context.make_yc_solution(namespace['jit-yc-soln'])
        transform = sympy2yask(self.context, yc_soln)
        try:
            for i in tree[-1].nodes:
                transform(i.expr)
            funcall = make_sharedptr_funcall(namespace['code-soln-run'],
                                             ['time'],
                                             namespace['code-soln-name'])
            funcall = Element(c.Statement(ccode(funcall)))
            nodes = Transformer({tree[1]: funcall}).visit(nodes)
            # Track /funcall/ as an external function call
            self.func_table[namespace['code-soln-run']] = MetaCall(None, False)
            # JIT-compile the newly-created YASK kernel
            local_grids = [i for i in transform.mapper if i.is_Array]
            self.yk_soln = self.context.make_yk_solution(
                namespace['jit-yk-soln'], yc_soln, local_grids)
            # Print some useful information about the newly constructed solution
            log("Solution '%s' contains %d grid(s) and %d equation(s)." %
                (yc_soln.get_name(), yc_soln.get_num_grids(),
                 yc_soln.get_num_equations()))
        except Exception:
            # Bug fix: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit. Offloading remains best-effort:
            # on failure we fall back to the null kernel set above.
            log("Unable to offload a candidate tree.")
    else:
        exit("Found more than one offloadable trees in a single Operator")
    # Some Iteration/Expression trees are not offloaded to YASK and may
    # require further processing to be executed in YASK, due to the differences
    # in storage layout employed by Devito and YASK
    nodes = make_grid_accesses(nodes)
    log("Specialization successfully performed!")
    return nodes
def __repr__(self):
    """Summarize the Iteration: properties prefix, index (with any unbounded
    sub-indices), and loop limits."""
    if self.properties:
        prefix = "WithProperties[%s]::" % ",".join(str(p)
                                                   for p in self.properties)
    else:
        prefix = ""
    label = self.index
    if self.uindices:
        label = '%s[%s]' % (label,
                            ','.join(ccode(u.index) for u in self.uindices))
    return "<%sIteration %s; %s>" % (prefix, label, self.limits)
def visit_ArrayCast(self, o):
    """
    Build cgen type casts for an :class:`AbstractFunction`.
    """
    f = o.function
    # The outermost dimension is dropped: it is absorbed by the pointer itself
    shape = ''.join('[%s]' % ccode(d) for d in f.symbolic_shape[1:])
    align = "__attribute__((aligned(64)))"
    lvalue = c.POD(f.dtype, '(*restrict %s)%s %s' % (f.name, shape, align))
    rvalue = '(%s (*)%s) %s' % (c.dtype_to_ctype(f.dtype), shape,
                                '%s_vec' % f.name)
    return c.Initializer(lvalue, rvalue)
def ccode(self):
    """Generate C code for the represented stencil loop

    :returns: :class:`cgen.For` object representing the loop
    """
    loop_body = [s.ccode for s in self.nodes]
    # Start
    if self.offsets[0] != 0:
        start = "%s + %s" % (self.limits[0], -self.offsets[0])
        try:
            # Fold to a plain number when both operands are numeric.
            # NOTE(review): eval runs on internally-generated bound strings,
            # not user input -- confirm no external data reaches here
            start = eval(start)
        except (NameError, TypeError):
            # Symbolic bound: keep the textual "<limit> + <offset>" form
            pass
    else:
        start = self.limits[0]
    # Bound
    if self.offsets[1] != 0:
        end = "%s - %s" % (self.limits[1], self.offsets[1])
        try:
            end = eval(end)
        except (NameError, TypeError):
            pass
    else:
        end = self.limits[1]
    # For reverse dimensions flip loop bounds
    if self.dim.reverse:
        # Reverse: start at end-1 and decrement down to `start`
        loop_init = c.InlineInitializer(c.Value("int", self.index),
                                        ccode('%s - 1' % end))
        loop_cond = '%s >= %s' % (self.index, ccode(start))
        loop_inc = '%s -= %s' % (self.index, self.limits[2])
    else:
        loop_init = c.InlineInitializer(c.Value("int", self.index),
                                        ccode(start))
        loop_cond = '%s < %s' % (self.index, ccode(end))
        loop_inc = '%s += %s' % (self.index, self.limits[2])
    return c.For(loop_init, loop_cond, loop_inc, c.Block(loop_body))
def visit_ArrayCast(self, o):
    """Cast a function's flat C pointer to a multi-dimensional array view."""
    f = o.function
    shape = ''.join('[%s]' % ccode(i) for i in o.castshape)
    # rvalue -- DiscreteFunctions keep their data behind a struct field
    if f.is_DiscreteFunction:
        source = '%s->%s' % (f._C_name, f._C_field_data)
    else:
        source = f._C_name
    rvalue = '(%s (*)%s) %s' % (f._C_typedata, shape, source)
    # lvalue -- an aligned, restrict-qualified pointer-to-array
    lvalue = c.AlignedAttribute(
        f._data_alignment,
        c.Value(f._C_typedata, '(*restrict %s)%s' % (f.name, shape)))
    return c.Initializer(lvalue, rvalue)
def _args_call(self, args):
    """Generate cgen function call arguments from an iterable of symbols and
    expressions."""
    processed = []
    for arg in args:
        try:
            if arg.is_LocalObject:
                item = '&%s' % arg._C_name
            elif arg.is_Array:
                item = "(%s)%s" % (arg._C_typename, arg.name)
            else:
                item = arg._C_name
        except AttributeError:
            # Plain expressions have no such flags; emit C code directly
            item = ccode(arg)
        processed.append(item)
    return processed
def visit_Iteration(self, o): body = flatten(self.visit(i) for i in o.children) # Start if o.offsets[0] != 0: start = str(o.limits[0] + o.offsets[0]) try: start = eval(start) except (NameError, TypeError): pass else: start = o.limits[0] # Bound if o.offsets[1] != 0: end = str(o.limits[1] + o.offsets[1]) try: end = eval(end) except (NameError, TypeError): pass else: end = o.limits[1] # For backward direction flip loop bounds if o.direction == Backward: loop_init = 'int %s = %s' % (o.index, ccode(end)) loop_cond = '%s >= %s' % (o.index, ccode(start)) loop_inc = '%s -= %s' % (o.index, o.limits[2]) else: loop_init = 'int %s = %s' % (o.index, ccode(start)) loop_cond = '%s <= %s' % (o.index, ccode(end)) loop_inc = '%s += %s' % (o.index, o.limits[2]) # Append unbounded indices, if any if o.uindices: uinit = [ '%s = %s' % (i.name, ccode(i.symbolic_start)) for i in o.uindices ] loop_init = c.Line(', '.join([loop_init] + uinit)) ustep = [ '%s = %s' % (i.name, ccode(i.symbolic_incr)) for i in o.uindices ] loop_inc = c.Line(', '.join([loop_inc] + ustep)) # Create For header+body handle = c.For(loop_init, loop_cond, loop_inc, c.Block(body)) # Attach pragmas, if any if o.pragmas: handle = c.Module(o.pragmas + (handle, )) return handle
def _args_call(self, args):
    """Generate cgen function call arguments from an iterable of symbols and
    expressions."""
    ret = []
    for arg in args:
        try:
            if arg.is_Object:
                handled = '*_%s' % arg.name
            elif arg.is_Array:
                handled = "(%s*)%s" % (c.dtype_to_ctype(arg.dtype), arg.name)
            elif arg.is_Symbol:
                handled = arg.name
            elif arg.is_TensorFunction:
                handled = '%s_vec' % arg.name
            else:
                # NOTE(review): arguments matching none of the flags are
                # silently dropped, unlike sibling implementations that fall
                # back to the argument's C name -- confirm this is intentional
                continue
        except AttributeError:
            # Plain expression: emit C code for it
            handled = ccode(arg)
        ret.append(handled)
    return ret
def _args_cast(self, args):
    """Build cgen type casts for an iterable of :class:`Argument`."""
    casts = []
    for arg in args:
        if arg.is_TensorArgument:
            # Cast the flat "<name>_vec" pointer to an aligned pointer-to-array
            dims = ''.join("[%s]" % ccode(d)
                           for d in arg.provider.symbolic_shape[1:])
            lvalue = c.POD(arg.dtype,
                           '(*restrict %s)%s %s'
                           % (arg.name, dims, "__attribute__((aligned(64)))"))
            rvalue = '(%s (*)%s) %s' % (c.dtype_to_ctype(arg.dtype), dims,
                                        '%s_vec' % arg.name)
            casts.append(c.Initializer(lvalue, rvalue))
        elif arg.is_PtrArgument:
            # Recover the typed pointer from the generic "_<name>" handle
            ctype = ctypes_to_C(arg.dtype)
            casts.append(c.Initializer(c.Pointer(c.Value(ctype, arg.name)),
                                       '(%s*) %s' % (ctype, '_%s' % arg.name)))
    return casts
def visit_Iteration(self, o): body = flatten(self._visit(i) for i in o.children) # Start if o.offsets[0] != 0: _min = str(o.limits[0] + o.offsets[0]) try: _min = eval(_min) except (NameError, TypeError): pass else: _min = o.limits[0] # Bound if o.offsets[1] != 0: _max = str(o.limits[1] + o.offsets[1]) try: _max = eval(_max) except (NameError, TypeError): pass else: _max = o.limits[1] # For backward direction flip loop bounds if o.direction == Backward: loop_init = 'int %s = %s' % (o.index, ccode(_max)) loop_cond = '%s >= %s' % (o.index, ccode(_min)) loop_inc = '%s -= %s' % (o.index, o.limits[2]) else: loop_init = 'int %s = %s' % (o.index, ccode(_min)) loop_cond = '%s <= %s' % (o.index, ccode(_max)) loop_inc = '%s += %s' % (o.index, o.limits[2]) # Append unbounded indices, if any if o.uindices: uinit = ['%s = %s' % (i.name, ccode(i.symbolic_min)) for i in o.uindices] loop_init = c.Line(', '.join([loop_init] + uinit)) ustep = ['%s = %s' % (i.name, ccode(i.symbolic_incr)) for i in o.uindices] loop_inc = c.Line(', '.join([loop_inc] + ustep)) # Create For header+body handle = c.For(loop_init, loop_cond, loop_inc, c.Block(body)) # Attach pragmas, if any if o.pragmas: handle = c.Module(o.pragmas + (handle,)) return handle
def make_grid_accesses(node):
    """
    Construct a new Iteration/Expression based on ``node``, in which all
    :class:`types.Indexed` accesses have been converted into YASK grid
    accesses.
    """

    def make_grid_gets(expr):
        # Recursively rewrite Indexed reads of YASK-backed functions into
        # shared-pointer grid `get` calls; recursion is needed because the
        # indices themselves may contain such reads
        mapper = {}
        indexeds = retrieve_indexed(expr)
        data_carriers = [i for i in indexeds if i.base.function.from_YASK]
        for i in data_carriers:
            name = namespace['code-grid-name'](i.base.function.name)
            args = [ListInitializer([INT(make_grid_gets(j))
                                     for j in i.indices])]
            mapper[i] = make_sharedptr_funcall(namespace['code-grid-get'],
                                               args, name)
        return expr.xreplace(mapper)

    mapper = {}
    # NOTE(review): the loop counter `i` is unused
    for i, e in enumerate(FindNodes(Expression).visit(node)):
        lhs, rhs = e.expr.args
        # RHS translation
        rhs = make_grid_gets(rhs)
        # LHS translation
        if e.write.from_YASK:
            # Writes to YASK-backed data become grid `put` calls, taking the
            # translated RHS plus the (translated) index list
            name = namespace['code-grid-name'](e.write.name)
            args = [rhs]
            args += [ListInitializer([INT(make_grid_gets(i))
                                      for i in lhs.indices])]
            handle = make_sharedptr_funcall(namespace['code-grid-put'],
                                            args, name)
            processed = Element(c.Statement(ccode(handle)))
        else:
            # Writing to a scalar temporary
            processed = Expression(e.expr.func(lhs, rhs))
        mapper.update({e: processed})
    return Transformer(mapper).visit(node)
def _args_call(self, args):
    """Generate cgen function call arguments from an iterable of symbols and
    expressions."""
    ret = []
    for arg in args:
        try:
            if isinstance(arg, Call):
                # Nested call: render it and splice in its text
                ret.append(self.visit(arg).text)
            elif arg.is_LocalObject:
                ret.append('&%s' % arg._C_name)
            elif arg.is_Array:
                ret.append("(%s)%s" % (arg._C_typename, arg.name))
            elif arg.is_StringLiteral:
                ret.append('"%s"' % arg.value)
            elif arg.is_Function:
                ret.append("%s" % arg.name)
            elif arg.is_FunctionPointer:
                ret.append("%s" % arg.fname)
            else:
                ret.append(arg._C_name)
        except AttributeError:
            # Anything else is a plain expression: emit C code for it
            ret.append(ccode(arg))
    return ret
def _specialize_iet(self, iet, **kwargs):
    """
    Transform the Iteration/Expression tree to offload the computation of
    one or more loop nests onto YASK. This involves calling the YASK compiler
    to generate YASK code. Such YASK code is then called from within the
    transformed Iteration/Expression tree.
    """
    offloadable = find_offloadable_trees(iet)
    if len(offloadable.trees) == 0:
        self.yk_soln = YaskNullKernel()
        log("No offloadable trees found")
    else:
        context = contexts.fetch(offloadable.grid, offloadable.dtype)
        # A unique name for the 'real' compiler and kernel solutions
        name = namespace['jit-soln'](Signer._digest(iet, configuration))
        # Create a YASK compiler solution for this Operator
        yc_soln = context.make_yc_solution(name)
        try:
            trees = offloadable.trees
            # Generate YASK grids and populate `yc_soln` with equations
            mapper = yaskizer(trees, yc_soln)
            local_grids = [i for i in mapper if i.is_Array]
            # Transform the IET
            funcall = make_sharedptr_funcall(namespace['code-soln-run'],
                                             ['time'],
                                             namespace['code-soln-name'])
            funcall = Element(c.Statement(ccode(funcall)))
            mapper = {trees[0].root: funcall}
            # Map the remaining roots to None so the Transformer drops them
            mapper.update({i.root: mapper.get(i.root) for i in trees})
            iet = Transformer(mapper).visit(iet)
            # Mark `funcall` as an external function call
            self.func_table[namespace['code-soln-run']] = MetaCall(None, False)
            # JIT-compile the newly-created YASK kernel
            self.yk_soln = context.make_yk_solution(name, yc_soln, local_grids)
            # Print some useful information about the newly constructed solution
            log("Solution '%s' contains %d grid(s) and %d equation(s)." %
                (yc_soln.get_name(), yc_soln.get_num_grids(),
                 yc_soln.get_num_equations()))
        except NotImplementedError as e:
            self.yk_soln = YaskNullKernel()
            # Bug fix: the original string literal contained a raw newline,
            # which is a syntax error in a plain (non-triple-quoted) string
            log("Unable to offload a candidate tree. Reason: [%s]" % str(e))
    # Some Iteration/Expression trees are not offloaded to YASK and may
    # require further processing to be executed in YASK, due to the differences
    # in storage layout employed by Devito and YASK
    iet = make_grid_accesses(iet)
    # Finally optimize all non-yaskized loops
    iet = super(Operator, self)._specialize_iet(iet, **kwargs)
    return iet
def visit_ForeignExpression(self, o):
    """Emit the foreign expression verbatim as a C statement."""
    code = ccode(o.expr)
    return c.Statement(code)
def visit_LocalExpression(self, o):
    """Declare-and-initialize a local: ``<ctype> <lhs> = <rhs>;``."""
    lhs = ccode(o.expr.lhs, dtype=o.dtype)
    rhs = ccode(o.expr.rhs, dtype=o.dtype)
    return c.Initializer(c.Value(c.dtype_to_ctype(o.dtype), lhs), rhs)
def visit_Expression(self, o):
    """Render an assignment statement: ``<lhs> = <rhs>;``."""
    lhs = ccode(o.expr.lhs, dtype=o.dtype)
    rhs = ccode(o.expr.rhs, dtype=o.dtype)
    return c.Assign(lhs, rhs)
def ccode(self):
    """Declare-and-initialize C code for this expression."""
    lhs = ccode(self.expr.lhs)
    rhs = ccode(self.expr.rhs)
    return c.Initializer(c.Value(c.dtype_to_ctype(self.dtype), lhs), rhs)
def _minimize_remainders(self, iet): """ Reshape temporary tensors and adjust loop trip counts to prevent as many compiler-generated remainder loops as possible. """ # The innermost dimension is the one that might get padded p_dim = -1 mapper = {} for tree in retrieve_iteration_tree(iet): vector_iterations = [i for i in tree if i.is_Vectorizable] if not vector_iterations or len(vector_iterations) > 1: continue root = vector_iterations[0] # Padding writes = [i.write for i in FindNodes(Expression).visit(root) if i.write.is_Array] padding = [] for i in writes: try: simd_items = self.platform.simd_items_per_reg(i.dtype) except KeyError: return iet, {} padding.append(simd_items - i.shape[-1] % simd_items) if len(set(padding)) == 1: padding = padding[0] for i in writes: padded = (i._padding[p_dim][0], i._padding[p_dim][1] + padding) i.update(padding=i._padding[:p_dim] + (padded,)) else: # Padding must be uniform -- not the case, so giving up continue # Dynamic trip count adjustment endpoint = root.symbolic_max if not endpoint.is_Symbol: continue condition = [] externals = set(i.symbolic_shape[-1] for i in FindSymbols().visit(root) if i.is_Tensor) for i in root.uindices: for j in externals: condition.append(root.symbolic_max + padding < j) condition = ' && '.join(ccode(i) for i in condition) endpoint_padded = endpoint.func('_%s' % endpoint.name) init = cgen.Initializer( cgen.Value("const int", endpoint_padded), cgen.Line('(%s) ? %s : %s' % (condition, ccode(endpoint + padding), endpoint)) ) # Update the Iteration bound limits = list(root.limits) limits[1] = endpoint_padded.func(endpoint_padded.name) rebuilt = list(tree) rebuilt[rebuilt.index(root)] = root._rebuild(limits=limits) mapper[tree[0]] = List(header=init, body=compose_nodes(rebuilt)) processed = Transformer(mapper).visit(iet) return processed, {}
def _minimize_remainders(self, state, **kwargs):
    """
    Reshape temporary tensors and adjust loop trip counts to prevent as many
    compiler-generated remainder loops as possible.
    """
    mapper = {}
    for tree in retrieve_iteration_tree(state.nodes +
                                        state.elemental_functions):
        vector_iterations = [i for i in tree if i.is_Vectorizable]
        if not vector_iterations:
            continue
        assert len(vector_iterations) == 1
        root = vector_iterations[0]
        if root.tag is None:
            continue
        # Padding
        writes = [i for i in FindSymbols('symbolics-writes').visit(root)
                  if i.is_TensorFunction]
        padding = []
        for i in writes:
            try:
                simd_items = get_simd_items(i.dtype)
            except KeyError:
                # Fallback to 16 (maximum expectable padding, for AVX512 registers)
                simd_items = simdinfo['avx512f'] / np.dtype(i.dtype).itemsize
            padding.append(simd_items - i.shape[-1] % simd_items)
        if len(set(padding)) == 1:
            padding = padding[0]
            for i in writes:
                # Extend the innermost dimension by the uniform padding
                i.update(shape=i.shape[:-1] + (i.shape[-1] + padding, ))
        else:
            # Padding must be uniform -- not the case, so giving up
            continue
        # Dynamic trip count adjustment
        endpoint = root.end_symbolic
        if not endpoint.is_Symbol:
            continue
        condition = []
        externals = set(i.symbolic_shape[-1]
                        for i in FindSymbols().visit(root))
        for i in root.uindices:
            for j in externals:
                condition.append(root.end_symbolic + padding < j)
        condition = ' || '.join(ccode(i) for i in condition)
        endpoint_padded = endpoint.func(name='_%s' % endpoint.name)
        # Bug fix: the ternary string literal was split by a raw newline
        # ('(%s) ? \n %s : %s'), a syntax error in a plain string literal
        init = cgen.Initializer(
            cgen.Value("const int", endpoint_padded),
            cgen.Line('(%s) ? %s : %s' % (condition,
                                          ccode(endpoint + padding),
                                          endpoint)))
        # Update the Iteration bound
        limits = list(root.limits)
        limits[1] = endpoint_padded.func(endpoint_padded.name)
        rebuilt = list(tree)
        rebuilt[rebuilt.index(root)] = root._rebuild(limits=limits)
        mapper[tree[0]] = List(header=init, body=compose_nodes(rebuilt))
    nodes = Transformer(mapper).visit(state.nodes)
    elemental_functions = Transformer(mapper).visit(state.elemental_functions)
    return {'nodes': nodes, 'elemental_functions': elemental_functions}
def visit_Expression(self, o):
    """Render a plain assignment statement: ``<lhs> = <rhs>;``."""
    lhs, rhs = ccode(o.expr.lhs), ccode(o.expr.rhs)
    return c.Assign(lhs, rhs)
def _specialize(self, nodes, parameters):
    """
    Create a YASK representation of this Iteration/Expression tree.

    ``parameters`` is modified in-place adding YASK-related arguments.
    """
    log("Specializing a Devito Operator for YASK...")
    # Find offloadable Iteration/Expression trees
    offloadable = []
    for tree in retrieve_iteration_tree(nodes):
        parallel = filter_iterations(tree, lambda i: i.is_Parallel)
        if not parallel:
            # Cannot offload non-parallel loops
            continue
        if not (IsPerfectIteration().visit(tree) and
                all(i.is_Expression for i in tree[-1].nodes)):
            # Don't know how to offload this Iteration/Expression to YASK
            continue
        functions = flatten(i.functions for i in tree[-1].nodes)
        keys = set((i.indices, i.shape, i.dtype)
                   for i in functions if i.is_TimeData)
        if len(keys) == 0:
            continue
        elif len(keys) > 1:
            exit("Cannot handle Operators w/ heterogeneous grids")
        dimensions, shape, dtype = keys.pop()
        if len(dimensions) == len(tree) and \
                all(i.dim == j for i, j in zip(tree, dimensions)):
            # Detected a "full" Iteration/Expression tree (over both
            # time and space dimensions)
            offloadable.append((tree, dimensions, shape, dtype))
    # Construct YASK ASTs given Devito expressions. New grids may be allocated.
    if len(offloadable) == 0:
        # No offloadable trees found
        self.context = YaskNullContext()
        self.yk_soln = YaskNullSolution()
        processed = nodes
        log("No offloadable trees found")
    elif len(offloadable) == 1:
        # Found *the* offloadable tree for this Operator
        tree, dimensions, shape, dtype = offloadable[0]
        self.context = contexts.fetch(dimensions, shape, dtype)
        # Create a YASK compiler solution for this Operator
        # Note: this can be dropped as soon as the kernel has been built
        yc_soln = self.context.make_yc_solution(namespace['jit-yc-soln'])
        transform = sympy2yask(self.context, yc_soln)
        try:
            for i in tree[-1].nodes:
                transform(i.expr)
            funcall = make_sharedptr_funcall(namespace['code-soln-run'],
                                             ['time'],
                                             namespace['code-soln-name'])
            funcall = Element(c.Statement(ccode(funcall)))
            processed = Transformer({tree[1]: funcall}).visit(nodes)
            # Track this is an external function call
            self.func_table[namespace['code-soln-run']] = FunMeta(None, False)
            # JIT-compile the newly-created YASK kernel
            self.yk_soln = self.context.make_yk_solution(
                namespace['jit-yk-soln'], yc_soln)
            # Now we must drop a pointer to the YASK solution down to C-land
            parameters.append(Object(namespace['code-soln-name'],
                                     namespace['type-solution'],
                                     self.yk_soln.rawpointer))
            # Print some useful information about the newly constructed solution
            log("Solution '%s' contains %d grid(s) and %d equation(s)." %
                (yc_soln.get_name(), yc_soln.get_num_grids(),
                 yc_soln.get_num_equations()))
        except Exception:
            # Bug fix: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit; offloading remains best-effort
            self.yk_soln = YaskNullSolution()
            processed = nodes
            log("Unable to offload a candidate tree.")
    else:
        exit("Found more than one offloadable trees in a single Operator")
    # Some Iteration/Expression trees are not offloaded to YASK and may
    # require further processing to be executed in YASK, due to the differences
    # in storage layout employed by Devito and YASK
    processed = make_grid_accesses(processed)
    # Update the parameters list adding all necessary YASK grids
    for i in list(parameters):
        try:
            if i.from_YASK:
                parameters.append(Object(namespace['code-grid-name'](i.name),
                                         namespace['type-grid'],
                                         i.data.rawpointer))
        except AttributeError:
            # Ignore e.g. Dimensions
            pass
    log("Specialization successfully performed!")
    return processed
def visit_Expression(self, o):
    """Render an assignment, propagating the expression's dtype."""
    dtype = o.dtype
    return c.Assign(ccode(o.expr.lhs, dtype=dtype),
                    ccode(o.expr.rhs, dtype=dtype))
def visit_Increment(self, o):
    """Render an in-place addition: ``<lhs> += <rhs>;``."""
    lhs = ccode(o.expr.lhs, dtype=o.dtype)
    rhs = ccode(o.expr.rhs, dtype=o.dtype)
    return c.Statement("%s += %s" % (lhs, rhs))
def visit_LocalExpression(self, o):
    """Declare and initialize a local in a single statement."""
    ctype = dtype_to_cstr(o.dtype)
    lhs = ccode(o.expr.lhs, dtype=o.dtype)
    rhs = ccode(o.expr.rhs, dtype=o.dtype)
    return c.Initializer(c.Value(ctype, lhs), rhs)
def visit_ForeignExpression(self, o):
    """Pass the expression through as a verbatim C statement."""
    return c.Statement(ccode(o.expr))
def _create_elemental_functions(self, nodes, state):
    """
    Extract :class:`Iteration` sub-trees and move them into :class:`Callable`s.

    Currently, only tagged, elementizable Iteration objects are targeted.
    """
    noinline = self._compiler_decoration('noinline',
                                         c.Comment('noinline?'))
    functions = OrderedDict()
    mapper = {}
    for tree in retrieve_iteration_tree(nodes, mode='superset'):
        # Search an elementizable sub-tree (if any)
        tagged = filter_iterations(tree, lambda i: i.tag is not None, 'asap')
        if not tagged:
            continue
        root = tagged[0]
        if not root.is_Elementizable:
            continue
        target = tree[tree.index(root):]
        # Elemental function arguments
        args = []  # Found so far (scalars, tensors)
        maybe_required = set()  # Scalars that *may* have to be passed in
        not_required = set()  # Elemental function locally declared scalars
        # Build a new Iteration/Expression tree with free bounds
        free = []
        for i in target:
            name, bounds = i.dim.name, i.bounds_symbolic
            # Iteration bounds become "<dim>_start"/"<dim>_finish" parameters
            start = Scalar(name='%s_start' % name, dtype=np.int32)
            finish = Scalar(name='%s_finish' % name, dtype=np.int32)
            args.extend(zip([ccode(j) for j in bounds], (start, finish)))
            # Iteration unbounded indices become "<dim>_ub<n>" parameters
            ufunc = [Scalar(name='%s_ub%d' % (name, j), dtype=np.int32)
                     for j in range(len(i.uindices))]
            args.extend(zip([ccode(j.start) for j in i.uindices], ufunc))
            limits = [Symbol(start.name), Symbol(finish.name), 1]
            uindices = [UnboundedIndex(j.index, i.dim + as_symbol(k))
                        for j, k in zip(i.uindices, ufunc)]
            free.append(i._rebuild(limits=limits, offsets=None,
                                   uindices=uindices))
            not_required.update({i.dim}, set(j.index for j in i.uindices))
        # Construct elemental function body, and inspect it
        free = NestedTransformer(dict((zip(target, free)))).visit(root)
        expressions = FindNodes(Expression).visit(free)
        fsymbols = FindSymbols('symbolics').visit(free)
        # Add all definitely-required arguments
        not_required.update({i.output for i in expressions if i.is_scalar})
        for i in fsymbols:
            if i in not_required:
                continue
            elif i.is_Array:
                args.append(("(%s*)%s" % (c.dtype_to_ctype(i.dtype), i.name),
                             i))
            elif i.is_TensorFunction:
                args.append(("%s_vec" % i.name, i))
            elif i.is_Scalar:
                args.append((i.name, i))
        # Add all maybe-required arguments that turn out to be required
        maybe_required.update(set(FindSymbols(mode='free-symbols').visit(free)))
        for i in fsymbols:
            not_required.update({as_symbol(i), i.indexify()})
            for j in i.symbolic_shape:
                maybe_required.update(j.free_symbols)
        required = filter_sorted(maybe_required - not_required,
                                 key=attrgetter('name'))
        args.extend([(i.name, Scalar(name=i.name, dtype=i.dtype))
                     for i in required])
        # `call` carries the textual call-site arguments, `params` the
        # corresponding symbolic parameters
        call, params = zip(*args)
        handle = flatten([p.rtargs for p in params])
        name = "f_%d" % root.tag
        # Produce the new Call
        mapper[root] = List(header=noinline, body=Call(name, call))
        # Produce the new Callable
        functions.setdefault(name,
                             Callable(name, free, 'void', handle,
                                      ('static', )))
    # Transform the main tree
    processed = Transformer(mapper).visit(nodes)
    return processed, {'elemental_functions': functions.values()}
def _minimize_remainders(self, iet): """ Reshape temporary tensors and adjust loop trip counts to prevent as many compiler-generated remainder loops as possible. """ # The innermost dimension is the one that might get padded p_dim = -1 mapper = {} for tree in retrieve_iteration_tree(iet): vector_iterations = [i for i in tree if i.is_Vectorizable] if not vector_iterations or len(vector_iterations) > 1: continue root = vector_iterations[0] # Padding writes = [i.write for i in FindNodes(Expression).visit(root) if i.write.is_Array] padding = [] for i in writes: try: simd_items = self.platform.simd_items_per_reg(i.dtype) except KeyError: return iet, {} padding.append(simd_items - i.shape[-1] % simd_items) if len(set(padding)) == 1: padding = padding[0] for i in writes: padded = (i._padding[p_dim][0], i._padding[p_dim][1] + padding) i.update(padding=i._padding[:p_dim] + (padded,)) else: # Padding must be uniform -- not the case, so giving up continue # Dynamic trip count adjustment endpoint = root.symbolic_max if not endpoint.is_Symbol: continue condition = [] externals = set(i.symbolic_shape[-1] for i in FindSymbols().visit(root) if i.is_Tensor) for i in root.uindices: for j in externals: condition.append(root.symbolic_max + padding < j) condition = ' && '.join(ccode(i) for i in condition) endpoint_padded = endpoint.func('_%s' % endpoint.name) init = cgen.Initializer( cgen.Value("const int", endpoint_padded), cgen.Line('(%s) ? %s : %s' % (condition, ccode(endpoint + padding), endpoint)) ) # Update the Iteration bound limits = list(root.limits) limits[1] = endpoint_padded.func(endpoint_padded.name) rebuilt = list(tree) rebuilt[rebuilt.index(root)] = root._rebuild(limits=limits) mapper[tree[0]] = List(header=init, body=compose_nodes(rebuilt)) processed = Transformer(mapper).visit(iet) return processed, {}
def _specialize(self, nodes, parameters):
    """
    Create a YASK representation of this Iteration/Expression tree.

    ``parameters`` is modified in-place adding YASK-related arguments.
    """
    log("Specializing a Devito Operator for YASK...")
    # Default to no-op context/kernel; replaced below on successful offload
    self.context = YaskNullContext()
    self.yk_soln = YaskNullKernel()
    local_grids = []
    offloadable = find_offloadable_trees(nodes)
    if len(offloadable) == 0:
        log("No offloadable trees found")
    elif len(offloadable) == 1:
        tree, grid, dtype = offloadable[0]
        self.context = contexts.fetch(grid, dtype)
        # Create a YASK compiler solution for this Operator
        yc_soln = self.context.make_yc_solution(namespace['jit-yc-soln'])
        transform = sympy2yask(self.context, yc_soln)
        try:
            for i in tree[-1].nodes:
                transform(i.expr)
            funcall = make_sharedptr_funcall(namespace['code-soln-run'],
                                             ['time'],
                                             namespace['code-soln-name'])
            funcall = Element(c.Statement(ccode(funcall)))
            nodes = Transformer({tree[1]: funcall}).visit(nodes)
            # Track /funcall/ as an external function call
            self.func_table[namespace['code-soln-run']] = MetaCall(None, False)
            # JIT-compile the newly-created YASK kernel
            local_grids += [i for i in transform.mapper if i.is_Array]
            self.yk_soln = self.context.make_yk_solution(
                namespace['jit-yk-soln'], yc_soln, local_grids)
            # Now we must drop a pointer to the YASK solution down to C-land
            parameters.append(Object(namespace['code-soln-name'],
                                     namespace['type-solution'],
                                     self.yk_soln.rawpointer))
            # Print some useful information about the newly constructed solution
            log("Solution '%s' contains %d grid(s) and %d equation(s)." %
                (yc_soln.get_name(), yc_soln.get_num_grids(),
                 yc_soln.get_num_equations()))
        except Exception:
            # Bug fix: was a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit; offloading remains best-effort
            log("Unable to offload a candidate tree.")
    else:
        exit("Found more than one offloadable trees in a single Operator")
    # Some Iteration/Expression trees are not offloaded to YASK and may
    # require further processing to be executed in YASK, due to the differences
    # in storage layout employed by Devito and YASK
    nodes = make_grid_accesses(nodes)
    # Update the parameters list adding all necessary YASK grids
    for i in list(parameters) + local_grids:
        try:
            if i.from_YASK:
                parameters.append(Object(namespace['code-grid-name'](i.name),
                                         namespace['type-grid']))
        except AttributeError:
            # Ignore e.g. Dimensions
            pass
    log("Specialization successfully performed!")
    return nodes
def ccode(self):
    """C assignment for this expression: ``<lhs> = <rhs>;``."""
    lhs, rhs = ccode(self.expr.lhs), ccode(self.expr.rhs)
    return c.Assign(lhs, rhs)