def visit_Iteration(self, o):
    """
    Build the cgen ``For`` loop for the Iteration ``o``.

    The loop runs between ``o.limits[0]`` and ``o.limits[1]``, each shifted
    by the matching entry of ``o.offsets`` when that entry is nonzero, with
    stride ``o.limits[2]``. When ``o.direction`` is ``Backward`` the loop
    starts at the upper bound and decrements. Entries of ``o.uindices`` are
    appended to the init and increment clauses; ``o.pragmas``, if any, are
    placed in front of the loop inside a ``c.Module``.
    """
    body = flatten(self._visit(child) for child in o.children)

    # NOTE(review): the two `eval` calls below run on text derived from the
    # loop limits and resolve names in this very scope -- that is why the
    # bounds keep the `_min`/`_max` names. Confirm that limits can never
    # originate from untrusted input.

    # Start: fold a nonzero offset into the limit; the textual form is kept
    # whenever it cannot be evaluated
    _min = o.limits[0]
    if o.offsets[0] != 0:
        _min = str(o.limits[0] + o.offsets[0])
        try:
            _min = eval(_min)
        except (NameError, TypeError):
            pass

    # Bound: same treatment as the start
    _max = o.limits[1]
    if o.offsets[1] != 0:
        _max = str(o.limits[1] + o.offsets[1])
        try:
            _max = eval(_max)
        except (NameError, TypeError):
            pass

    # A backward loop flips the bounds, the comparison and the stride sign
    if o.direction == Backward:
        first, last, compare, advance = _max, _min, '>=', '-='
    else:
        first, last, compare, advance = _min, _max, '<=', '+='
    loop_init = 'int %s = %s' % (o.index, ccode(first))
    loop_cond = '%s %s %s' % (o.index, compare, ccode(last))
    loop_inc = '%s %s %s' % (o.index, advance, o.limits[2])

    # Unbounded indices, if any, extend both the init and increment clauses
    if o.uindices:
        init_parts = [loop_init]
        init_parts.extend('%s = %s' % (u.name, ccode(u.symbolic_min))
                          for u in o.uindices)
        loop_init = c.Line(', '.join(init_parts))

        step_parts = [loop_inc]
        step_parts.extend('%s = %s' % (u.name, ccode(u.symbolic_incr))
                          for u in o.uindices)
        loop_inc = c.Line(', '.join(step_parts))

    # For header + body
    loop = c.For(loop_init, loop_cond, loop_inc, c.Block(body))

    # Without pragmas the bare loop is the result
    if not o.pragmas:
        return loop
    return c.Module(o.pragmas + (loop, ))
def _args_call(self, args):
    """
    Generate cgen function call arguments from an iterable of symbols and
    expressions.

    Returns a list with one entry per item of ``args``, in order.
    """

    def render(arg):
        # Any AttributeError raised while handling `arg` -- a missing
        # `is_*`/`_C_*` attribute, or one escaping from a nested visit --
        # makes us fall back to plain `ccode`
        try:
            if isinstance(arg, Call):
                return self.visit(arg, nested_call=True)
            if isinstance(arg, Lambda):
                return self.visit(arg)
            if arg.is_LocalObject:
                # Passed by address
                return '&%s' % arg._C_name
            if arg.is_ArrayBasic:
                # Passed with an explicit cast to its C type name
                return "(%s)%s" % (arg._C_typename, arg.name)
            return arg._C_name
        except AttributeError:
            return ccode(arg)

    return [render(arg) for arg in args]
def visit_PointerCast(self, o):
    """
    Build the ``c.Initializer`` that declares a pointer named after
    ``o.function`` and initializes it with a cast expression.

    The expression being cast depends on the type of ``o.obj``; the shape of
    the cast depends on whether the function is a pointer array and, if not,
    on ``o.castshape``.
    """
    func = o.function
    target = o.obj

    # The expression that gets cast
    if isinstance(target, VoidPointer):
        source = target.name
    elif isinstance(target, ArrayObject):
        source = '%s->%s' % (target.name, func._C_name)
    else:
        source = func._C_name

    # Pointer arrays are cast to a plain double pointer, no shape involved
    if func.is_PointerArray:
        rvalue = '(%s**) %s' % (func._C_typedata, source)
        lvalue = c.Value(func._C_typedata, '**%s' % func.name)
        return c.Initializer(lvalue, rvalue)

    # Everything else is cast to a pointer-to-array carrying the cast shape
    dims = ''.join("[%s]" % ccode(extent) for extent in o.castshape)
    rvalue = '(%s (*)%s) %s' % (func._C_typedata, dims, source)
    if func.is_DiscreteFunction:
        # The data is reached through a field of the source object
        rvalue = '%s->%s' % (rvalue, func._C_field_data)
    declarator = '(*restrict %s)%s' % (func.name, dims)
    lvalue = c.AlignedAttribute(func._data_alignment,
                                c.Value(func._C_typedata, declarator))

    return c.Initializer(lvalue, rvalue)
def visit_Dereference(self, o):
    """
    Build the ``c.Initializer`` for the Dereference ``o``.

    ``o.functions`` unpacks into the object being declared and the object it
    is extracted from. If the latter is a pointer array or a temp function,
    one of its entries (indexed by its ``dim``) is cast and bound to a
    ``restrict`` pointer; otherwise the declared object is read as a member
    through ``->``.
    """
    pointee, pointer = o.functions

    # Plain member access
    if not (pointer.is_PointerArray or pointer.is_TempFunction):
        rvalue = '%s->%s' % (pointer.name, pointee._C_name)
        lvalue = c.Value(pointee._C_typename, pointee._C_name)
        return c.Initializer(lvalue, rvalue)

    # The entry of `pointer` being dereferenced
    entry = '%s[%s]' % (pointer.name, pointer.dim.name)

    if o.flat is None:
        # Multi-dimensional view: every dimension but the outermost shows
        # up in both the cast and the declarator
        shape = ''.join("[%s]" % ccode(extent)
                        for extent in pointee.symbolic_shape[1:])
        cast = '(%s (*)%s)' % (pointer._C_typedata, shape)
        declarator = '(*restrict %s)%s' % (pointee.name, shape)
    else:
        # Flat view: a plain pointer named after `o.flat`
        cast = '(%s *)' % pointer._C_typedata
        declarator = '*restrict %s' % o.flat

    rvalue = '%s %s' % (cast, entry)
    lvalue = c.AlignedAttribute(pointee._data_alignment,
                                c.Value(pointee._C_typedata, declarator))

    return c.Initializer(lvalue, rvalue)
def visit_Iteration(self, o):
    """
    Build the cgen ``For`` loop for the Iteration ``o``.

    The children are first passed through ``self._blankline_logic`` and then
    visited to form the loop body. The loop runs between ``o.limits[0]`` and
    ``o.limits[1]`` with stride ``o.limits[2]``; when ``o.direction`` is
    ``Backward`` it starts at the upper bound and decrements. Entries of
    ``o.uindices`` are appended to the init and increment clauses;
    ``o.pragmas``, if any, are placed in front of the loop inside a
    ``c.Module``.
    """
    children = self._blankline_logic(o.children)
    body = flatten(self._visit(child) for child in children)

    lower = o.limits[0]
    upper = o.limits[1]

    # A backward loop flips the bounds, the comparison and the stride sign
    if o.direction == Backward:
        first, last, compare, advance = upper, lower, '>=', '-='
    else:
        first, last, compare, advance = lower, upper, '<=', '+='
    loop_init = 'int %s = %s' % (o.index, ccode(first))
    loop_cond = '%s %s %s' % (o.index, compare, ccode(last))
    loop_inc = '%s %s %s' % (o.index, advance, o.limits[2])

    # Unbounded indices, if any, extend both the init and increment clauses
    if o.uindices:
        init_parts = [loop_init]
        init_parts.extend('%s = %s' % (u.name, ccode(u.symbolic_min))
                          for u in o.uindices)
        loop_init = c.Line(', '.join(init_parts))

        # Modulo indices are reassigned at each step, any other index is
        # incremented
        step_parts = [loop_inc]
        step_parts.extend(
            '%s %s %s' % (u.name, '=' if u.is_Modulo else '+=',
                          ccode(u.symbolic_incr))
            for u in o.uindices
        )
        loop_inc = c.Line(', '.join(step_parts))

    # For header + body
    loop = c.For(loop_init, loop_cond, loop_inc, c.Block(body))

    # Without pragmas the bare loop is the result
    if not o.pragmas:
        return loop
    return c.Module(o.pragmas + (loop, ))
def visit_ForeignExpression(self, o):
    """Emit ``o.expr``, rendered through ``ccode``, as a ``c.Statement``."""
    code = ccode(o.expr)
    return c.Statement(code)
def visit_LocalExpression(self, o):
    """
    Emit ``o.expr`` as a declaration with initializer.

    The left-hand side is declared with the C type string obtained from
    ``o.dtype`` and initialized with the right-hand side.
    """
    declaration = c.Value(dtype_to_cstr(o.dtype),
                          ccode(o.expr.lhs, dtype=o.dtype))
    value = ccode(o.expr.rhs, dtype=o.dtype)
    return c.Initializer(declaration, value)
def visit_Increment(self, o):
    """Emit ``o.expr`` as the C statement ``lhs += rhs``."""
    target = ccode(o.expr.lhs, dtype=o.dtype)
    amount = ccode(o.expr.rhs, dtype=o.dtype)
    return c.Statement("%s += %s" % (target, amount))
def visit_Expression(self, o):
    """Emit ``o.expr`` as a ``c.Assign`` of its right-hand side to its left."""
    target = ccode(o.expr.lhs, dtype=o.dtype)
    value = ccode(o.expr.rhs, dtype=o.dtype)
    return c.Assign(target, value)
def visit_AugmentedExpression(self, o):
    """
    Emit ``o.expr`` as the C statement ``lhs <op>= rhs``, where ``<op>`` is
    ``o.op``.
    """
    target = ccode(o.expr.lhs, dtype=o.dtype)
    operator = o.op
    value = ccode(o.expr.rhs, dtype=o.dtype)
    return c.Statement("%s %s= %s" % (target, operator, value))
def visit_Expression(self, o):
    """
    Emit ``o.expr`` as a ``c.Assign``.

    If ``o.pragmas`` is non-empty, the pragmas followed by the assignment
    are returned wrapped in a ``c.Module``.
    """
    target = ccode(o.expr.lhs, dtype=o.dtype)
    value = ccode(o.expr.rhs, dtype=o.dtype)
    assignment = c.Assign(target, value)

    if not o.pragmas:
        return assignment
    return c.Module(list(o.pragmas) + [assignment])
def __repr__(self):
    """
    Return a compact ``<[condition] ? [then] : [else]>`` representation.

    The ``: [else]`` part is omitted when ``self.else_body`` is falsy. The
    condition is rendered through ``ccode``, the bodies through ``repr``.
    """
    condition = ccode(self.condition)
    then_body = repr(self.then_body)
    if self.else_body:
        return "<[%s] ? [%s] : [%s]>" % (condition, then_body,
                                         repr(self.else_body))
    # The closing `>` was previously missing here, leaving the
    # representation unbalanced compared to the branch above
    return "<[%s] ? [%s]>" % (condition, then_body)
def linearize_accesses(iet, cache, sregistry):
    """
    Turn Indexeds into FIndexeds and create the necessary access Macros.

    Parameters
    ----------
    iet
        The tree whose accesses are linearized. It is visited to find
        symbols and Expression nodes, and must expose ``body`` and
        ``_rebuild``.
    cache
        Mapping from functions to records exposing the ``stmts0`` and
        ``stmts1`` lists and the ``cbk`` callback. It is updated in place.
        NOTE(review): entries are read for functions that fail the
        ``in cache`` test, so the mapping presumably creates them on lookup;
        confirm against the caller.
    sregistry
        Provides ``make_name(prefix=...)``, used to name the new symbols and
        macros.

    Returns
    -------
    tuple
        The rebuilt ``iet`` and ``headers``, a list of ``(define, expr)``
        pairs of C strings, one per access macro built by this call.
    """
    # Find all objects amenable to linearization
    indexed_names = {i.name for i in FindSymbols('indexeds').visit(iet)}
    candidates = [f for f in FindSymbols().visit(iet)
                  if ((f.is_DiscreteFunction or f.is_Array) and
                      f.ndim > 1 and
                      f.name in indexed_names)]
    functions = sorted(candidates, key=lambda f: len(f.dimensions),
                       reverse=True)

    # Find unique sizes (unique -> minimize necessary registers)
    by_size = DefaultOrderedDict(list)
    for f in functions:
        if f in cache:
            continue
        # NOTE: the outermost dimension is unnecessary
        for d in f.dimensions[1:]:
            # TODO: same grid + same halo => same padding, however this is
            # never asserted throughout the compiler yet... maybe should do
            # it when in debug mode at `prepare_arguments` time, ie right
            # before jumping to C?
            key = (d, f._size_halo[d], getattr(f, 'grid', None))
            by_size[key].append(f)

    # Build all exprs such as `x_fsz0 = u_vec->size[1]`
    sizes = DefaultOrderedDict(list)
    for (d, _, _), group in by_size.items():
        fsz_name = sregistry.make_name(prefix='%s_fsz' % d.name)
        fsz = Symbol(name=fsz_name, dtype=np.int32, is_const=True)
        leader = group[0]
        try:
            fsz_stmt = DummyExpr(fsz, leader._C_get_field(FULL, d).size,
                                 init=True)
        except AttributeError:
            # No such field: fall back to the symbolic shape, which is only
            # expected for Arrays
            assert leader.is_Array
            fsz_stmt = DummyExpr(fsz, leader.symbolic_shape[d], init=True)
        for f in group:
            sizes[f].append((d, fsz))
            cache[f].stmts0.append(fsz_stmt)

    # Build all exprs such as `y_slc0 = y_fsz0*z_fsz0`
    built = {}
    slices = DefaultOrderedDict(list)
    for f, entries in sizes.items():
        for n, (d, _) in enumerate(entries):
            # Product of the sizes from the n-th entry onwards
            extent = prod(list(zip(*entries[n:]))[1])
            if extent not in built:
                slc_name = sregistry.make_name(prefix='%s_slc' % d.name)
                slc = Symbol(name=slc_name, dtype=np.int32, is_const=True)
                built[extent] = DummyExpr(slc, extent, init=True)
            slc_stmt = built[extent]
            slices[f].append(slc_stmt.write)
            cache[f].stmts1.append(slc_stmt)
    # Functions that got no slices still need an (empty) entry
    slices.update([(f, []) for f in functions if f not in slices])

    # Build defines. For example:
    # `define uL(t, x, y, z) u[(t)*t_slice_sz + (x)*x_slice_sz + (y)*y_slice_sz + (z)]`
    headers = []
    findexeds = {}
    for f, szs in slices.items():
        record = cache[f]
        if record.cbk is not None:
            # Perhaps we've already built an access macro for `f` through
            # another efunc
            findexeds[f] = record.cbk
            continue

        assert len(szs) == len(f.dimensions) - 1
        pname = sregistry.make_name(prefix='%sL' % f.name)

        offset = sum([MacroArgument(d.name)*s
                      for d, s in zip(f.dimensions, szs)])
        offset += MacroArgument(f.dimensions[-1].name)
        access = Indexed(IndexedData(f.name, None, f), offset)
        define = DefFunction(pname, f.dimensions)
        headers.append((ccode(define), ccode(access)))

        # `pname` is bound as a default so each callback keeps its own macro
        record.cbk = findexeds[f] = lambda i, pname=pname: FIndexed(i, pname)

    # Build "functional" Indexeds. For example:
    # `u[t2, x+8, y+9, z+7] => uL(t2, x+8, y+9, z+7)`
    replacements = {}
    for node in FindNodes(Expression).visit(iet):
        subs = {}
        for indexed in retrieve_indexed(node.expr):
            try:
                subs[indexed] = findexeds[indexed.function](indexed)
            except KeyError:
                pass
        replacements[node] = node._rebuild(expr=uxreplace(node.expr, subs))

    # Put together all of the necessary exprs for `y_fsz0`, ..., `y_slc0`, ...
    stmts0 = filter_ordered(flatten(cache[f].stmts0 for f in functions))
    if stmts0:
        stmts0.append(BlankLine)
    stmts1 = filter_ordered(flatten(cache[f].stmts1 for f in functions))
    if stmts1:
        stmts1.append(BlankLine)

    iet = Transformer(replacements).visit(iet)
    prologue = tuple(stmts0) + tuple(stmts1)
    body = iet.body._rebuild(body=prologue + iet.body.body)
    iet = iet._rebuild(body=body)

    return iet, headers
def visit_AugmentedExpression(self, o):
    """
    Emit ``o.expr`` as the C statement ``lhs <op>= rhs``, where ``<op>`` is
    ``o.op``.

    If ``o.pragmas`` is non-empty, the pragmas followed by the statement are
    returned wrapped in a ``c.Module``.
    """
    target = ccode(o.expr.lhs, dtype=o.dtype)
    operator = o.op
    value = ccode(o.expr.rhs, dtype=o.dtype)
    statement = c.Statement("%s %s= %s" % (target, operator, value))

    if not o.pragmas:
        return statement
    return c.Module(list(o.pragmas) + [statement])
def _minimize_remainders(self, iet):
    """
    Reshape temporary tensors and adjust loop trip counts to prevent as many
    compiler-generated remainder loops as possible.

    Returns the processed ``iet`` together with an empty dict. If the
    platform lookup of the SIMD items per register raises ``KeyError`` for
    some written array, the input ``iet`` is returned instead.
    """
    # The innermost dimension is the one that might get padded
    p_dim = -1

    mapper = {}
    for tree in retrieve_iteration_tree(iet):
        vectorizable = [i for i in tree if i.is_Vectorizable]
        # Only trees with exactly one vectorizable Iteration are handled
        if len(vectorizable) != 1:
            continue
        root = vectorizable[0]

        # Padding: for each written Array, the amount needed to bring the
        # innermost extent up to the next multiple of the SIMD width
        writes = [e.write for e in FindNodes(Expression).visit(root)
                  if e.write.is_Array]
        pads = []
        for array in writes:
            try:
                simd_items = self.platform.simd_items_per_reg(array.dtype)
            except KeyError:
                return iet, {}
            pads.append(simd_items - array.shape[-1] % simd_items)
        if len(set(pads)) != 1:
            # Padding must be uniform -- not the case, so giving up
            continue
        pad = pads[0]
        for array in writes:
            padded = (array._padding[p_dim][0], array._padding[p_dim][1] + pad)
            array.update(padding=array._padding[:p_dim] + (padded, ))

        # Dynamic trip count adjustment
        endpoint = root.symbolic_max
        if not endpoint.is_Symbol:
            continue
        externals = {t.symbolic_shape[-1] for t in FindSymbols().visit(root)
                     if t.is_Tensor}
        # One bound check per (unbounded index, external extent) pair
        checks = [root.symbolic_max + pad < extent
                  for _ in root.uindices
                  for extent in externals]
        condition = ' && '.join(ccode(check) for check in checks)
        endpoint_padded = endpoint.func('_%s' % endpoint.name)
        selector = cgen.Line('(%s) ? %s : %s' % (condition,
                                                 ccode(endpoint + pad),
                                                 endpoint))
        init = cgen.Initializer(cgen.Value("const int", endpoint_padded),
                                selector)

        # Update the Iteration bound
        limits = list(root.limits)
        limits[1] = endpoint_padded.func(endpoint_padded.name)
        rebuilt = list(tree)
        rebuilt[rebuilt.index(root)] = root._rebuild(limits=limits)

        mapper[tree[0]] = List(header=init, body=compose_nodes(rebuilt))

    processed = Transformer(mapper).visit(iet)

    return processed, {}