def to_ops_stencil(param, accesses):
    """
    Build the OPS stencil symbol for ``param`` together with the expressions
    that initialise its offsets array and declare the stencil.

    Parameters
    ----------
    param : object
        Parameter the stencil is generated for; only ``param.name`` is read.
    accesses : sequence of sequences
        One entry per stencil point. The length of the first entry is used
        as the number of dimensions, so ``accesses`` must not be empty.

    Returns
    -------
    tuple
        ``(ops_stencil, [array_init, stencil_decl])`` where ``array_init``
        assigns the flattened accesses to the stencil array and
        ``stencil_decl`` assigns the ``ops_decl_stencil`` call to the
        stencil symbol.
    """
    n_dims = len(accesses[0])
    n_points = len(accesses)

    stencil_name = namespace['ops_stencil_name'](n_dims, param.name, n_points)
    upper_name = stencil_name.upper()

    # Flat int32 array holding n_dims entries for each of the n_points.
    length = DefaultDimension(name='len', default_value=n_dims * n_points)
    stencil_array = Array(name=stencil_name, dimensions=(length, ), dtype=np.int32)

    ops_stencil = OpsStencil(upper_name)

    flat_offsets = list(itertools.chain.from_iterable(accesses))
    array_init = Expression(
        ClusterizedEq(Eq(stencil_array, ListInitializer(flat_offsets))))

    decl_call = namespace['ops_decl_stencil'](
        n_dims, n_points, Symbol(stencil_array.name), Literal('"%s"' % upper_name))
    stencil_decl = Expression(ClusterizedEq(Eq(ops_stencil, decl_call)))

    return ops_stencil, [array_init, stencil_decl]
def create_ops_par_loop(trees, ops_kernel, parameters, block, name_to_ops_dat,
                        accessible_origin, par_to_ops_stencil, dims):
    """
    Build the iteration-range array initialisation and the ``ops_par_loop``
    call for ``ops_kernel``.

    Parameters
    ----------
    trees : iterable
        Candidate trees; only instances of IterationTree contribute bounds.
    ops_kernel : object
        Kernel being invoked; only ``ops_kernel.name`` is read here.
    parameters : iterable
        Kernel parameters, each converted through ``create_ops_arg``.
    block, dims
        Passed through unchanged as arguments of the generated call.
    name_to_ops_dat, accessible_origin, par_to_ops_stencil
        Forwarded to ``create_ops_arg`` for every parameter.

    Returns
    -------
    tuple
        ``([range_array_init], ops_par_loop_call)``.
    """
    # Every upper bound is shifted by one before being handed to OPS.
    # NOTE(review): presumably converts an inclusive max into an exclusive
    # end -- confirm against the OPS range convention.
    upper_shift = 1

    it_range = [
        bound
        for tree in trees
        if isinstance(tree, IterationTree)
        for iteration in tree
        for bound in (iteration.symbolic_min, iteration.symbolic_max + upper_shift)
    ]

    range_dimension = DefaultDimension(name='range', default_value=len(it_range))
    range_array = Array(name='%s_range' % ops_kernel.name,
                        dimensions=(range_dimension, ),
                        dtype=np.int32,
                        scope='stack')
    range_array_init = Expression(
        ClusterizedEq(Eq(range_array, ListInitializer(it_range))))

    def _as_ops_arg(parameter):
        # Describe the parameter, then instantiate the node type it selects.
        described = create_ops_arg(parameter, accessible_origin, name_to_ops_dat,
                                   par_to_ops_stencil)
        return described.ops_type(described.ops_name, described.elements_per_point,
                                  described.dtype, described.rw_flag)

    ops_args = [_as_ops_arg(p) for p in parameters]

    par_loop = namespace['ops_par_loop']
    call_arguments = [
        Literal(ops_kernel.name),
        Literal('"%s"' % ops_kernel.name),
        block,
        dims,
        range_array,
    ]
    call_arguments.extend(ops_args)
    ops_par_loop_call = Call(par_loop, call_arguments)

    return [range_array_init], ops_par_loop_call
def create_ops_par_loop(trees, ops_kernel, parameters, block, name_to_ops_dat,
                        accessible_origin, par_to_ops_stencil, dims):
    """
    Build the iteration-range array initialisation and the ``ops_par_loop``
    call for ``ops_kernel``.

    Parameters
    ----------
    trees : iterable
        Candidate trees; only instances of IterationTree contribute bounds,
        obtained from each iteration's ``bounds()``.
    ops_kernel : object
        Kernel being invoked; only ``ops_kernel.name`` is read here.
    parameters : iterable
        Kernel parameters, each converted through ``create_ops_arg``.
    block, dims
        Passed through unchanged as arguments of the generated call.
    name_to_ops_dat, accessible_origin, par_to_ops_stencil
        Forwarded to ``create_ops_arg`` for every parameter.

    Returns
    -------
    tuple
        ``([range_array_init], ops_par_loop_call)``.
    """
    # Flatten the bounds of every iteration of every IterationTree, in order.
    it_range = [
        bound
        for tree in trees
        if isinstance(tree, IterationTree)
        for iteration in tree
        for bound in iteration.bounds()
    ]

    range_dimension = DefaultDimension(name='range', default_value=len(it_range))
    range_array = Array(name='%s_range' % ops_kernel.name,
                        dimensions=(range_dimension, ),
                        dtype=np.int32,
                        scope='stack')
    range_array_init = Expression(
        ClusterizedEq(Eq(range_array, ListInitializer(it_range))))

    par_loop = namespace['ops_par_loop']
    call_arguments = [
        Literal(ops_kernel.name),
        Literal('"%s"' % ops_kernel.name),
        block,
        dims,
        range_array,
    ]
    # One trailing argument per kernel parameter.
    for parameter in parameters:
        call_arguments.append(
            create_ops_arg(parameter, accessible_origin, name_to_ops_dat,
                           par_to_ops_stencil))
    ops_par_loop_call = Call(par_loop, call_arguments)

    return [range_array_init], ops_par_loop_call
def generate_ops_stencils(accesses):
    """
    Generate the OPS stencil declarations for a set of function accesses.

    Parameters
    ----------
    accesses : dict
        Maps each function to an iterable of accesses. Every access is a
        sequence with one ``(index, offset)`` pair per function dimension.
        For functions with ``is_TimeFunction`` set, accesses are grouped by
        the first item of the pair found at ``_time_position`` and that pair
        is left out of the stencil.

    Returns
    -------
    tuple
        ``(ops_stencils_initializers, ops_stencils_symbols)``. The first is
        a list with, for every stencil, the array initialisation followed by
        the ``ops_decl_stencil`` declaration. The second maps each stencil
        key (the function name, suffixed with the time key for time
        functions) to its OPSStencil.
    """
    function_to_stencil = defaultdict(list)
    function_to_dims = {}
    ops_stencils_initializers = []
    ops_stencils_symbols = {}

    for func, func_accesses in accesses.items():
        if func.is_TimeFunction:
            time_pos = func._time_position
            # groupby only merges consecutive items. Offsets are accumulated
            # per name, so a key that shows up again later still lands in
            # the same stencil. Each group is consumed before advancing.
            for time_key, group in groupby(func_accesses,
                                           lambda s: s[time_pos][0]):
                name = "%s%s" % (func.name, time_key)
                function_to_dims[name] = func.ndim - 1
                # Positions are compared by value: identity (`is not`) on
                # integers only works by accident of small-int caching.
                function_to_stencil[name].extend(
                    offset
                    for stencil in group
                    for position, (_, offset) in enumerate(stencil)
                    if position != time_pos)
        else:
            function_to_dims[func.name] = func.ndim
            for stencil in func_accesses:
                function_to_stencil[func.name].extend(
                    offset for _, offset in stencil)

    for key, stencil in function_to_stencil.items():
        ndim = function_to_dims[key]
        # Integer division: the point count is used both in the name and as
        # an argument of ops_decl_stencil.
        npoints = len(stencil) // ndim

        stencil_name = "s%sd_%s_%dpt" % (ndim, key, npoints)
        ops_stencil_arr = SymbolicArray(name=stencil_name,
                                        dimensions=(len(stencil), ),
                                        dtype=np.int32)
        ops_stencil = OPSStencil(stencil_name.upper())

        arr_assign = Eq(ops_stencil_arr, ListInitializer(stencil))
        ops_stencils_initializers.append(Expression(ClusterizedEq(arr_assign)))

        decl_call = Call("ops_decl_stencil",
                         [ndim, npoints, ops_stencil_arr, String(ops_stencil.name)])

        ops_stencils_symbols[key] = ops_stencil
        ops_stencils_initializers.append(
            Element(cgen.InlineInitializer(ops_stencil, decl_call)))

    return ops_stencils_initializers, ops_stencils_symbols
def opsit(trees, count, name_to_ops_dat, block, dims):
    """
    Given a sequence of affine trees, generate a Callable representing an OPS
    Kernel, plus the code needed to invoke it.

    Parameters
    ----------
    trees : iterable of IterationTree
        IterationTrees containing the loops to extract into an OPS Kernel;
        the Expressions found under each ``tree.inner`` form the kernel body.
    count : int
        Generated kernel counter, used to build the kernel name.
    name_to_ops_dat : dict
        Forwarded to ``create_ops_par_loop``.
    block
        Forwarded to ``create_ops_par_loop``.
    dims
        Forwarded to ``create_ops_par_loop``.

    Returns
    -------
    tuple
        ``(pre_time_loop, ops_kernel, ops_par_loop_call)``. ``pre_time_loop``
        is the list of per-parameter stencil initialisations (one entry per
        OpsAccessible parameter, as returned by ``to_ops_stencil``) followed
        by the par-loop initialisations.
    """
    node_factory = OPSNodeFactory()

    # Flatten the per-tree results. list.extend accepts exactly one iterable,
    # so unpacking them into extend(*...) fails unless there is one tree.
    expressions = [
        expr
        for tree in trees
        for expr in FindNodes(Expression).visit(tree.inner)
    ]

    # make_ops_ast receives node_factory for every expression; the factory's
    # ops_params / ops_args / ops_args_accesses are only read afterwards.
    ops_expressions = [
        Expression(make_ops_ast(expr.expr, node_factory)) for expr in expressions
    ]

    parameters = sorted(node_factory.ops_params,
                        key=lambda i: (i.is_Constant, i.name))

    stencil_arrays_initializations = []
    par_to_ops_stencil = {}
    for p in parameters:
        if isinstance(p, OpsAccessible):
            stencil, initialization = to_ops_stencil(
                p, node_factory.ops_args_accesses[p])
            par_to_ops_stencil[p] = stencil
            stencil_arrays_initializations.append(initialization)

    ops_kernel = Callable(namespace['ops_kernel'](count), ops_expressions, "void",
                          parameters)

    ops_par_loop_init, ops_par_loop_call = create_ops_par_loop(
        trees, ops_kernel, parameters, block, name_to_ops_dat,
        node_factory.ops_args, par_to_ops_stencil, dims)

    pre_time_loop = stencil_arrays_initializations + ops_par_loop_init

    return pre_time_loop, ops_kernel, ops_par_loop_call
def opsit(trees, count):
    """
    Given a sequence of affine trees, generate a Callable representing an OPS
    Kernel.

    Parameters
    ----------
    trees : iterable of IterationTree
        IterationTrees containing the loops to extract into an OPS Kernel;
        the Expressions found under each ``tree.inner`` form the kernel body.
    count : int
        Generated kernel counter, used to build the kernel name.

    Returns
    -------
    tuple
        ``(pre_time_loop, ops_kernel)``. ``pre_time_loop`` is a lazy
        ``itertools.chain`` over the results of ``to_ops_stencil`` for every
        OpsAccessible parameter, so it can be iterated only once.
    """
    node_factory = OPSNodeFactory()

    found = [
        node
        for tree in trees
        for node in FindNodes(Expression).visit(tree.inner)
    ]
    # Translating each expression also passes node_factory to make_ops_ast;
    # its ops_params / ops_args_accesses are read further down.
    kernel_body = [
        Expression(make_ops_ast(node.expr, node_factory)) for node in found
    ]

    def _sort_key(parameter):
        return (parameter.is_Constant, parameter.name)

    parameters = sorted(node_factory.ops_params, key=_sort_key)

    ops_kernel = Callable(namespace['ops_kernel'](count), kernel_body, "void",
                          parameters)

    # The list is built eagerly; only the chaining itself is lazy.
    per_parameter = [
        to_ops_stencil(p, node_factory.ops_args_accesses[p])
        for p in parameters
        if isinstance(p, OpsAccessible)
    ]
    pre_time_loop = itertools.chain.from_iterable(per_parameter)

    return pre_time_loop, ops_kernel
def create_ops_dat(f, name_to_ops_dat, block):
    """
    Build the declarations needed to expose the function ``f`` as OPS dat(s).

    Parameters
    ----------
    f : function-like object
        Function to declare. When ``f.is_TimeFunction`` is set, the time
        dimension is excluded from the per-dat metadata and one dat is
        declared for each entry along the time dimension.
    name_to_ops_dat : dict
        Updated in place: ``f.name`` is mapped to the dat array (time
        functions) or to the single OpsDat (other functions).
    block
        Passed as first argument of every ``ops_decl_dat`` call.

    Returns
    -------
    OpsDatDecl
        Holds the initialisation expressions of the ``dim``, ``base``,
        ``d_p`` and ``d_m`` arrays and the dat declaration expression.
    """
    is_time = f.is_TimeFunction
    ndim = f.ndim - (1 if is_time else 0)

    def _int_stack_array(namespace_key, dimension_name):
        # int32 stack array with one entry per non-time dimension.
        return Array(name=namespace[namespace_key](f.name),
                     dimensions=(DefaultDimension(name=dimension_name,
                                                  default_value=ndim), ),
                     dtype=np.int32,
                     scope='stack')

    dim = _int_stack_array('ops_dat_dim', 'dim')
    base = _int_stack_array('ops_dat_base', 'base')
    d_p = _int_stack_array('ops_dat_d_p', 'd_p')
    d_m = _int_stack_array('ops_dat_d_m', 'd_m')

    def _declare(first_index, dat_name):
        # ops_decl_dat call for the data starting at f[first_index].
        return namespace['ops_decl_dat'](block, 1,
                                         Symbol(dim.name), Symbol(base.name),
                                         Symbol(d_m.name), Symbol(d_p.name),
                                         Byref(f.indexify([first_index])),
                                         Literal('"%s"' % f._C_typedata),
                                         Literal('"%s"' % dat_name))

    def _initialise(array, values):
        return Expression(ClusterizedEq(Eq(array, ListInitializer(values))))

    base_values = [Zero() for _ in range(ndim)]

    if is_time:
        # A TimeFunction needs one ops_dat for each time stepping variable
        # (eg: t1, t2).
        time_pos = f._time_position
        time_index = f.indices[time_pos]
        time_dims = f.shape[time_pos]

        dim_values = f.shape[:time_pos] + f.shape[time_pos + 1:]
        # NOTE(review): the sizes below drop everything up to and including
        # time_pos, whereas the shape above keeps the leading entries; the
        # two agree only when time_pos is 0 -- confirm.
        d_p_values = f._size_nodomain.left[time_pos + 1:]
        d_m_values = [-size for size in f._size_nodomain.right[time_pos + 1:]]

        ops_dat_array = Array(name=namespace['ops_dat_name'](f.name),
                              dimensions=(DefaultDimension(
                                  name='dat', default_value=time_dims), ),
                              dtype=namespace['ops_dat_type'],
                              scope='stack')

        dat_decls = [
            _declare(step, '%s%s%s' % (f.name, time_index, step))
            for step in range(time_dims)
        ]
        ops_decl_dat = _initialise(ops_dat_array, dat_decls)

        # Inserting the ops_dat array in case of TimeFunction.
        name_to_ops_dat[f.name] = ops_dat_array
    else:
        ops_dat = OpsDat("%s_dat" % f.name)
        name_to_ops_dat[f.name] = ops_dat

        dim_values = f.shape
        d_p_values = f._size_nodomain.left
        d_m_values = [-size for size in f._size_nodomain.right]

        ops_decl_dat = Expression(ClusterizedEq(Eq(ops_dat, _declare(0, f.name))))

    return OpsDatDecl(dim_val=_initialise(dim, dim_values),
                      base_val=_initialise(base, base_values),
                      d_p_val=_initialise(d_p, d_p_values),
                      d_m_val=_initialise(d_m, d_m_values),
                      ops_decl_dat=ops_decl_dat)
def test_nodes_conditional(self, fc): then_body = Expression(DummyEq(fc[x, y], fc[x, y] + 1)) else_body = Expression(DummyEq(fc[x, y], fc[x, y] + 2)) conditional = Conditional(x < 3, then_body, else_body) assert str(conditional) == """\
def create_ops_dat(f, name_to_ops_dat, block):
    """
    Build the expressions needed to expose the function ``f`` as OPS dat(s).

    Parameters
    ----------
    f : function-like object
        Function to declare. When ``f.is_TimeFunction`` is set, the time
        dimension is excluded from the per-dat metadata and one dat is
        declared for each entry along the time dimension.
    name_to_ops_dat : dict
        Updated in place. Time functions add one entry per time slot, keyed
        ``<f.name><time index><slot>`` and mapped to the indexed dat array;
        other functions map ``f.name`` to the single OpsDat.
    block
        Passed as first argument of every ``ops_decl_dat`` call.

    Returns
    -------
    list
        The initialisation expressions of the ``dim``, ``base``, ``d_p`` and
        ``d_m`` arrays, in that order, followed by the dat declaration.
    """
    is_time = f.is_TimeFunction
    ndim = f.ndim - (1 if is_time else 0)

    def _int_stack_array(namespace_key, dimension_name):
        # int32 stack array with one entry per non-time dimension.
        return Array(name=namespace[namespace_key](f.name),
                     dimensions=(DefaultDimension(name=dimension_name,
                                                  default_value=ndim), ),
                     dtype=np.int32,
                     scope='stack')

    dim = _int_stack_array('ops_dat_dim', 'dim')
    base = _int_stack_array('ops_dat_base', 'base')
    d_p = _int_stack_array('ops_dat_d_p', 'd_p')
    d_m = _int_stack_array('ops_dat_d_m', 'd_m')

    def _declare(first_index, dat_name):
        # ops_decl_dat call for the data starting at f[first_index].
        return namespace['ops_decl_dat'](block, 1,
                                         Symbol(dim.name), Symbol(base.name),
                                         Symbol(d_m.name), Symbol(d_p.name),
                                         Byref(f.indexify([first_index])),
                                         Literal('"%s"' % f._C_typedata),
                                         Literal('"%s"' % dat_name))

    def _initialise(array, values):
        return Expression(ClusterizedEq(Eq(array, ListInitializer(values))))

    def _extents(padding, halo):
        # d_p sums the first padding/halo entries, d_m negates the sum of
        # the second entries.
        positive = tuple(sympify([p[0] + h[0] for p, h in zip(padding, halo)]))
        negative = tuple(sympify([-(p[1] + h[1]) for p, h in zip(padding, halo)]))
        return positive, negative

    base_values = [Zero() for _ in range(ndim)]

    if is_time:
        # A TimeFunction needs one ops_dat for each time stepping variable
        # (eg: t1, t2).
        time_pos = f._time_position
        time_index = f.indices[time_pos]
        time_dims = f.shape[time_pos]

        dim_shape = sympify(f.shape[:time_pos] + f.shape[time_pos + 1:])
        space_padding = f.padding[:time_pos] + f.padding[time_pos + 1:]
        space_halo = f.halo[:time_pos] + f.halo[time_pos + 1:]
        d_p_values, d_m_values = _extents(space_padding, space_halo)

        ops_dat_array = Array(name=namespace['ops_dat_name'](f.name),
                              dimensions=(DefaultDimension(
                                  name='dat', default_value=time_dims), ),
                              dtype='ops_dat',
                              scope='stack')

        dat_decls = []
        for slot in range(time_dims):
            dat_name = '%s%s%s' % (f.name, time_index, slot)
            # Register the slot before emitting its declaration.
            name_to_ops_dat[dat_name] = ops_dat_array.indexify(
                [Symbol('%s%s' % (time_index, slot))])
            dat_decls.append(_declare(slot, dat_name))

        ops_decl_dat = _initialise(ops_dat_array, dat_decls)
    else:
        ops_dat = OpsDat("%s_dat" % f.name)
        name_to_ops_dat[f.name] = ops_dat

        d_p_values, d_m_values = _extents(f.padding, f.halo)
        dim_shape = sympify(f.shape)

        ops_decl_dat = Expression(ClusterizedEq(Eq(ops_dat, _declare(0, f.name))))

    return [
        _initialise(dim, dim_shape),
        _initialise(base, base_values),
        _initialise(d_p, d_p_values),
        _initialise(d_m, d_m_values),
        ops_decl_dat,
    ]
def test_iet_conditional(fc): then_body = Expression(Eq(fc[x, y], fc[x, y] + 1)) else_body = Expression(Eq(fc[x, y], fc[x, y] + 2)) conditional = Conditional(x < 3, then_body, else_body) assert str(conditional) == """\