def _specialize_iet(self, iet, **kwargs):
    mapper = {}
    self._includes.append('ops_seq.h')

    ops_init = Call("ops_init", [0, 0, 2])
    ops_timing = Call("ops_timing_output", [FunctionPointer("stdout")])
    ops_exit = Call("ops_exit")

    global_declarations = []
    dims = None
    for n, (section, trees) in enumerate(find_affine_trees(iet).items()):
        callable_kernel, declarations, par_loop_call_block, dims = opsit(trees, n)
        global_declarations.extend(declarations)
        self._header_functions.append(callable_kernel)
        mapper[trees[0].root] = par_loop_call_block
        mapper.update({i.root: mapper.get(i.root) for i in trees})  # Drop trees

    self._headers.append('#define OPS_%sD' % dims)
    warning("The OPS backend is still work-in-progress")

    global_declarations.append(Transformer(mapper).visit(iet))

    return List(body=[ops_init, *global_declarations, ops_timing, ops_exit])
def _make_thread_init(threads, tfunc, isdata, sdata, sregistry):
    d = threads.index
    if threads.size == 1:
        callback = lambda body: body
    else:
        callback = lambda body: Iteration(body, d, threads.size - 1)

    # A unique identifier for each created pthread
    pthreadid = d + threads.base_id

    # Initialize `sdata`
    arguments = list(isdata.parameters)
    arguments[-3] = sdata.symbolic_base + d
    arguments[-2] = pthreadid
    arguments[-1] = sregistry.deviceid
    call0 = Call(isdata.name, arguments)

    # Create pthreads
    call1 = Call('pthread_create', (threads.symbolic_base + d, Macro('NULL'),
                                    Call(tfunc.name, [], is_indirect=True),
                                    sdata.symbolic_base + d))

    threadsinit = List(
        header=c.Comment("Fire up and initialize `%s`" % threads.name),
        body=callback([call0, call1])
    )

    return threadsinit
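
# Illustrative analogue only (plain Python `threading`, hypothetical names): the
# routine above emits C that, for each pthread, (i) initializes that thread's
# slot of the shared-data array with a unique id (`index + base_id`) and the
# device id (call0), then (ii) calls pthread_create with the thread's entry
# function and its shared-data slot (call1). A standalone sketch of that pattern:
import threading


def _init_sdata(slot, threadid, deviceid):
    # Stand-in for the generated `init_<sdata>` routine (call0 above)
    slot.update({'id': threadid, 'deviceid': deviceid, 'flag': 1})


def _thread_func(slot):
    # Stand-in for the generated thread body: spin until told to shut down
    while slot['flag'] != 0:
        pass


def fire_up_threads(nthreads, base_id=1, deviceid=0):
    sdata = [{} for _ in range(nthreads)]
    threads = []
    for d in range(nthreads):
        _init_sdata(sdata[d], d + base_id, deviceid)
        t = threading.Thread(target=_thread_func, args=(sdata[d],))
        t.start()  # analogue of pthread_create (call1 above)
        threads.append(t)
    return threads, sdata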
def create_ops_par_loop(trees, ops_kernel, parameters, block, name_to_ops_dat,
                        accessible_origin, par_to_ops_stencil, dims):
    it_range = []
    devito_to_ops_indexer = 1
    for tree in trees:
        if isinstance(tree, IterationTree):
            for i in tree:
                it_range.extend([i.symbolic_min,
                                 i.symbolic_max + devito_to_ops_indexer])

    range_array = Array(name='%s_range' % ops_kernel.name,
                        dimensions=(DefaultDimension(name='range',
                                                     default_value=len(it_range)),),
                        dtype=np.int32,
                        scope='stack')
    range_array_init = Expression(
        ClusterizedEq(Eq(range_array, ListInitializer(it_range))))

    ops_args = []
    for p in parameters:
        ops_arg = create_ops_arg(p, accessible_origin, name_to_ops_dat,
                                 par_to_ops_stencil)
        ops_args.append(ops_arg.ops_type(ops_arg.ops_name,
                                         ops_arg.elements_per_point,
                                         ops_arg.dtype,
                                         ops_arg.rw_flag))

    ops_par_loop_call = Call(namespace['ops_par_loop'], [
        Literal(ops_kernel.name), Literal('"%s"' % ops_kernel.name), block, dims,
        range_array, *ops_args
    ])

    return [range_array_init], ops_par_loop_call
def create_ops_par_loop(trees, ops_kernel, parameters, block, name_to_ops_dat,
                        accessible_origin, par_to_ops_stencil, dims):
    it_range = []
    for tree in trees:
        if isinstance(tree, IterationTree):
            for bounds in [it.bounds() for it in tree]:
                it_range.extend(bounds)

    range_array = Array(name='%s_range' % ops_kernel.name,
                        dimensions=(DefaultDimension(name='range',
                                                     default_value=len(it_range)),),
                        dtype=np.int32,
                        scope='stack')
    range_array_init = Expression(
        ClusterizedEq(Eq(range_array, ListInitializer(it_range))))

    ops_par_loop_call = Call(namespace['ops_par_loop'], [
        Literal(ops_kernel.name), Literal('"%s"' % ops_kernel.name), block, dims,
        range_array,
        *[create_ops_arg(p, accessible_origin, name_to_ops_dat, par_to_ops_stencil)
          for p in parameters]
    ])

    return [range_array_init], ops_par_loop_call
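
# Illustrative only: the layout of the flat range array handed to ops_par_loop.
# The array appears to hold half-open per-dimension intervals,
# [d0_start, d0_end, d1_start, d1_end, ...], which would explain why the variant
# above that uses symbolic_min/symbolic_max adds 1 (`devito_to_ops_indexer`) to
# each Devito upper bound. `Bounds` is a hypothetical stand-in for the Iteration
# objects walked in those loops.
from collections import namedtuple

Bounds = namedtuple('Bounds', ['symbolic_min', 'symbolic_max'])


def flatten_range(iterations, indexer=1):
    it_range = []
    for i in iterations:
        it_range.extend([i.symbolic_min, i.symbolic_max + indexer])
    return it_range


# A 2D loop nest over x in [0, 9] and y in [0, 19]
assert flatten_range([Bounds(0, 9), Bounds(0, 19)]) == [0, 10, 0, 20]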
def generate_ops_stencils(accesses):
    function_to_stencil = defaultdict(list)
    function_to_dims = {}
    ops_stencils_initializers = []
    ops_stencils_symbols = {}

    for k, v in accesses.items():
        to_skip = -1
        if k.is_TimeFunction:
            to_skip = k._time_position
            stencils = [(k1, list(v1))
                        for k1, v1 in groupby(v, lambda s: s[k._time_position][0])]
            for k1, v1 in stencils:
                name = "%s%s" % (k.name, k1)
                function_to_dims[name] = k.ndim - 1
                function_to_stencil[name].extend([
                    offset for stencil in v1
                    for i, (_, offset) in enumerate(stencil)
                    if i != to_skip
                ])
        else:
            function_to_dims[k.name] = k.ndim
            for s in v:
                function_to_stencil[k.name].extend([offset for _, offset in s])

    for f, stencil in function_to_stencil.items():
        stencil_name = "s%sd_%s_%dpt" % (function_to_dims[f], f,
                                         len(stencil) // function_to_dims[f])
        ops_stencil_arr = SymbolicArray(name=stencil_name,
                                        dimensions=(len(stencil),),
                                        dtype=np.int32)
        ops_stencil = OPSStencil(stencil_name.upper())

        arr_assign = Eq(ops_stencil_arr, ListInitializer(stencil))
        ops_stencils_initializers.append(Expression(ClusterizedEq(arr_assign)))

        decl_call = Call("ops_decl_stencil", [
            function_to_dims[f],
            len(stencil) // function_to_dims[f],
            ops_stencil_arr,
            String(ops_stencil.name)
        ])
        ops_stencils_symbols[f] = ops_stencil
        ops_stencils_initializers.append(
            Element(cgen.InlineInitializer(ops_stencil, decl_call)))

    return ops_stencils_initializers, ops_stencils_symbols
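
# A simplified standalone sketch (made-up access layout, hypothetical helper) of
# the grouping performed above for TimeFunctions: accesses are grouped by their
# time index, the time entry itself is skipped, and the remaining space offsets
# are flattened into one OPS-style stencil point list per (function, time index).
# Note that itertools.groupby only merges adjacent items, so accesses sharing a
# time index are assumed to arrive consecutively.
from itertools import groupby


def flatten_stencils(accesses, time_position=0):
    out = {}
    for time_key, group in groupby(accesses, lambda s: s[time_position][1]):
        points = []
        for access in group:
            points.extend(off for i, (_, off) in enumerate(access)
                          if i != time_position)
        out.setdefault(time_key, []).extend(points)
    return out


# Accesses to u(t, x, y), each a tuple of (dimension, offset) pairs
accesses = [(('t', 0), ('x', -1), ('y', 0)),
            (('t', 0), ('x', 1), ('y', 0)),
            (('t', 1), ('x', 0), ('y', 0))]
assert flatten_stencils(accesses) == {0: [-1, 0, 1, 0], 1: [0, 0]}
# i.e. a 2-point stencil for "u0" and a 1-point stencil for "u1"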
def make_call(self, dynamic_parameters_mapper=None):
    dynamic_parameters_mapper = dynamic_parameters_mapper or {}

    arguments = list(self.parameters)
    for k, v in dynamic_parameters_mapper.items():
        # Sanity check
        if k not in self._mapper:
            raise ValueError("`%s` is not a dynamic parameter" % k)
        if len(self._mapper[k]) != len(v):
            raise ValueError("Expected %d values for dynamic parameter `%s`, given %d"
                             % (len(self._mapper[k]), k, len(v)))
        # Create the argument list
        for i, j in zip(self._mapper[k], v):
            arguments[i] = j

    return Call(self.name, tuple(arguments))
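
# A minimal standalone sketch (hypothetical names) of the substitution performed
# by make_call: `mapper` records which argument slots each dynamic parameter
# owns, and only those slots are overwritten at call time.
def patch_arguments(parameters, mapper, dynamic_parameters_mapper):
    arguments = list(parameters)
    for k, v in dynamic_parameters_mapper.items():
        for i, j in zip(mapper[k], v):
            arguments[i] = j
    return tuple(arguments)


# e.g. the dynamic parameter `x` owns slots 1 and 2 (its min and max bounds)
assert patch_arguments(['u_vec', 'x_m', 'x_M'],
                       {'x': (1, 2)},
                       {'x': (0, 127)}) == ('u_vec', 0, 127)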
def _make_thread_finalize(threads, sdata):
    d = threads.index
    if threads.size == 1:
        callback = lambda body: body
    else:
        callback = lambda body: Iteration(body, d, threads.size - 1)

    threadswait = List(
        header=c.Comment("Wait for completion of `%s`" % threads.name),
        body=callback([
            While(CondEq(FieldFromComposite(sdata._field_flag, sdata[d]), 2)),
            DummyExpr(FieldFromComposite(sdata._field_flag, sdata[d]), 0),
            Call('pthread_join', (threads[d], Macro('NULL')))
        ])
    )

    return threadswait
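
# Companion to the `fire_up_threads` sketch above (same hypothetical data
# layout): the generated C busy-waits while a thread is still processing
# (flag == 2), then resets the flag to signal shutdown and joins the pthread.
def wait_and_join(threads, sdata):
    for d, t in enumerate(threads):
        # Spin while the thread is busy, mirroring While(CondEq(flag, 2))
        while sdata[d]['flag'] == 2:
            pass
        # Signal shutdown and join, mirroring DummyExpr(flag, 0) + pthread_join
        sdata[d]['flag'] = 0
        t.join()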
OpsDatDecl = namedtuple('OpsDatDecl', ['dim_val', 'base_val', 'd_p_val', 'd_m_val',
                                       'ops_decl_dat'])
OpsArgDecl = namedtuple('OpsArgDecl', ['ops_type', 'ops_name', 'elements_per_point',
                                       'dtype', 'rw_flag'])

# OPS API
namespace['ops_init'] = 'ops_init'
namespace['ops_partition'] = 'ops_partition'
namespace['ops_timing_output'] = 'ops_timing_output'
namespace['ops_exit'] = 'ops_exit'
namespace['ops_par_loop'] = 'ops_par_loop'

namespace['ops_dat_fetch_data'] = lambda ops_dat, data: Call(
    name='ops_dat_fetch_data', arguments=[ops_dat, 0, data])

namespace['ops_decl_stencil'] = Function(name='ops_decl_stencil')
namespace['ops_decl_block'] = Function(name='ops_decl_block')
namespace['ops_decl_dat'] = Function(name='ops_decl_dat')
namespace['ops_arg_dat'] = Function(name='ops_arg_dat')
namespace['ops_arg_gbl'] = Function(name='ops_arg_gbl')

namespace['ops_read'] = Macro('OPS_READ')
namespace['ops_write'] = Macro('OPS_WRITE')

namespace['ops_stencil_type'] = 'ops_stencil'
namespace['ops_block_type'] = 'ops_block'
namespace['ops_dat_type'] = 'ops_dat'

# Naming conventions
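
# Illustrative only (plain strings, no Devito IET nodes): how a generator might
# consult the access macros registered above when emitting an OPS argument.
# The C signature assumed here is ops_arg_dat(dat, dim, stencil, "type", access);
# the helper and its inputs are hypothetical.
_demo_namespace = {'ops_read': 'OPS_READ', 'ops_write': 'OPS_WRITE'}


def emit_ops_arg_dat(dat, dim, stencil, ctype, mode):
    access = _demo_namespace['ops_read' if mode == 'read' else 'ops_write']
    return 'ops_arg_dat(%s, %d, %s, "%s", %s)' % (dat, dim, stencil, ctype, access)


assert emit_ops_arg_dat('u_dat', 1, 'S2D_U_5PT', 'float', 'read') == \
    'ops_arg_dat(u_dat, 1, S2D_U_5PT, "float", OPS_READ)'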