def create_ops_par_loop(trees, ops_kernel, parameters, block, name_to_ops_dat,
                        accessible_origin, par_to_ops_stencil, dims):
    """
    Build the ``ops_par_loop`` call that invokes an OPS kernel.

    Parameters
    ----------
    trees : iterable
        Candidate trees; only ``IterationTree`` instances contribute bounds.
    ops_kernel : object
        The generated OPS kernel; its ``name`` labels the loop and the
        range array.
    parameters : iterable
        Kernel parameters, each lowered to an OPS argument via
        ``create_ops_arg``.
    block : object
        The OPS block the loop iterates over.
    name_to_ops_dat : dict
        Maps function names to their ``ops_dat`` handles.
    accessible_origin : dict
        Per-name accessibility info forwarded to ``create_ops_arg``.
    par_to_ops_stencil : dict
        Maps parameters to their OPS stencils.
    dims : object
        Dimensionality of the iteration space, passed through to the call.

    Returns
    -------
    tuple
        ``([range_array_init], ops_par_loop_call)`` — the initializer
        expression for the iteration-range array, and the Call node.
    """
    # Flatten every iteration's bounds pair into one list; this becomes the
    # OPS iteration range buffer.
    it_range = []
    for tree in trees:
        if isinstance(tree, IterationTree):
            for bounds in [it.bounds() for it in tree]:
                it_range.extend(bounds)
    # Stack-allocated int32 array holding the flattened range.
    range_array = Array(name='%s_range' % ops_kernel.name,
                        dimensions=(DefaultDimension(
                            name='range', default_value=len(it_range)), ),
                        dtype=np.int32,
                        scope='stack')
    range_array_init = Expression(
        ClusterizedEq(Eq(range_array, ListInitializer(it_range))))
    # ops_par_loop(kernel, "kernel", block, dims, range, arg0, arg1, ...)
    ops_par_loop_call = Call(namespace['ops_par_loop'], [
        Literal(ops_kernel.name),
        Literal('"%s"' % ops_kernel.name), block, dims, range_array, *[
            create_ops_arg(p, accessible_origin, name_to_ops_dat,
                           par_to_ops_stencil) for p in parameters
        ]
    ])
    return [range_array_init], ops_par_loop_call
def test_create_ops_dat_function(self):
    """
    create_ops_dat on a plain Function must emit, in order: the dim, base,
    d_p and d_m initializer expressions, then the ops_decl_dat expression,
    and must register the new dat under the function's name.
    """
    # NOTE(review): shape=(4) is just the int 4, not a 1-tuple — Devito's
    # Grid appears to normalise scalars, but (4,) would be unambiguous;
    # confirm against Grid's constructor.
    grid = Grid(shape=(4))
    u = Function(name='u', grid=grid, space_order=2)
    block = OpsBlock('block')
    name_to_ops_dat = {}
    result = create_ops_dat(u, name_to_ops_dat, block)
    # The dat handle is registered with the conventional name/type.
    assert name_to_ops_dat['u'].name == namespace['ops_dat_name'](u.name)
    assert name_to_ops_dat['u']._C_typename == namespace['ops_dat_type']
    # dim <- grid shape
    assert result[0].expr.lhs.name == namespace['ops_dat_dim'](u.name)
    assert result[0].expr.rhs.params == (Integer(4), )
    # base <- all zeros
    assert result[1].expr.lhs.name == namespace['ops_dat_base'](u.name)
    assert result[1].expr.rhs.params == (Zero(), )
    # d_p/d_m <- +/- space_order halo
    assert result[2].expr.lhs.name == namespace['ops_dat_d_p'](u.name)
    assert result[2].expr.rhs.params == (Integer(2), )
    assert result[3].expr.lhs.name == namespace['ops_dat_d_m'](u.name)
    assert result[3].expr.rhs.params == (Integer(-2), )
    # Finally the ops_decl_dat call with the expected argument order.
    assert result[4].expr.lhs == name_to_ops_dat['u']
    assert type(result[4].expr.rhs) == namespace['ops_decl_dat']
    assert result[4].expr.rhs.args == (
        block, 1, Symbol(namespace['ops_dat_dim'](u.name)),
        Symbol(namespace['ops_dat_base'](u.name)),
        Symbol(namespace['ops_dat_d_m'](u.name)),
        Symbol(namespace['ops_dat_d_p'](u.name)), Byref(u.indexify(
            (0, ))), Literal('"%s"' % u._C_typedata), Literal('"u"'))
def create_ops_par_loop(trees, ops_kernel, parameters, block, name_to_ops_dat,
                        accessible_origin, par_to_ops_stencil, dims):
    """
    Build the ``ops_par_loop`` call that invokes an OPS kernel.

    Returns
    -------
    tuple
        ``([range_array_init], ops_par_loop_call)`` — the initializer
        expression for the iteration-range array, and the Call node.
    """
    it_range = []
    # Devito's symbolic_max is inclusive while the OPS range upper bound is
    # exclusive, hence the +1 adjustment below.
    devito_to_ops_indexer = 1
    for tree in trees:
        if isinstance(tree, IterationTree):
            for i in tree:
                it_range.extend(
                    [i.symbolic_min, i.symbolic_max + devito_to_ops_indexer])
    # Stack-allocated int32 array holding the flattened iteration range.
    range_array = Array(name='%s_range' % ops_kernel.name,
                        dimensions=(DefaultDimension(
                            name='range', default_value=len(it_range)), ),
                        dtype=np.int32,
                        scope='stack')
    range_array_init = Expression(
        ClusterizedEq(Eq(range_array, ListInitializer(it_range))))
    # Materialize each OpsArgDecl into the concrete ops_arg_* call node.
    ops_args = []
    for p in parameters:
        ops_arg = create_ops_arg(p, accessible_origin, name_to_ops_dat,
                                 par_to_ops_stencil)
        ops_args.append(
            ops_arg.ops_type(ops_arg.ops_name, ops_arg.elements_per_point,
                             ops_arg.dtype, ops_arg.rw_flag))
    # ops_par_loop(kernel, "kernel", block, dims, range, arg0, arg1, ...)
    ops_par_loop_call = Call(namespace['ops_par_loop'], [
        Literal(ops_kernel.name),
        Literal('"%s"' % ops_kernel.name), block, dims, range_array, *ops_args
    ])
    return [range_array_init], ops_par_loop_call
def create_ops_arg(p, name_to_ops_dat, par_to_ops_stencil):
    """
    Lower a kernel parameter to the matching OPS argument node.

    Constants become a read-only ``ops_arg_gbl``; everything else becomes an
    ``ops_arg_dat`` referencing the parameter's dat and stencil, read-only or
    read-write according to ``p.read_only``.
    """
    # The C type string is needed on both branches; build it once.
    ctype_literal = Literal('"%s"' % dtype_to_cstr(p.dtype))
    if p.is_Constant:
        # Strip the leading '*' from the constant's name and pass it by
        # reference; constants are always read-only globals.
        return namespace['ops_arg_gbl'](Byref(Constant(name=p.name[1:])), 1,
                                        ctype_literal, namespace['ops_read'])
    access_mode = namespace['ops_read'] if p.read_only \
        else namespace['ops_write']
    return namespace['ops_arg_dat'](name_to_ops_dat[p.name], 1,
                                    par_to_ops_stencil[p], ctype_literal,
                                    access_mode)
def _specialize_iet(self, iet, **kwargs):
    """
    Lower an IET for the OPS backend: declare the OPS block and dats, offload
    each affine iteration tree as an OPS kernel, and wrap the body with
    ops_init/ops_partition/ops_exit.

    Returns the new List body; kernels are accumulated in self._ops_kernels.
    """
    warning("The OPS backend is still work-in-progress")

    ops_init = Call(namespace['ops_init'], [0, 0, 2])
    ops_partition = Call(namespace['ops_partition'], Literal('""'))
    ops_exit = Call(namespace['ops_exit'])
    ops_block = OpsBlock('block')

    # Extract all symbols that need to be converted to ops_dat
    dims = []
    to_dat = set()
    for section, trees in find_affine_trees(iet).items():
        dims.append(len(trees[0].dimensions))
        symbols = set(FindSymbols('symbolics').visit(trees[0].root))
        symbols -= set(FindSymbols('defines').visit(trees[0].root))
        to_dat |= symbols

    # BUG FIX: the original `assert (<genexpr>)` asserted a generator object,
    # which is always truthy, so the check could never fail. Validate the
    # dimensionality invariant properly, and before dims[0] is used below.
    assert all(d == dims[0] for d in dims), \
        "The OPS backend currently assumes that all kernels \
have the same number of dimensions"

    # To ensure deterministic code generation we order the datasets to
    # be generated (since a set is an unordered collection)
    to_dat = filter_sorted(to_dat)

    name_to_ops_dat = {}
    pre_time_loop = []
    for f in to_dat:
        if f.is_Constant:
            continue
        pre_time_loop.extend(create_ops_dat(f, name_to_ops_dat, ops_block))

    # Generate one OPS kernel per offloadable iteration tree.
    for n, (section, trees) in enumerate(find_affine_trees(iet).items()):
        pre_loop, ops_kernel = opsit(trees, n)
        pre_time_loop.extend(pre_loop)
        self._ops_kernels.append(ops_kernel)

    ops_block_init = Expression(
        ClusterizedEq(
            Eq(ops_block,
               namespace['ops_decl_block'](dims[0], Literal('"block"')))))

    self._headers.append(namespace['ops_define_dimension'](dims[0]))
    self._includes.append('stdio.h')

    body = [
        ops_init, ops_block_init, *pre_time_loop, ops_partition, iet, ops_exit
    ]
    return List(body=body)
def create_ops_arg(p, accessible_origin, name_to_ops_dat, par_to_ops_stencil):
    """
    Describe a kernel parameter as an OpsArgDecl.

    Constants map to a read-only ``ops_arg_gbl`` passed by reference;
    functions map to an ``ops_arg_dat``, resolved through
    ``name_to_ops_dat`` (indexed by the shifted time variable when the
    parameter is time-dependent).

    Parameters
    ----------
    p : parameter being lowered.
    accessible_origin : dict
        Maps parameter names to AccessibleInfo (origin name, time, shift).
    name_to_ops_dat : dict
        Maps function names to ops_dat handles (or dat arrays for
        time-dependent functions).
    par_to_ops_stencil : dict
        Unused here; kept for signature compatibility with callers.

    Returns
    -------
    OpsArgDecl
    """
    elements_per_point = 1
    dtype = Literal('"%s"' % dtype_to_cstr(p.dtype))
    if p.is_Constant:
        ops_type = namespace['ops_arg_gbl']
        # Strip the leading '*' from the constant's name; pass by reference.
        ops_name = Byref(Constant(name=p.name[1:]))
        rw_flag = namespace['ops_read']
    else:
        ops_type = namespace['ops_arg_dat']
        accessible_info = accessible_origin[p.name]
        # Time-dependent functions live in an array of dats indexed by the
        # time-stepping variable plus its shift.
        ops_name = name_to_ops_dat[p.name] \
            if accessible_info.time is None \
            else name_to_ops_dat[accessible_info.origin_name].\
            indexify([Add(accessible_info.time, accessible_info.shift)])
        rw_flag = namespace['ops_read'] if p.read_only else namespace[
            'ops_write']
    ops_arg = OpsArgDecl(ops_type=ops_type,
                         ops_name=ops_name,
                         elements_per_point=elements_per_point,
                         dtype=dtype,
                         rw_flag=rw_flag)
    return ops_arg
def to_ops_stencil(param, accesses):
    """
    Create the OPS stencil for ``param`` from its list of point accesses.

    Parameters
    ----------
    param : object
        The function/parameter the stencil belongs to (its name feeds the
        stencil's name).
    accesses : list of tuples
        One offset tuple per stencil point; all tuples share the same length,
        which fixes the stencil's dimensionality.

    Returns
    -------
    tuple
        ``(ops_stencil, [stencil_array_init, ops_decl_stencil_expr])`` — the
        OpsStencil handle plus the two initializer expressions.
    """
    dims = len(accesses[0])
    pts = len(accesses)
    stencil_name = namespace['ops_stencil_name'](dims, param.name, pts)
    # Flat int32 array of all offsets, dims values per point.
    stencil_array = Array(
        name=stencil_name,
        dimensions=(DefaultDimension(name='len', default_value=dims * pts), ),
        dtype=np.int32,
    )
    # The stencil handle itself uses the upper-cased name by convention.
    ops_stencil = OpsStencil(stencil_name.upper())
    return ops_stencil, [
        Expression(
            ClusterizedEq(
                Eq(stencil_array,
                   ListInitializer(list(itertools.chain(*accesses)))))),
        Expression(
            ClusterizedEq(
                Eq(
                    ops_stencil, namespace['ops_decl_stencil'](
                        dims, pts, Symbol(stencil_array.name),
                        Literal('"%s"' % stencil_name.upper())))))
    ]
def create_ops_arg(p, accessible_origin, name_to_ops_dat, par_to_ops_stencil):
    """
    Lower a kernel parameter to the matching OPS argument node.

    Constants become a read-only ``ops_arg_gbl``; functions become an
    ``ops_arg_dat`` whose dat is resolved through ``name_to_ops_dat``
    (indexed by the time variable when the parameter is time-dependent).
    """
    if p.is_Constant:
        # Strip the leading '*' from the constant's name; pass by reference.
        return namespace['ops_arg_gbl'](
            Byref(Constant(name=p.name[1:])), 1,
            Literal('"%s"' % dtype_to_cstr(p.dtype)), namespace['ops_read'])
    else:
        accessible_info = accessible_origin[p.name]
        # Time-dependent functions live in an array of dats indexed by the
        # time-stepping variable.
        # NOTE(review): unlike later revisions, the index here does not add
        # accessible_info.shift — confirm this is intentional.
        dat_name = name_to_ops_dat[p.name] \
            if accessible_info.time is None \
            else name_to_ops_dat[accessible_info.origin_name].\
            indexify([accessible_info.time])
        return namespace['ops_arg_dat'](
            dat_name, 1, par_to_ops_stencil[p],
            Literal('"%s"' % dtype_to_cstr(p.dtype)),
            namespace['ops_read'] if p.read_only else namespace['ops_write'])
def test_create_ops_arg_constant(self):
    """A Constant lowers to a read-only ops_arg_gbl passed by reference,
    with the leading '*' stripped from its name."""
    constant = Constant(name='*a')
    arg = create_ops_arg(constant, {}, {})
    expected_args = [
        Byref(Constant(name='a')),
        1,
        Literal('"%s"' % dtype_to_cstr(constant.dtype)),
        namespace['ops_read'],
    ]
    assert arg.name == namespace['ops_arg_gbl'].name
    assert arg.args == expected_args
def test_create_ops_arg_constant(self):
    """
    A Constant lowers to an OpsArgDecl describing a read-only ops_arg_gbl:
    passed by reference, one element per point, with the leading '*'
    stripped from its name.
    """
    a = Constant(name='*a')
    ops_arg = create_ops_arg(a, {}, {}, {})
    assert ops_arg.ops_type == namespace['ops_arg_gbl']
    # Compare string forms: Byref nodes are not directly comparable.
    assert str(ops_arg.ops_name) == str(Byref(Constant(name='a')))
    assert ops_arg.elements_per_point == 1
    assert ops_arg.dtype == Literal('"%s"' % dtype_to_cstr(a.dtype))
    assert ops_arg.rw_flag == namespace['ops_read']
def test_create_ops_arg_constant(self):
    """A Constant lowers to a read-only ops_arg_gbl passed by reference,
    with the leading '*' stripped from its name."""
    constant = Constant(name='*a')
    result = create_ops_arg(constant, {}, {}, {})
    assert type(result) == namespace['ops_arg_gbl']
    args = result.args
    # Byref nodes are compared via their string form.
    assert str(args[0]) == str(Byref(Constant(name='a')))
    assert args[1] == 1
    assert args[2] == Literal('"%s"' % dtype_to_cstr(constant.dtype))
    assert args[3] == namespace['ops_read']
def test_create_ops_arg_function(self, read):
    """
    An accessible function lowers to an ops_arg_dat referencing its dat and
    stencil; the access mode follows the read flag.
    """
    u = OpsAccessible('u', np.float32, read)
    dat = OpsDat('u_dat')
    stencil = OpsStencil('stencil')
    # time=None: the dat is looked up directly, with no time indexing.
    info = AccessibleInfo(u, None, None)
    res = create_ops_arg(u, {'u': info}, {'u': dat}, {u: stencil})
    assert type(res) == namespace['ops_arg_dat']
    assert res.args == (dat, 1, stencil,
                        Literal('"%s"' % dtype_to_cstr(u.dtype)),
                        namespace['ops_read']
                        if read else namespace['ops_write'])
def test_create_ops_arg_function(self, read):
    """
    An accessible function lowers to an OpsArgDecl for ops_arg_dat,
    referencing its dat; the access mode follows the read flag.
    """
    u = OpsAccessible('u', dtype=np.float32, read_only=read)
    dat = OpsDat('u_dat')
    stencil = OpsStencil('stencil')
    # time=None: the dat is looked up directly, with no time indexing.
    info = AccessibleInfo(u, None, None)
    ops_arg = create_ops_arg(u, {'u': info}, {'u': dat}, {u: stencil})
    assert ops_arg.ops_type == namespace['ops_arg_dat']
    assert ops_arg.ops_name == OpsDat('u_dat')
    assert ops_arg.elements_per_point == 1
    assert ops_arg.dtype == Literal('"%s"' % dtype_to_cstr(u.dtype))
    # BUG FIX: the conditional must be parenthesized. Without parentheses the
    # assert parsed as `(rw_flag == ops_read) if read else ops_write`, so for
    # read=False it only asserted the truthy ops_write value and could never
    # fail (Python conditional expressions bind looser than `==`).
    assert ops_arg.rw_flag == \
        (namespace['ops_read'] if read else namespace['ops_write'])
def test_to_ops_stencil(self, _accesses):
    """
    to_ops_stencil must produce the flat offsets array initializer and the
    ops_decl_stencil expression, with the upper-cased stencil handle name.
    """
    param = Symbol('foo')
    # _accesses is a parametrized string literal; eval turns it back into
    # the list of 2D offset tuples.
    accesses = eval(_accesses)
    stencil_name = 's2d_foo_%spt' % len(accesses)
    stencil, result = to_ops_stencil(param, accesses)
    assert stencil.name == stencil_name.upper()
    # First expression: the flattened offsets array.
    assert result[0].expr.lhs.name == stencil_name
    assert result[0].expr.rhs.params == tuple(itertools.chain(*accesses))
    # Second expression: the ops_decl_stencil call.
    assert result[1].expr.lhs == stencil
    assert type(result[1].expr.rhs) == namespace['ops_decl_stencil']
    assert result[1].expr.rhs.args == (2, len(accesses),
                                       Symbol(stencil_name),
                                       Literal('"%s"' %
                                               stencil_name.upper()))
def test_create_ops_dat_function(self):
    """
    create_ops_dat on a plain Function must emit, in order: the dim, base,
    d_p and d_m initializer expressions, then the ops_decl_dat expression,
    and must register the new dat under the function's name.
    """
    # NOTE(review): shape=(4) is just the int 4, not a 1-tuple — Devito's
    # Grid appears to normalise scalars, but (4,) would be unambiguous;
    # confirm against Grid's constructor.
    grid = Grid(shape=(4))
    u = Function(name='u', grid=grid, space_order=2)
    block = OpsBlock('block')
    name_to_ops_dat = {}
    result = create_ops_dat(u, name_to_ops_dat, block)
    assert name_to_ops_dat['u'].name == namespace['ops_dat_name'](u.name)
    assert name_to_ops_dat['u']._C_typename == namespace['ops_dat_type']
    # dim <- grid shape
    assert result[0].expr.lhs.name == namespace['ops_dat_dim'](u.name)
    assert result[0].expr.rhs.params == (Integer(4), )
    # base <- all zeros
    assert result[1].expr.lhs.name == namespace['ops_dat_base'](u.name)
    assert result[1].expr.rhs.params == (Zero(), )
    # d_p/d_m <- +/- space_order halo
    assert result[2].expr.lhs.name == namespace['ops_dat_d_p'](u.name)
    assert result[2].expr.rhs.params == (Integer(2), )
    assert result[3].expr.lhs.name == namespace['ops_dat_d_m'](u.name)
    assert result[3].expr.rhs.params == (Integer(-2), )
    # Finally the ops_decl_dat call with the expected argument order.
    assert result[4].expr.lhs == name_to_ops_dat['u']
    assert result[4].expr.rhs.name == namespace['ops_decl_dat'].name
    assert result[4].expr.rhs.args == (
        block, 1, Symbol(namespace['ops_dat_dim'](u.name)),
        Symbol(namespace['ops_dat_base'](u.name)),
        Symbol(namespace['ops_dat_d_m'](u.name)),
        Symbol(namespace['ops_dat_d_p'](u.name)), Byref(u.indexify(
            (0, ))), Literal('"%s"' % u._C_typedata), Literal('"u"'))

def test_create_ops_arg_constant(self):
    """
    A Constant lowers to a read-only ops_arg_gbl passed by reference, with
    the leading '*' stripped from its name.
    """
    a = Constant(name='*a')
    res = create_ops_arg(a, {}, {})
    assert res.name == namespace['ops_arg_gbl'].name
    assert res.args == [
        Byref(Constant(name='a')), 1,
        Literal('"%s"' % dtype_to_cstr(a.dtype)), namespace['ops_read']
    ]

@pytest.mark.parametrize('read', [True, False])
def test_create_ops_arg_function(self, read):
    """
    An accessible function lowers to an ops_arg_dat referencing its dat
    and stencil; the access mode follows the read flag.
    """
    u = OpsAccessible('u', np.float32, read)
    dat = OpsDat('u_dat')
    stencil = OpsStencil('stencil')
    res = create_ops_arg(u, {'u': dat}, {u: stencil})
    assert res.name == namespace['ops_arg_dat'].name
    assert res.args == [
        dat, 1, stencil,
        Literal('"%s"' % dtype_to_cstr(u.dtype)),
        namespace['ops_read'] if read else namespace['ops_write']
    ]
def create_ops_dat(f, name_to_ops_dat, block):
    """
    Declare the ops_dat metadata for a Devito function.

    Emits, in order: the ``dim`` (shape), ``base`` (all-zero), ``d_p``
    (positive halo+padding) and ``d_m`` (negative halo+padding) array
    initializers, followed by the ``ops_decl_dat`` expression(s). For a
    TimeFunction, one dat is declared per time-stepping slot and each is
    registered in ``name_to_ops_dat`` under '<name><t><i>'; otherwise a
    single dat is registered under the function's name.

    Parameters
    ----------
    f : the Devito function being converted.
    name_to_ops_dat : dict
        Mutated in place — the new dat handle(s) are registered here.
    block : the OPS block the dat belongs to.

    Returns
    -------
    list of Expression
        The initializer expressions, ops_decl_dat last.
    """
    # The time dimension is handled separately, so it is excluded from the
    # spatial dimensionality of the dat metadata arrays.
    ndim = f.ndim - (1 if f.is_TimeFunction else 0)
    dim = Array(name=namespace['ops_dat_dim'](f.name),
                dimensions=(DefaultDimension(name='dim',
                                             default_value=ndim), ),
                dtype=np.int32,
                scope='stack')
    base = Array(name=namespace['ops_dat_base'](f.name),
                 dimensions=(DefaultDimension(name='base',
                                              default_value=ndim), ),
                 dtype=np.int32,
                 scope='stack')
    d_p = Array(name=namespace['ops_dat_d_p'](f.name),
                dimensions=(DefaultDimension(name='d_p',
                                             default_value=ndim), ),
                dtype=np.int32,
                scope='stack')
    d_m = Array(name=namespace['ops_dat_d_m'](f.name),
                dimensions=(DefaultDimension(name='d_m',
                                             default_value=ndim), ),
                dtype=np.int32,
                scope='stack')
    res = []
    # The base offset is zero in every spatial dimension.
    base_val = [Zero() for i in range(ndim)]
    # If f is a TimeFunction we need to create a ops_dat for each time stepping
    # variable (eg: t1, t2)
    if f.is_TimeFunction:
        time_pos = f._time_position
        time_index = f.indices[time_pos]
        time_dims = f.shape[time_pos]
        # Spatial shape/padding/halo: drop the time dimension.
        dim_shape = sympify(f.shape[:time_pos] + f.shape[time_pos + 1:])
        padding = f.padding[:time_pos] + f.padding[time_pos + 1:]
        halo = f.halo[:time_pos] + f.halo[time_pos + 1:]
        # d_p is the positive (right) extent, d_m the negated negative
        # (left) extent, per OPS convention.
        d_p_val = tuple(sympify([p[0] + h[0] for p, h in zip(padding, halo)]))
        d_m_val = tuple(
            sympify([-(p[1] + h[1]) for p, h in zip(padding, halo)]))
        # One dat per time slot, stored in a stack array of ops_dat.
        ops_dat_array = Array(name=namespace['ops_dat_name'](f.name),
                              dimensions=(DefaultDimension(
                                  name='dat', default_value=time_dims), ),
                              dtype='ops_dat',
                              scope='stack')
        dat_decls = []
        for i in range(time_dims):
            name = '%s%s%s' % (f.name, time_index, i)
            # Each time slot is reachable via the symbolic index t<i>.
            name_to_ops_dat[name] = ops_dat_array.indexify(
                [Symbol('%s%s' % (time_index, i))])
            dat_decls.append(namespace['ops_decl_dat'](
                block, 1, Symbol(dim.name), Symbol(base.name),
                Symbol(d_m.name), Symbol(d_p.name), Byref(f.indexify([i])),
                Literal('"%s"' % f._C_typedata), Literal('"%s"' % name)))
        ops_decl_dat = Expression(
            ClusterizedEq(Eq(ops_dat_array, ListInitializer(dat_decls))))
    else:
        ops_dat = OpsDat("%s_dat" % f.name)
        name_to_ops_dat[f.name] = ops_dat
        d_p_val = tuple(
            sympify([p[0] + h[0] for p, h in zip(f.padding, f.halo)]))
        d_m_val = tuple(
            sympify([-(p[1] + h[1]) for p, h in zip(f.padding, f.halo)]))
        dim_shape = sympify(f.shape)
        ops_decl_dat = Expression(
            ClusterizedEq(
                Eq(
                    ops_dat,
                    namespace['ops_decl_dat'](block, 1, Symbol(dim.name),
                                              Symbol(base.name),
                                              Symbol(d_m.name),
                                              Symbol(d_p.name),
                                              Byref(f.indexify([0])),
                                              Literal('"%s"' % f._C_typedata),
                                              Literal('"%s"' % f.name)))))
    # Emit the metadata initializers before the dat declaration that
    # references them.
    res.append(Expression(ClusterizedEq(Eq(dim, ListInitializer(dim_shape)))))
    res.append(Expression(ClusterizedEq(Eq(base, ListInitializer(base_val)))))
    res.append(Expression(ClusterizedEq(Eq(d_p, ListInitializer(d_p_val)))))
    res.append(Expression(ClusterizedEq(Eq(d_m, ListInitializer(d_m_val)))))
    res.append(ops_decl_dat)
    return res
def create_ops_dat(f, name_to_ops_dat, block):
    """
    Declare the ops_dat metadata for a Devito function.

    Builds the ``dim`` (shape), ``base`` (all-zero), ``d_p`` (left
    halo+padding) and ``d_m`` (negated right halo+padding) array
    initializers plus the ``ops_decl_dat`` expression. For a TimeFunction,
    one dat is declared per time-stepping slot and the whole dat array is
    registered in ``name_to_ops_dat``; otherwise a single dat is registered
    under the function's name.

    Parameters
    ----------
    f : the Devito function being converted.
    name_to_ops_dat : dict
        Mutated in place — the new dat handle is registered here.
    block : the OPS block the dat belongs to.

    Returns
    -------
    OpsDatDecl
        Bundle of the four initializer expressions and the dat declaration.
    """
    # The time dimension is handled separately, so it is excluded from the
    # spatial dimensionality of the dat metadata arrays.
    ndim = f.ndim - (1 if f.is_TimeFunction else 0)
    dim = Array(name=namespace['ops_dat_dim'](f.name),
                dimensions=(DefaultDimension(name='dim',
                                             default_value=ndim), ),
                dtype=np.int32,
                scope='stack')
    base = Array(name=namespace['ops_dat_base'](f.name),
                 dimensions=(DefaultDimension(name='base',
                                              default_value=ndim), ),
                 dtype=np.int32,
                 scope='stack')
    d_p = Array(name=namespace['ops_dat_d_p'](f.name),
                dimensions=(DefaultDimension(name='d_p',
                                             default_value=ndim), ),
                dtype=np.int32,
                scope='stack')
    d_m = Array(name=namespace['ops_dat_d_m'](f.name),
                dimensions=(DefaultDimension(name='d_m',
                                             default_value=ndim), ),
                dtype=np.int32,
                scope='stack')
    # The base offset is zero in every spatial dimension.
    base_val = [Zero() for i in range(ndim)]
    # If f is a TimeFunction we need to create a ops_dat for each time stepping
    # variable (eg: t1, t2)
    if f.is_TimeFunction:
        time_pos = f._time_position
        time_index = f.indices[time_pos]
        time_dims = f.shape[time_pos]
        # Spatial shape and halo extents: drop the time dimension.
        dim_val = f.shape[:time_pos] + f.shape[time_pos + 1:]
        d_p_val = f._size_nodomain.left[time_pos + 1:]
        # d_m is negated per OPS convention.
        d_m_val = [-i for i in f._size_nodomain.right[time_pos + 1:]]
        # One dat per time slot, stored in a stack array of ops_dat.
        ops_dat_array = Array(name=namespace['ops_dat_name'](f.name),
                              dimensions=(DefaultDimension(
                                  name='dat', default_value=time_dims), ),
                              dtype=namespace['ops_dat_type'],
                              scope='stack')
        dat_decls = []
        for i in range(time_dims):
            name = '%s%s%s' % (f.name, time_index, i)
            dat_decls.append(namespace['ops_decl_dat'](
                block, 1, Symbol(dim.name), Symbol(base.name),
                Symbol(d_m.name), Symbol(d_p.name), Byref(f.indexify([i])),
                Literal('"%s"' % f._C_typedata), Literal('"%s"' % name)))
        ops_decl_dat = Expression(
            ClusterizedEq(Eq(ops_dat_array, ListInitializer(dat_decls))))
        # Inserting the ops_dat array in case of TimeFunction.
        name_to_ops_dat[f.name] = ops_dat_array
    else:
        ops_dat = OpsDat("%s_dat" % f.name)
        name_to_ops_dat[f.name] = ops_dat
        dim_val = f.shape
        d_p_val = f._size_nodomain.left
        d_m_val = [-i for i in f._size_nodomain.right]
        ops_decl_dat = Expression(
            ClusterizedEq(
                Eq(
                    ops_dat,
                    namespace['ops_decl_dat'](block, 1, Symbol(dim.name),
                                              Symbol(base.name),
                                              Symbol(d_m.name),
                                              Symbol(d_p.name),
                                              Byref(f.indexify([0])),
                                              Literal('"%s"' % f._C_typedata),
                                              Literal('"%s"' % f.name)))))
    # Wrap the raw values into initializer expressions.
    dim_val = Expression(ClusterizedEq(Eq(dim, ListInitializer(dim_val))))
    base_val = Expression(ClusterizedEq(Eq(base, ListInitializer(base_val))))
    d_p_val = Expression(ClusterizedEq(Eq(d_p, ListInitializer(d_p_val))))
    d_m_val = Expression(ClusterizedEq(Eq(d_m, ListInitializer(d_m_val))))
    return OpsDatDecl(dim_val=dim_val,
                      base_val=base_val,
                      d_p_val=d_p_val,
                      d_m_val=d_m_val,
                      ops_decl_dat=ops_decl_dat)
def make_ops_kernels(iet):
    """
    Offload every affine iteration tree of ``iet`` to an OPS kernel.

    Declares the OPS block and dats, replaces each affine tree with an
    ops_par_loop call, schedules device-to-host fetches after the time loop,
    and wraps the body with ops_init/ops_partition/ops_exit.

    Returns
    -------
    tuple
        ``(new_iet, metadata)`` where metadata carries the extra includes,
        the generated kernel ffuncs and the dimension-defining header.
    """
    warning("The OPS backend is still work-in-progress")

    affine_trees = find_affine_trees(iet).items()

    # If there is no affine trees, then there is no loop to be optimized using OPS.
    if not affine_trees:
        return iet, {}

    ops_init = Call(namespace['ops_init'], [0, 0, 2])
    ops_partition = Call(namespace['ops_partition'], Literal('""'))
    ops_exit = Call(namespace['ops_exit'])

    # Extract all symbols that need to be converted to ops_dat
    dims = []
    to_dat = set()
    for _, tree in affine_trees:
        dims.append(len(tree[0].dimensions))
        symbols = set(FindSymbols('symbolics').visit(tree[0].root))
        symbols -= set(FindSymbols('defines').visit(tree[0].root))
        to_dat |= symbols

    # BUG FIX: the original `assert (<genexpr>)` asserted a generator object,
    # which is always truthy, so the check could never fail. Validate the
    # dimensionality invariant properly — and before dims[0] is used below.
    assert all(d == dims[0] for d in dims), \
        "The OPS backend currently assumes that all kernels \
have the same number of dimensions"

    # Create the OPS block for this problem
    ops_block = OpsBlock('block')
    ops_block_init = Expression(
        ClusterizedEq(
            Eq(ops_block,
               namespace['ops_decl_block'](dims[0], Literal('"block"')))))

    # To ensure deterministic code generation we order the datasets to
    # be generated (since a set is an unordered collection)
    to_dat = filter_sorted(to_dat)

    name_to_ops_dat = {}
    pre_time_loop = []
    after_time_loop = []
    for f in to_dat:
        if f.is_Constant:
            continue
        pre_time_loop.extend(
            list(create_ops_dat(f, name_to_ops_dat, ops_block)))
        # Copy data from device to host
        after_time_loop.extend(
            create_ops_fetch(f, name_to_ops_dat, f.grid.time_dim.extreme_max))

    # Generate ops kernels for each offloadable iteration tree
    mapper = {}
    ffuncs = []
    for n, (_, tree) in enumerate(affine_trees):
        pre_loop, ops_kernel, ops_par_loop_call = opsit(
            tree, n, name_to_ops_dat, ops_block, dims[0])
        pre_time_loop.extend(pre_loop)
        ffuncs.append(ops_kernel)
        # Replace the tree's root with the par-loop call; the remaining
        # roots map to None so the Transformer drops them.
        mapper[tree[0].root] = ops_par_loop_call
        mapper.update({i.root: mapper.get(i.root) for i in tree})  # Drop trees

    iet = Transformer(mapper).visit(iet)

    iet = iet._rebuild(body=flatten([
        ops_init, ops_block_init, pre_time_loop, ops_partition, iet.body,
        after_time_loop, ops_exit
    ]))

    return iet, {
        'includes': ['stdio.h', 'ops_seq.h'],
        'ffuncs': ffuncs,
        'headers': [namespace['ops_define_dimension'](dims[0])]
    }
def _specialize_iet(self, iet, **kwargs):
    """
    Lower an IET for the OPS backend: declare the OPS block and dats,
    offload each affine iteration tree as an OPS kernel, schedule
    device-to-host fetches after the time loop, and wrap the body with
    ops_init/ops_partition/ops_exit.

    Returns the original ``iet`` untouched when there is nothing to offload,
    otherwise the new List body. Kernels are registered in the func table.
    """
    warning("The OPS backend is still work-in-progress")

    affine_trees = find_affine_trees(iet).items()

    # If there is no affine trees, then there is no loop to be optimized using OPS.
    if not affine_trees:
        return iet

    ops_init = Call(namespace['ops_init'], [0, 0, 2])
    ops_partition = Call(namespace['ops_partition'], Literal('""'))
    ops_exit = Call(namespace['ops_exit'])

    # Extract all symbols that need to be converted to ops_dat
    dims = []
    to_dat = set()
    for _, tree in affine_trees:
        dims.append(len(tree[0].dimensions))
        symbols = set(FindSymbols('symbolics').visit(tree[0].root))
        symbols -= set(FindSymbols('defines').visit(tree[0].root))
        to_dat |= symbols

    # BUG FIX: the original `assert (<genexpr>)` asserted a generator object,
    # which is always truthy, so the check could never fail. Validate the
    # dimensionality invariant properly — and before dims[0] is used below.
    assert all(d == dims[0] for d in dims), \
        "The OPS backend currently assumes that all kernels \
have the same number of dimensions"

    # Create the OPS block for this problem
    ops_block = OpsBlock('block')
    ops_block_init = Expression(
        ClusterizedEq(
            Eq(ops_block,
               namespace['ops_decl_block'](dims[0], Literal('"block"')))))

    # To ensure deterministic code generation we order the datasets to
    # be generated (since a set is an unordered collection)
    to_dat = filter_sorted(to_dat)

    name_to_ops_dat = {}
    pre_time_loop = []
    after_time_loop = []
    for f in to_dat:
        if f.is_Constant:
            continue
        pre_time_loop.extend(
            list(create_ops_dat(f, name_to_ops_dat, ops_block)))
        # To return the result to Devito, it is necessary to copy the data
        # from the dat object back to the CPU memory.
        after_time_loop.extend(
            create_ops_fetch(f, name_to_ops_dat,
                             self.time_dimension.extreme_max))

    # Generate ops kernels for each offloadable iteration tree
    mapper = {}
    for n, (_, tree) in enumerate(affine_trees):
        pre_loop, ops_kernel, ops_par_loop_call = opsit(
            tree, n, name_to_ops_dat, ops_block, dims[0])
        pre_time_loop.extend(pre_loop)
        self._func_table[namespace['ops_kernel_file'](ops_kernel.name)] = \
            MetaCall(ops_kernel, False)
        # Replace the tree's root with the par-loop call; the remaining
        # roots map to None so the Transformer drops them.
        mapper[tree[0].root] = ops_par_loop_call
        mapper.update({i.root: mapper.get(i.root) for i in tree})  # Drop trees

    iet = Transformer(mapper).visit(iet)

    self._headers.append(namespace['ops_define_dimension'](dims[0]))
    self._includes.extend(['stdio.h', 'ops_seq.h'])

    body = [
        ops_init, ops_block_init, *pre_time_loop, ops_partition, iet,
        *after_time_loop, ops_exit
    ]
    return List(body=body)