def test_collapsing_v2(self):
    """
    Minimal failing example taken from issue #1478.

    Builds ``n*m`` accumulation equations into ``dW`` and checks the
    resulting loop nest structure, plus the properties carried by the
    first four Iterations found in the Operator.
    """
    n = 8
    m = 8
    nx, ny, nchi, ncho = 12, 12, 1, 1

    x = SpaceDimension("x")
    y = SpaceDimension("y")
    ci = Dimension("ci")
    co = Dimension("co")
    i = Dimension("i")
    j = Dimension("j")
    grid = Grid((nx, ny), dtype=np.float32, dimensions=(x, y))

    X = Function(name="xin", dimensions=(ci, x, y), shape=(nchi, nx, ny),
                 grid=grid, space_order=n // 2)
    dy = Function(name="dy", dimensions=(co, x, y), shape=(ncho, nx, ny),
                  grid=grid, space_order=n // 2)
    dW = Function(name="dW", dimensions=(co, ci, i, j),
                  shape=(ncho, nchi, n, m), grid=grid)

    # One equation per integer (row, col) pair; `dW` is indexed with plain
    # integers here, not with the `i`/`j` Dimensions declared above
    eq = []
    for row in range(n):
        for col in range(m):
            target = dW[co, ci, row, col]
            update = dy[co, x, y] * X[ci, x + row - n // 2, y + col - m // 2]
            eq.append(Eq(target, target + update))

    op = Operator(eq, opt=('advanced', {'openmp': True}))

    assert_structure(op, ['co,ci,x,y'])

    found = FindNodes(Iteration).visit(op)
    assert found[0].ncollapsed == 1
    assert found[1].is_Vectorized
    assert found[2].is_Sequential
    assert found[3].is_Sequential
def test_subdimleft_parallel(self):
    """
    Tests application of an Operator consisting of a subdimension
    defined over different sub-regions, explicitly created through the
    use of SubDimensions.

    This tests that flow direction is not being automatically inferred
    from whether the subdimension is on the left or right boundary.
    """
    grid = Grid(shape=(20, 20))
    x, y = grid.dimensions
    t = grid.stepping_dim
    thickness = 4

    u = TimeFunction(name='u', save=None, grid=grid, space_order=0,
                     time_order=1)

    xl = SubDimension.left(name='xl', parent=x, thickness=thickness)
    yi = SubDimension.middle(name='yi', parent=y,
                             thickness_left=thickness,
                             thickness_right=thickness)

    # Can be done in parallel
    eq = Eq(u[t + 1, xl, yi], u[t, xl, yi] + 1)

    op = Operator([eq])

    iterations = FindNodes(Iteration).visit(op)
    assert all(i.is_Affine and i.is_Parallel
               for i in iterations if i.dim in [xl, yi])

    op.apply(time_m=0, time_M=0)

    # Within the first `thickness` rows, only the middle columns are written
    assert np.all(u.data[1, 0:thickness, 0:thickness] == 0)
    assert np.all(u.data[1, 0:thickness, -thickness:] == 0)
    assert np.all(u.data[1, 0:thickness, thickness:-thickness] == 1)
    # Every row outside `xl` must be untouched. The slice starts at
    # `thickness` (not `thickness + 1`) so that the first row past the
    # left sub-region is checked as well
    assert np.all(u.data[1, thickness:, :] == 0)
def test_avoid_redundant_haloupdate(self):
    """
    Check that exactly one Call is found in an Operator made of a stencil
    update on `f`, an increment of `f` and a read of `f` into `g`.
    """
    grid = Grid(shape=(12, ))
    x = grid.dimensions[0]
    t = grid.stepping_dim

    i = Dimension(name='i')
    j = Dimension(name='j')

    f = TimeFunction(name='f', grid=grid)
    g = Function(name='g', grid=grid)

    stencil = Eq(f.forward, f[t, x - 1] + f[t, x + 1] + 1.)
    # no halo update as it's an Inc
    increment = Inc(f[t + 1, i], f[t + 1, i] + 1.)
    # access `f` at `t`, not `t+1`!
    readback = Eq(g, f[t, j] + 1)

    op = Operator([stencil, increment, readback])

    n_calls = len(FindNodes(Call).visit(op))
    assert n_calls == 1
def mark_halospots(analysis):
    """
    Update ``analysis`` by tagging as ``REDUNDANT`` the HaloSpots found
    within ``analysis.iet`` that repeat an earlier HaloSpot with the same
    halo scheme.
    """

    def is_repeatable(fmapper, scope):
        # False as soon as an anti-dependence on one of the functions is
        # caused by any of that function's `loc_indices`
        return not any(dep.cause & set(hse.loc_indices)
                       for f, hse in fmapper.items()
                       for dep in scope.d_anti.project(f))

    redundant = OrderedDict()
    for root, scope in analysis.scopes.items():
        # Group the HaloSpots under `root` by their halo scheme
        by_scheme = as_mapper(FindNodes(HaloSpot).visit(root),
                              lambda hs: hs.halo_scheme)
        for scheme, halo_spots in by_scheme.items():
            if len(halo_spots) > 1 and is_repeatable(scheme.fmapper, scope):
                # The first HaloSpot of the group is kept; the rest are tagged
                for hs in halo_spots[1:]:
                    redundant[hs] = REDUNDANT

    analysis.update(redundant)
def test_collapsing(self):
    """
    Build a blocked, OpenMP-parallel Operator, then check that it compiles,
    computes the expected values, and carries the expected ``collapse``
    pragmas in the ``bf0`` function.
    """
    grid = Grid(shape=(3, 3, 3))
    u = TimeFunction(name='u', grid=grid)

    op = Operator(Eq(u.forward, u + 1), dle=('blocking', 'openmp'))

    # Does it compile? Honoring the OpenMP specification isn't trivial
    assert op.cfunction

    # Does it produce the right result
    op.apply(t_M=9)
    assert np.all(u.data[0] == 10)

    bf0_iterations = FindNodes(Iteration).visit(op._func_table['bf0'])

    first_pragma = bf0_iterations[0].pragmas[0].value
    assert first_pragma == 'omp for collapse(2) schedule(static,1)'

    third_pragma = bf0_iterations[2].pragmas[0].value
    assert third_pragma == (
        'omp parallel for collapse(2) schedule(static,1) num_threads(%d)'
        % nhyperthreads())
def _create_call_graph(self):
    """
    Build a DAG of the functions in ``self.efuncs``, starting from
    ``'root'`` and following the Calls found in each function body.

    An edge ``callee -> caller`` is added for each distinct callee name
    that is itself a key of ``self.efuncs``.
    """
    dag = DAG(nodes=['root'])
    pending = ['root']

    while pending:
        caller = pending.pop(0)
        calls = FindNodes(Call).visit(self.efuncs[caller])
        for callee in filter_ordered([call.name for call in calls]):
            if callee not in self.efuncs:
                # Exclude foreign Calls, e.g., MPI calls
                continue
            try:
                dag.add_node(callee)
            except KeyError:
                # `callee` already in `dag`
                pass
            else:
                # First time we see `callee`: schedule it for traversal
                pending.append(callee)
            dag.add_edge(callee, caller)

    # Sanity check
    assert dag.size == len(self.efuncs)

    return dag
def test_consistency_anti_dependences(self, exprs, axis, expected, visit,
                                      ti0, ti1, ti3, tu, tv, tw):
    """
    Test that anti dependences end up generating multi loop nests,
    rather than a single loop nest enclosing all of the equations.
    """
    bases = (ti0.base, ti1.base, ti3.base, tu.base, tv.base, tw.base)
    eq1, eq2, eq3 = EVAL(exprs, *bases)

    op = Operator([eq1, eq2, eq3], dse='noop', dle='noop', time_axis=axis)

    trees = retrieve_iteration_tree(op)
    iters = FindNodes(Iteration).visit(op)
    assert len(trees) == len(expected)

    def label(iteration):
        # 'time' is abbreviated to 't' to keep the test parametrization short
        name = iteration.dim.name
        return {'time': 't'}.get(name, name)

    tree_labels = ["".join(label(it) for it in tree) for tree in trees]
    assert tree_labels == expected

    visit_label = "".join(label(it) for it in iters)
    assert visit_label == visit
def iet_lower_steppers(iet):
    """
    Replace the SteppingDimensions within ``iet``'s expressions with
    suitable ModuloDimensions.

    Returns the transformed IET.
    """
    for iteration in FindNodes(Iteration).visit(iet):
        uindices = iteration.uindices
        if not uindices:
            # Be quick: avoid uselessly reconstructing nodes
            continue

        # In an expression, there could be `u[t+1, ...]` and `v[t+1, ...]`,
        # where `u` and `v` are TimeFunctions with circular time buffers
        # (save=None) *but* different modulo extent. The `t+1` indices above
        # are therefore conceptually different, so they will be replaced with
        # the proper ModuloDimension through two different calls to `xreplace`
        by_modulo = as_mapper(uindices, lambda d: d.modulo)
        for modulo, dims in by_modulo.items():
            subs = {d.origin: d for d in dims}
            # Only touch the accesses whose function has this time size
            rule = lambda access: access.function._time_size == modulo
            iet = ReplaceStepIndices(subs, rule).visit(iet)

    return iet
def test_grouping(self):
    """
    Test that Clusters over the same set of ConditionalDimensions fall within
    the same Conditional. This is a follow up to issue #1610.
    """
    grid = Grid(shape=(10, 10))
    time = grid.time_dim
    cond = ConditionalDimension(name='cond', parent=time, condition=time < 5)

    u = TimeFunction(name='u', grid=grid, space_order=4)

    # We use a SubDomain only to keep the two Eqs separated
    on_interior = Eq(u.forward, u + 1, subdomain=grid.interior)
    conditional = Eq(u.forward, u.dx.dx + 1., implicit_dims=[cond])

    op = Operator([on_interior, conditional],
                  opt=('advanced-fsg', {'cire-mincost-sops': 1}))

    conds = FindNodes(Conditional).visit(op)
    assert len(conds) == 1

    then_trees = retrieve_iteration_tree(conds[0].then_body)
    assert len(then_trees) == 2
def test_consistency_coupled_wo_ofs(self, tu, tv, ti0, t0, t1):
    """
    Test that no matter what is the order in which the equations are
    provided to an Operator, the resulting loop nest is the same.
    None of the array accesses in the equations use offsets.
    """
    eq1 = Eq(tu, tv*ti0*t0 + ti0*t1)
    eq2 = Eq(ti0, tu + t0*3.)
    eq3 = Eq(tv, ti0*tu)

    # Same equations, three different orderings
    orderings = ([eq1, eq2, eq3], [eq2, eq1, eq3], [eq3, eq2, eq1])
    operators = [Operator(eqs, dse='noop', dle='noop') for eqs in orderings]

    all_trees = [retrieve_iteration_tree(op) for op in operators]
    assert all(len(trees) == 1 for trees in all_trees)

    for tree in [trees[0] for trees in all_trees]:
        assert IsPerfectIteration().visit(tree[0])
        innermost_exprs = FindNodes(Expression).visit(tree[-1])
        assert len(innermost_exprs) == 3
def test_iteration_property_parallel(self, exprs, expected):
    """
    Tests detection of sequential and parallel Iterations when applying
    equations over different subdomains.
    """
    # NOTE: the names below are looked up by `eval`, so they must not be
    # renamed even though they look unused
    grid = Grid(shape=(20, 20))
    x, y = grid.dimensions  # noqa
    t = grid.time_dim  # noqa
    interior = grid.interior  # noqa

    u = TimeFunction(name='u', grid=grid, save=10, time_order=1)  # noqa

    # List comprehension would need explicit locals/globals mappings to eval
    for i, e in enumerate(list(exprs)):
        exprs[i] = eval(e)

    op = Operator(exprs)

    # Only the Iteration over the `expected` dimension must be parallel;
    # every other one must be sequential
    for iteration in FindNodes(Iteration).visit(op):
        if iteration.dim.name == expected:
            assert iteration.is_Parallel
        else:
            assert iteration.is_Sequential
def test_consistency_coupled_w_ofs(self, exprs, ti0, ti1, ti3):
    """
    Test that no matter what is the order in which the equations are
    provided to an Operator, the resulting loop nest is the same.
    The array accesses in the equations may or may not use offsets;
    these impact the loop bounds, but not the resulting tree
    structure.
    """
    eq1, eq2, eq3 = EVAL(exprs, ti0.base, ti1.base, ti3.base)

    # Same equations, three different orderings
    orderings = ([eq1, eq2, eq3], [eq2, eq1, eq3], [eq3, eq2, eq1])
    operators = [Operator(eqs, dse='noop', dle='noop') for eqs in orderings]

    all_trees = [retrieve_iteration_tree(op) for op in operators]
    assert all(len(trees) == 1 for trees in all_trees)

    for tree in [trees[0] for trees in all_trees]:
        assert IsPerfectIteration().visit(tree[0])
        innermost_exprs = FindNodes(Expression).visit(tree[-1])
        assert len(innermost_exprs) == 3
def make_grid_accesses(node, yk_grid_objs):
    """
    Construct a new Iteration/Expression based on ``node``, in which all
    Indexed accesses to YASK-backed functions have been converted into YASK
    grid accesses.

    Parameters
    ----------
    node : Node
        The tree whose Expressions are to be translated.
    yk_grid_objs : dict-like
        Looked up by function name to obtain the object on which the
        grid get/put/add call is made.
    """

    def make_grid_gets(expr):
        # Replace every YASK-backed Indexed in `expr` with a grid-get call;
        # the indices are translated recursively
        subs = {}
        for indexed in retrieve_indexed(expr):
            function = indexed.base.function
            if not function.from_YASK:
                continue
            indices = [INT(make_grid_gets(idx)) for idx in indexed.indices]
            subs[indexed] = make_sharedptr_funcall(
                namespace['code-grid-get'], [ListInitializer(indices)],
                yk_grid_objs[function.name])
        return expr.xreplace(subs)

    replacements = {}
    for e in FindNodes(Expression).visit(node):
        if e.is_ForeignExpression:
            continue

        lhs, rhs = e.expr.args

        # RHS translation
        rhs = make_grid_gets(rhs)

        # LHS translation
        if not e.write.from_YASK:
            # Writing to a scalar temporary
            replacements[e] = e._rebuild(expr=e.expr.func(lhs, rhs))
            continue

        indices = ListInitializer([INT(make_grid_gets(idx))
                                   for idx in lhs.indices])
        if e.is_Increment:
            call = namespace['code-grid-add']
        else:
            call = namespace['code-grid-put']
        handle = make_sharedptr_funcall(call, [rhs, indices],
                                        yk_grid_objs[e.write.name])
        replacements[e] = ForeignExpression(handle, e.dtype,
                                            is_Increment=e.is_Increment)

    return Transformer(replacements).visit(node)
def process(self, iet):
    """
    Lower the SyncSpots found in ``iet`` via the per-SyncOp-type callbacks.

    Returns the (possibly rebuilt) IET and a dict of metadata. The dict is
    empty when ``iet`` has no SyncSpots; otherwise it carries the generated
    'efuncs', the 'includes' and the extra 'args'.
    """
    # The SyncOps are to be processed in the following order
    processing_order = [WaitLock, WithLock, Delete, FetchWait,
                        FetchWaitPrefetch]

    callbacks = {
        WaitLock: self._make_waitlock,
        WithLock: self._make_withlock,
        FetchWait: self._make_fetchwait,
        FetchWaitPrefetch: self._make_fetchwaitprefetch,
        Delete: self._make_delete
    }

    sync_spots = FindNodes(SyncSpot).visit(iet)
    if not sync_spots:
        return iet, {}

    Pieces = namedtuple('Pieces', 'init finalize funcs threads')
    pieces = Pieces([], [], [], [])

    subs = {}
    for spot in sync_spots:
        by_type = as_mapper(spot.sync_ops, lambda op: type(op))
        for op_type in sorted(by_type, key=processing_order.index):
            # Each callback works on the result of the previous one, if any
            current = subs.get(spot, spot)
            handler = callbacks[op_type]
            subs[spot] = handler(current, by_type[op_type], pieces, iet)

    iet = Transformer(subs).visit(iet)

    # Add initialization and finalization code
    init = List(body=pieces.init, footer=c.Line())
    finalize = List(header=c.Line(), body=pieces.finalize)
    iet = iet._rebuild(body=(init, ) + iet.body + (finalize, ))

    # Only the non-integer thread sizes are reported as 'args'
    extra_args = [thread.size for thread in pieces.threads
                  if not is_integer(thread.size)]

    return iet, {
        'efuncs': pieces.funcs,
        'includes': ['pthread.h'],
        'args': extra_args
    }
def test_redo_haloupdate_due_to_antidep(self):
    """
    Two stencil updates, the second reading `f` at `t+1`: check that two
    Calls are found in the Operator and that the computed data is as
    expected.
    """
    grid = Grid(shape=(12,))
    x = grid.dimensions[0]
    t = grid.stepping_dim

    f = TimeFunction(name='f', grid=grid)
    g = TimeFunction(name='g', grid=grid)

    update_f = Eq(f.forward, f[t, x-1] + f[t, x+1] + 1.)
    update_g = Eq(g.forward, f[t+1, x-1] + f[t+1, x+1] + g)
    op = Operator([update_f, update_g])
    op.apply(time=0)

    assert len(FindNodes(Call).visit(op)) == 2

    assert np.all(f.data_ro_domain[1] == 1.)

    # The slice of `g` that is checked depends on whether this rank owns
    # the LEFT position along `x`
    on_left = LEFT in f.grid.distributor.glb_pos_map[x]
    if on_left:
        assert np.all(g.data_ro_domain[1, 1:] == 2.)
    else:
        assert np.all(g.data_ro_domain[1, :-1] == 2.)
def test_simd_space_invariant(self):
    """
    Similar to test_space_invariant_v3, testing simd vectorization happens
    in the correct place.
    """
    grid = Grid(shape=(10, 10, 10))
    x, y, z = grid.dimensions

    f = Function(name='f', grid=grid)

    op = Operator(Inc(f, cos(x * y) + cos(x * z)),
                  opt=('advanced', {'openmp': True}))

    found = FindNodes(Iteration).visit(op)

    outer_pragma = found[0].pragmas[0].value
    assert 'omp for collapse(1) schedule(static,1)' in outer_pragma
    # Iterations 1 and 3 are the ones expected to carry the simd pragma
    assert 'omp simd' in found[1].pragmas[0].value
    assert 'omp simd' in found[3].pragmas[0].value

    op.apply()
    norm = np.linalg.norm(f.data)
    assert np.isclose(norm, 37.1458, rtol=1e-5)
def _make_parallel_tree(self, root, candidates):
    """
    Return a mapper to parallelize the Iterations within ``root``.

    The mapper has a single entry, from ``root`` to its rebuilt version
    carrying the additional `for` pragma and the COLLAPSED property.
    """
    ncollapse = self._ncollapse(root, candidates)

    # The `omp for` pragma and the matching property
    omp_for = self.lang['for'](ncollapse)
    pragmas = root.pragmas + (omp_for,)
    properties = root.properties + (COLLAPSED(ncollapse),)

    target = root
    if root.is_ParallelAtomic:
        # Introduce the `omp atomic` pragmas around each increment
        atomics = {e: List(header=self.lang['atomic'], body=e)
                   for e in FindNodes(Expression).visit(root)
                   if e.is_Increment}
        target = Transformer(atomics).visit(root)

    rebuilt = target._rebuild(pragmas=pragmas, properties=properties)
    return OrderedDict([(root, rebuilt)])
def mark_halospot_useless(analysis):
    """
    Update ``analysis`` by tagging the USELESS HaloSpots found within
    ``analysis.iet``.

    A HaloSpot is USELESS if *all* reads along the HaloSpot's `loc_indices`
    pertain to an increment expression.
    """
    useless = OrderedDict()

    for root, scope in analysis.scopes.items():
        for hs in FindNodes(HaloSpot).visit(root):
            required = False
            for f, hse in hs.fmapper.items():
                for d, v in hse.loc_indices.items():
                    # For a stepping dimension, compare against the origin
                    readat = v.origin if d.is_Stepping else v
                    reads = [r for r in scope.reads[f] if r[d] == readat]
                    if not all(r.is_increment for r in reads):
                        # At least one plain (non-increment) read
                        required = True
                        break
            if not required:
                useless[hs] = USELESS

    analysis.update(useless)
def test_scheduling(self):
    """
    Affine iterations -> #pragma omp ... schedule(dynamic,1) ...
    Non-affine iterations -> #pragma omp ... schedule(dynamic,chunk_size) ...
    """
    grid = Grid(shape=(11, 11))

    u = TimeFunction(name='u', grid=grid, time_order=2, save=5,
                     space_order=0)
    sf1 = SparseTimeFunction(name='s', grid=grid, npoint=1, nt=5)

    eqns = [Eq(u.forward, u + 1)]
    eqns += sf1.interpolate(u)

    op = Operator(eqns, dle='openmp')

    found = FindNodes(Iteration).visit(op)
    assert len(found) == 4

    affine, non_affine = found[1], found[3]

    assert affine.is_Affine
    assert 'schedule(dynamic,1)' in affine.pragmas[0].value

    assert not non_affine.is_Affine
    assert 'schedule(dynamic,chunk_size)' in non_affine.pragmas[0].value
def test_affiness(self):
    """
    Test for issue #1616.

    All Iterations, except the one over the time dimension, must be affine.
    """
    nt = 19
    factor = 4

    grid = Grid(shape=(11, 11))
    time = grid.time_dim
    time_subsampled = ConditionalDimension('t_sub', parent=time,
                                           factor=factor)

    u = TimeFunction(name='u', grid=grid)
    usave = TimeFunction(name='usave', grid=grid,
                         save=(nt+factor-1)//factor,
                         time_dim=time_subsampled)

    op = Operator([Eq(u.forward, u + 1.), Eq(usave, u)])

    for iteration in FindNodes(Iteration).visit(op):
        if iteration.dim is time:
            continue
        assert iteration.is_Affine
def test_cache_blocking_structure_optrelax():
    """
    With 'blockrelax' enabled, check that the nest rooted at 'p_src0_blk0'
    consists of two Iterations, both over block dimensions.
    """
    grid = Grid(shape=(8, 8, 8))

    u = TimeFunction(name="u", grid=grid, space_order=2)
    src = SparseTimeFunction(name="src", grid=grid, nt=3, npoint=1,
                             coordinates=np.array([(0.5, 0.5, 0.5)]))

    eqns = [Eq(u.forward, u.dx)]
    eqns += src.inject(field=u.forward, expr=src)

    op = Operator(eqns, opt=('advanced', {'blockrelax': True}))

    bns, _ = assert_blocking(op, {'x0_blk0', 'p_src0_blk0'})

    src_iters = FindNodes(Iteration).visit(bns['p_src0_blk0'])
    assert len(src_iters) == 2
    outer, inner = src_iters[0], src_iters[1]
    assert outer.dim.is_Block
    assert inner.dim.is_Block
def test_cache_blocking_structure_optpartile(par_tile, expected):
    """
    Check that the depth-1 block Iterations of each blocked nest have the
    steps listed in ``expected`` when the 'par-tile' option is used.
    """
    grid = Grid(shape=(8, 8, 8))

    u = TimeFunction(name="u", grid=grid, space_order=4)
    v = TimeFunction(name="v", grid=grid, space_order=4)

    eqns = [Eq(u.forward, u.dx), Eq(v.forward, u.forward.dx)]

    options = {'par-tile': par_tile, 'blockinner': True}
    op = Operator(eqns, opt=('advanced', options))

    bns, _ = assert_blocking(op, {'x0_blk0', 'x1_blk0'})
    assert len(bns) == len(expected)

    for root, steps in zip(bns.values(), expected):
        # Only look at the block Iterations at depth 1
        block_iters = [it for it in FindNodes(Iteration).visit(root)
                       if it.dim.is_Block and it.dim._depth == 1]
        assert len(block_iters) == len(steps)
        assert all(it.step == step for it, step in zip(block_iters, steps))
def test_loops_collapsed(fe, t0, t1, t2, t3, exprs, expected, iters):
    """
    Apply the 'openmp' transformation to a three-level loop nest and check
    which Iterations carry an `omp for collapse` pragma.
    """
    scope = [fe, t0, t1, t2, t3]
    node_exprs = [Expression(DummyEq(EVAL(e, *scope))) for e in exprs]

    # Wrap the expressions, innermost loop first
    ast = iters[8](node_exprs)
    ast = iters[7](ast)
    ast = iters[6](ast)

    ast = iet_analyze(ast)

    nodes = transform(ast, mode='openmp').nodes
    iterations = FindNodes(Iteration).visit(nodes)
    assert len(iterations) == len(expected)

    # Check for presence of pragma omp
    for iteration, collapsed in zip(iterations, expected):
        pragmas = iteration.pragmas
        if collapsed is True:
            assert len(pragmas) == 1
            assert 'omp for collapse' in pragmas[0].value
        else:
            assert all('omp for collapse' not in p.value for p in pragmas)
def _hoist_prodders(self, iet):
    """
    Move Prodders within the outer levels of an Iteration tree.

    Each periodic Prodder is removed from its current position and a
    rebuilt copy is appended to the nodes of the last Iteration over a
    BlockDimension in the tree or, if there is none, of the tree root.

    Returns the transformed IET and an empty metadata dict.
    """
    def over_block(iteration):
        return isinstance(iteration.dim, BlockDimension)

    mapper = {}
    for tree in retrieve_iteration_tree(iet):
        for prodder in FindNodes(Prodder).visit(tree.root):
            if not prodder._periodic:
                continue

            block_iterations = filter_iterations(tree, over_block)
            try:
                candidate = block_iterations[-1]
            except IndexError:
                # Fallback: use the outermost Iteration
                candidate = tree.root

            # NOTE(review): if two Prodders resolve to the same `candidate`,
            # the later assignment replaces the earlier one -- confirm this
            # cannot happen in practice
            hoisted = candidate.nodes + (prodder._rebuild(), )
            mapper[candidate] = candidate._rebuild(nodes=hoisted)
            mapper[prodder] = None

    iet = Transformer(mapper, nested=True).visit(iet)

    return iet, {}
def test_iteration_parallelism_2d(self, exprs, atomic, parallel):
    """
    Tests detection of PARALLEL_* properties.

    ``atomic`` and ``parallel`` hold the names of the dimensions whose
    Iterations are expected to be, respectively, atomic-parallel and
    parallel; all other Iterations must be neither.
    """
    # NOTE: the names below are looked up by `eval`, so they must not be
    # renamed even though they look unused
    grid = Grid(shape=(10, 10))
    time = grid.time_dim  # noqa
    t = grid.stepping_dim  # noqa
    x, y = grid.dimensions  # noqa

    p = Dimension(name='p')
    d = Dimension(name='d')
    rx = Dimension(name='rx')
    ry = Dimension(name='ry')

    u = Function(name='u', grid=grid)  # noqa
    v = TimeFunction(name='v', grid=grid, save=None)  # noqa
    w = TimeFunction(name='w', grid=grid, save=None)  # noqa

    cx = Function(name='coeff_x', dimensions=(p, rx), shape=(1, 2))  # noqa
    cy = Function(name='coeff_y', dimensions=(p, ry), shape=(1, 2))  # noqa

    gp = Function(name='gridpoints', dimensions=(p, d), shape=(1, 2))  # noqa
    src = Function(name='src', dimensions=(p, ), shape=(1, ))  # noqa
    rcv = Function(name='rcv', dimensions=(time, p), shape=(100, 1),
                   space_order=0)  # noqa

    # List comprehension would need explicit locals/globals mappings to eval
    for i, e in enumerate(list(exprs)):
        exprs[i] = eval(e)

    op = Operator(exprs, opt='openmp')

    iters = FindNodes(Iteration).visit(op)

    for iteration in iters:
        if iteration.dim.name in atomic:
            assert iteration.is_ParallelAtomic
        else:
            assert not iteration.is_ParallelAtomic

    for iteration in iters:
        if iteration.dim.name in parallel:
            assert iteration.is_Parallel
        else:
            assert not iteration.is_Parallel
def test_subdimmiddle_parallel(self, opt):
    """
    Tests application of an Operator consisting of a subdimension
    defined over different sub-regions, explicitly created through the
    use of SubDimensions.
    """
    grid = Grid(shape=(20, 20))
    x, y = grid.dimensions
    t = grid.stepping_dim
    thickness = 4

    u = TimeFunction(name='u', save=None, grid=grid, space_order=0,
                     time_order=1)

    xi = SubDimension.middle(name='xi', parent=x,
                             thickness_left=thickness,
                             thickness_right=thickness)
    yi = SubDimension.middle(name='yi', parent=y,
                             thickness_left=thickness,
                             thickness_right=thickness)

    # a 5 point stencil that can be computed in parallel
    centre = Eq(u[t+1, xi, yi], u[t, xi, yi] + u[t, xi-1, yi]
                + u[t, xi+1, yi] + u[t, xi, yi-1] + u[t, xi, yi+1])

    u.data[0, 10, 10] = 1.0

    # The generated code is no longer dumped to stdout: that was a leftover
    # debugging aid that only cluttered the test output
    op = Operator([centre], opt=opt)

    iterations = FindNodes(Iteration).visit(op)
    assert all(i.is_Affine and i.is_Parallel
               for i in iterations if i.dim in [xi, yi])

    op.apply(time_m=0, time_M=0)

    # The unit value spreads to the four neighbours of (10, 10)
    assert np.all(u.data[1, 9:12, 10] == 1.0)
    assert np.all(u.data[1, 10, 9:12] == 1.0)

    # Other than those, it should all be 0
    u.data[1, 9:12, 10] = 0.0
    u.data[1, 10, 9:12] = 0.0
    assert np.all(u.data[1, :] == 0)
def test_subdimmiddle_notparallel(self):
    """
    Tests application of an Operator consisting of a subdimension
    defined over different sub-regions, explicitly created through the
    use of SubDimensions.

    Different from ``test_subdimmiddle_parallel`` because an interior
    dimension cannot be evaluated in parallel.
    """
    grid = Grid(shape=(20, 20))
    x, y = grid.dimensions
    t = grid.stepping_dim
    thickness = 4

    u = TimeFunction(name='u', save=None, grid=grid, space_order=0,
                     time_order=1)

    xi = SubDimension.middle(name='xi', parent=x,
                             thickness_left=thickness,
                             thickness_right=thickness)
    yi = SubDimension.middle(name='yi', parent=y,
                             thickness_left=thickness,
                             thickness_right=thickness)

    # flow dependencies in x and y which should force serial execution
    # in reverse direction
    centre = Eq(u[t+1, xi, yi], u[t, xi, yi] + u[t+1, xi+1, yi+1])
    u.data[0, 10, 10] = 1.0

    op = Operator([centre])

    found = FindNodes(Iteration).visit(op)
    # `xi` is expected to be sequential, `yi` parallel
    assert all(it.is_Affine and it.is_Sequential
               for it in found if it.dim == xi)
    assert all(it.is_Affine and it.is_Parallel
               for it in found if it.dim == yi)

    op.apply(time_m=0, time_M=0)

    # The unit value is expected along the diagonal, from (4, 4) to (10, 10);
    # each entry is reset once checked so that the final check is simple
    for k in range(4, 11):
        assert u.data[1, k, k] == 1.0
        u.data[1, k, k] = 0.0

    assert np.all(u.data[1, :] == 0)
def iet_insert_casts(iet, parameters):
    """
    Transform the input IET inserting the necessary type casts.
    The type casts are placed at the top of the IET.

    Parameters
    ----------
    iet : Node
        The input Iteration/Expression tree.
    parameters : iterable
        The symbols that might require casting.

    Returns
    -------
    List
        A List whose body is the casts followed by the input ``iet``.
    """
    # Make the generated code less verbose: if a non-Array parameter does not
    # appear in any Expression, that is, if the parameter is merely propagated
    # down to another Call, then there's no need to cast it
    need_cast = set()
    for expr in FindNodes(Expression).visit(iet):
        need_cast.update(f for f in expr.functions if f.is_Tensor)

    # Array parameters are cast regardless of whether they appear in an
    # Expression
    need_cast.update(p for p in parameters if p.is_Array)

    casts = [ArrayCast(p) for p in parameters if p in need_cast]

    return List(body=casts + [iet])
def test_streaming_postponed_deletion(self):
    """
    Check that, with two equations reading `usave`, two Sections are
    generated and the `delete` pragma for `usave` shows up in the second.
    """
    grid = Grid(shape=(10, 10, 10))

    u = TimeFunction(name='u', grid=grid)
    v = TimeFunction(name='v', grid=grid)
    usave = TimeFunction(name='usave', grid=grid, save=10)

    eqns = [Eq(u.forward, u + usave),
            Eq(v.forward, v + u.forward.dx + usave)]

    op = Operator(eqns, platform='nvidiaX', language='openacc',
                  opt=('streaming', 'orchestrate'))

    sections = FindNodes(Section).visit(op)
    assert len(sections) == 2

    deletion = str(sections[1].body[0].body[0].footer[1])
    assert deletion == (
        '#pragma acc exit data delete(usave[time:1][0:usave_vec->size[1]]'
        '[0:usave_vec->size[2]][0:usave_vec->size[3]])')
def test_iterations_ompized(self, fa, fb, fc, fd, t0, t1, t2, t3,
                            exprs, expected, iters):
    """
    Apply the 'openmp' transformation to a two-level loop nest and check
    which Iterations carry an `omp for` pragma.
    """
    scope = [fa, fb, fc, fd, t0, t1, t2, t3]
    node_exprs = [Expression(DummyEq(EVAL(e, *scope))) for e in exprs]

    # Wrap the expressions, innermost loop first
    ast = iters[7](node_exprs)
    ast = iters[6](ast)

    ast = iet_analyze(ast)

    iet, _ = transform(ast, mode='openmp')
    iterations = FindNodes(Iteration).visit(iet)
    assert len(iterations) == len(expected)

    # Check for presence of pragma omp
    for iteration, ompized in zip(iterations, expected):
        pragmas = iteration.pragmas
        if ompized is True:
            assert len(pragmas) == 1
            assert 'omp for' in pragmas[0].value
        else:
            assert all('omp for' not in p.value for p in pragmas)