def test_override_sparse_data_fix_dim(self):
    """
    Check that the arguments derived for an overridden SparseTimeFunction
    carry the coordinates of the replacement object.

    The two sparse functions are deliberately built over the very same
    dimensions, so that the aliasing on the SparseFunction name is what
    gets exercised.
    """
    grid = Grid(shape=(10, 10))
    time_dim = grid.time_dim

    u = TimeFunction(name='u', grid=grid, time_order=2, space_order=2)

    original_coords = (1., 1.)
    new_coords = (2., 2.)
    p_dim = Dimension(name='p_src')

    def make_source(name, coords):
        # Each source gets its own `dimensions` list, but the Dimension
        # objects inside are shared between the two sources
        return SparseTimeFunction(name=name, grid=grid,
                                  dimensions=[time_dim, p_dim],
                                  nt=10, npoint=1, coordinates=coords,
                                  time_order=2)

    src1 = make_source('src1', original_coords)
    src2 = make_source('src2', new_coords)

    op = Operator(src1.inject(u, src1))

    # `src2` sits at a different location than `src1`: the derived
    # arguments tell us whether the override picked up the original
    # coordinates or the changed ones
    args = op.arguments(src1=src2, time=0)
    coords_key = src1.name + "_coords"
    expected = np.asarray((new_coords,))
    assert np.array_equal(args[coords_key], expected)
def test_catch_duplicate_from_different_clusters(self):
    """
    Check that redundant aliases are detected by the compiler even when
    they originate from different Clusters.
    """
    grid = Grid((10, 10))

    a, b, c, d = (Function(name=name, grid=grid, space_order=4)
                  for name in ("a", "b", "c", "d"))

    s = SparseTimeFunction(name="s", grid=grid, npoint=1, nt=2)

    e, f = (TimeFunction(name=name, grid=grid, space_order=4)
            for name in ("e", "f"))

    # The `sqrt((d - 2*c)/a)` coefficient appears in both expressions
    deriv = (sqrt((a - 2*b)/c) * e.dx).dy + (sqrt((d - 2*c)/a) * e.dy).dx
    deriv2 = (sqrt((c - 2*b)/c) * f.dy).dx + (sqrt((d - 2*c)/a) * f.dx).dy

    # The injection sits in between the two stencil updates
    update_e = [Eq(e.forward, deriv + e)]
    injection = s.inject(e.forward, expr=s)
    update_f = [Eq(f.forward, deriv2 + f + e.forward.dx)]

    op = Operator(update_e + injection + update_f)

    arrays = [sym for sym in FindSymbols().visit(op) if sym.is_Array]
    assert len(arrays) == 3
    for array in arrays:
        assert array._mem_heap and not array._mem_external
def test_cache_blocking_structure_multiple_efuncs():
    """
    Check the cache-blocking loop structure inside each of the elemental
    functions (`bf0` and `bf1`) generated for the Operator.
    """
    grid = Grid(shape=(4, 4, 4))
    x, y, z = grid.dimensions

    u = TimeFunction(name="u", grid=grid, space_order=2)
    U = TimeFunction(name="U", grid=grid, space_order=2)
    src = SparseTimeFunction(name="src", grid=grid, nt=3, npoint=1,
                             coordinates=np.array([(0.5, 0.5, 0.5)]))

    # Two stencil updates separated by a sparse injection
    eqns = [Eq(u.forward, u.dx)]
    eqns += src.inject(field=u.forward, expr=src)
    eqns += [Eq(U.forward, U.dx + u.forward)]

    op = Operator(eqns)

    for efunc_name in ['bf0', 'bf1']:
        assert efunc_name in op._func_table

        efunc_root = op._func_table[efunc_name].root
        iters = FindNodes(Iteration).visit(efunc_root)
        assert len(iters) == 5

        blk_x, blk_y, inner_x, inner_y, innermost = iters

        # The two outermost loops derive from `x` and `y` ...
        assert blk_x.dim.parent is x
        assert blk_y.dim.parent is y
        # ... `z` is iterated directly by the innermost loop ...
        assert innermost.dim is z
        # ... and the two loops in between are children of the outer ones
        assert inner_x.dim.parent is blk_x.dim
        assert inner_y.dim.parent is blk_y.dim
def test_tile_insteadof_collapse(self, par_tile):
    """
    Check the OpenACC pragmas generated when the `par-tile` option is
    supplied: a `tile` clause is expected on the first two loop nests,
    a `collapse` clause on the last one.
    """
    grid = Grid(shape=(3, 3, 3))
    t = grid.stepping_dim
    x, y, z = grid.dimensions

    u = TimeFunction(name='u', grid=grid)
    src = SparseTimeFunction(name="src", grid=grid, nt=3, npoint=1)

    full_update = Eq(u.forward, u + 1)
    plane_update = Eq(u[t + 1, 0, y, z], u[t, 0, y, z] + 1.)
    eqns = [full_update, plane_update]
    eqns += src.inject(field=u.forward, expr=src)

    options = {'par-tile': par_tile}
    op = Operator(eqns, platform='nvidiaX', language='openacc',
                  opt=('advanced', options))

    trees = retrieve_iteration_tree(op)
    assert len(trees) == 4

    expected_3d = 'acc parallel loop tile(32,4,4) present(u)'
    expected_2d = 'acc parallel loop tile(32,4) present(u)'
    # Only the AFFINE Iterations are tiled
    expected_sparse = 'acc parallel loop collapse(1) present(src,src_coords,u)'

    assert trees[0][1].pragmas[0].value == expected_3d
    assert trees[1][1].pragmas[0].value == expected_2d
    assert trees[3][1].pragmas[0].value == expected_sparse
def test_injection_wodup_wtime(self):
    """
    Same as ``test_injection_wodup``, except that a SparseTimeFunction is
    used in place of a SparseFunction. As a consequence, the data
    scattering/gathering has to pack/unpack multidimensional arrays
    correctly.
    """
    grid = Grid(shape=(4, 4), extent=(3.0, 3.0))

    save = 3
    f = TimeFunction(name='f', grid=grid, save=save, space_order=0)
    f.data[:] = 0.

    # Only rank 0 owns sparse points; any other rank starts out empty
    is_root = grid.distributor.myrank == 0
    coords = [(0.5, 0.5), (0.5, 2.5), (2.5, 0.5), (2.5, 2.5)] if is_root else []

    sf = SparseTimeFunction(name='sf', grid=grid, nt=save,
                            npoint=len(coords), coordinates=coords)

    # One distinct value per time step
    for step, value in enumerate((4., 8., 12.)):
        sf.data[step, :] = value

    op = Operator(sf.inject(field=f, expr=sf + 1))
    op.apply()

    for step, expected in enumerate((1.25, 2.25, 3.25)):
        assert np.all(f.data[step] == expected)
def test_override_composite_data(self):
    """
    Check that overriding a SparseTimeFunction at argument-derivation time
    also overrides its coordinates, i.e. the derived arguments carry the
    coordinates of the replacement object.
    """
    grid = Grid(shape=(10, 10))

    original_coords = (1., 1.)
    new_coords = (2., 2.)
    p_dim = Dimension(name='p_src')

    u = TimeFunction(name='u', grid=grid, time_order=2, space_order=2)
    stepping = u.indices[0]

    def make_source(coords):
        # Both sources are created with the same name, 'src1', and over
        # the same pair of dimensions; only the coordinates differ
        return SparseTimeFunction(name='src1', grid=grid,
                                  dimensions=[stepping, p_dim],
                                  npoint=1, nt=10, coordinates=coords)

    src1 = make_source(original_coords)
    src2 = make_source(new_coords)

    op = Operator(src1.inject(u, src1))

    # `src2` sits at a different location than `src1`: the derived
    # arguments tell us whether the override picked up the original
    # coordinates or the changed ones
    args = op.arguments(src1=src2, t=0)
    coords_key = src1.name + "_coords"
    expected = np.asarray((new_coords, ))
    assert np.array_equal(args[coords_key], expected)
def test_cache_blocking_structure_distributed():
    """
    Check the cache-blocking loop structure of the two blocked loop nests:
    the one found in the `compute0` elemental function and the one found
    when visiting the Operator itself.
    """
    grid = Grid(shape=(4, 4, 4))
    x, y, z = grid.dimensions

    u = TimeFunction(name="u", grid=grid, space_order=2)
    U = TimeFunction(name="U", grid=grid, space_order=2)
    src = SparseTimeFunction(name="src", grid=grid, nt=3, npoint=1,
                             coordinates=np.array([(0.5, 0.5, 0.5)]))

    # Two stencil updates separated by a sparse injection
    eqns = [Eq(u.forward, u.dx)]
    eqns += src.inject(field=u.forward, expr=src)
    eqns += [Eq(U.forward, U.dx + u.forward)]

    op = Operator(eqns)

    compute0_root = op._func_table['compute0'].root
    bns0, _ = assert_blocking(compute0_root, {'x0_blk0'})
    bns1, _ = assert_blocking(op, {'x1_blk0'})

    for blocked_nest in (bns0['x0_blk0'], bns1['x1_blk0']):
        iters = FindNodes(Iteration).visit(blocked_nest)
        assert len(iters) == 5

        blk_x, blk_y, inner_x, inner_y, innermost = iters

        # The two outermost loops derive from `x` and `y` ...
        assert blk_x.dim.parent is x
        assert blk_y.dim.parent is y
        # ... the two loops in between are children of the outer ones ...
        assert inner_x.dim.parent is blk_x.dim
        assert inner_y.dim.parent is blk_y.dim
        # ... and `z` is iterated directly by the innermost loop
        assert innermost.dim is z
def test_over_injection():
    """
    Compare an Operator built with `opt='noop'` against one built with
    `opt='buffering'`, for a saved TimeFunction that is both the target of
    a sparse injection and the source of a sparse interpolation.
    """
    nt = 10
    grid = Grid(shape=(4, 4))

    src = SparseTimeFunction(name='src', grid=grid, npoint=1, nt=nt)
    rec = SparseTimeFunction(name='rec', grid=grid, npoint=1, nt=nt)

    # `u1` shares the name of `u`, so that it can be supplied as a runtime
    # override for `u`
    field_kwargs = dict(name="u", grid=grid, time_order=2, space_order=2,
                        save=nt)
    u = TimeFunction(**field_kwargs)
    u1 = TimeFunction(**field_kwargs)

    src.data[:] = 1.

    eqns = [Eq(u.forward, u + 1)]
    eqns = eqns + src.inject(field=u.forward, expr=src)
    eqns = eqns + rec.interpolate(expr=u.forward)

    op0 = Operator(eqns, opt='noop')
    op1 = Operator(eqns, opt='buffering')

    # Check generated code
    n_trees = len(retrieve_iteration_tree(op1))
    assert n_trees == 5 + bool(configuration['language'] != 'C')

    buffers = [sym for sym in FindSymbols().visit(op1) if sym.is_Array]
    assert len(buffers) == 1

    last_step = nt - 2
    op0.apply(time_M=last_step)
    op1.apply(time_M=last_step, u=u1)

    assert np.all(u.data == u1.data)
def test_interpolation():
    """
    Compare an 'advanced' Operator against one that additionally enables
    `linearize`, in presence of sparse injection and interpolation. The
    string 'uL0' must appear only in the linearized generated code, and
    both Operators must produce the same data.
    """
    nt = 10
    grid = Grid(shape=(4, 4))

    src = SparseTimeFunction(name='src', grid=grid, npoint=1, nt=nt)
    rec = SparseTimeFunction(name='rec', grid=grid, npoint=1, nt=nt)

    # `u1` shares the name of `u`, so that it can be supplied as a runtime
    # override for `u`
    u = TimeFunction(name="u", grid=grid, time_order=2)
    u1 = TimeFunction(name="u", grid=grid, time_order=2)

    src.data[:] = 1.

    eqns = [Eq(u.forward, u + 1)]
    eqns = eqns + src.inject(field=u.forward, expr=src)
    eqns = eqns + rec.interpolate(expr=u.forward)

    op0 = Operator(eqns, opt='advanced')
    op1 = Operator(eqns, opt=('advanced', {'linearize': True}))

    # Check generated code
    code0, code1 = str(op0), str(op1)
    assert 'uL0' not in code0
    assert 'uL0' in code1

    last_step = nt - 2
    op0.apply(time_M=last_step)
    op1.apply(time_M=last_step, u=u1)

    assert np.all(u.data == u1.data)
def test_scheduling_after_rewrite():
    """Tests loop scheduling after DSE-induced expression hoisting."""
    grid = Grid((10, 10))

    u1 = TimeFunction(name="u1", grid=grid, save=10, time_order=2)
    u2 = TimeFunction(name="u2", grid=grid, time_order=2)
    sf1 = SparseTimeFunction(name='sf1', grid=grid, npoint=1, nt=10)
    const = Function(name="const", grid=grid, space_order=2)

    # `u1.forward` is first written by a stencil update, then it is the
    # target of the injection; `u1` is finally read, through `u1.dt2`, by
    # the update of `u2`. Both stencil updates share the `sin(const)` term
    update_u1 = Eq(u1.forward, u1 + sin(const))
    injection = sf1.inject(u1.forward, expr=sf1)
    update_u2 = Eq(u2.forward, u2 - u1.dt2 + sin(const))

    op = Operator([update_u1] + injection + [update_u2])
    trees = retrieve_iteration_tree(op)

    # Check loop nest structure
    assert len(trees) == 4

    # The first nest is time invariant: it iterates over the space
    # dimensions of the grid
    for iteration, dim in zip(trees[0], grid.dimensions):
        assert iteration.dim == dim

    # The roots of the remaining nests are compared against the time dimension
    assert trees[1][0].dim == trees[2][0].dim == trees[3][0].dim == grid.time_dim
def test_cache_blocking_structure_optrelax():
    """
    Check that, with the `blockrelax` option enabled, blocking is reported
    for `p_src0_blk0` in addition to `x0_blk0`, and that the nest rooted at
    `p_src0_blk0` consists of two Iterations over block dimensions.
    """
    grid = Grid(shape=(8, 8, 8))

    u = TimeFunction(name="u", grid=grid, space_order=2)
    src = SparseTimeFunction(name="src", grid=grid, nt=3, npoint=1,
                             coordinates=np.array([(0.5, 0.5, 0.5)]))

    stencil = [Eq(u.forward, u.dx)]
    injection = src.inject(field=u.forward, expr=src)
    eqns = stencil
    eqns += injection

    op = Operator(eqns, opt=('advanced', {'blockrelax': True}))

    bns, _ = assert_blocking(op, {'x0_blk0', 'p_src0_blk0'})

    iters = FindNodes(Iteration).visit(bns['p_src0_blk0'])
    assert len(iters) == 2

    first, second = iters
    assert first.dim.is_Block
    assert second.dim.is_Block
def test_drop_redundants_after_fusion(self):
    """
    Check that the redundant aliases that get exposed after Cluster fusion
    are detected.
    """
    grid = Grid(shape=(10, 10))

    t = cos(Function(name="t", grid=grid))
    p = sin(Function(name="p", grid=grid))

    a, b, c, d, e, f = (TimeFunction(name=name, grid=grid)
                        for name in ("a", "b", "c", "d", "e", "f"))

    s1 = SparseTimeFunction(name="s1", grid=grid, npoint=1, nt=2)

    # First pair of stencil updates ...
    eqns = [Eq(a.forward, t * a.dx + p * b.dy),
            Eq(b.forward, p * b.dx + p * t * a.dy)]

    # ... then the very same sparse function is injected into both
    # freshly-written fields ...
    for target in (a.forward, b.forward):
        eqns += s1.inject(field=target, expr=s1)

    # ... finally, the remaining stencil updates
    eqns += [Eq(c.forward, t * p * a.forward.dx + b.forward.dy),
             Eq(d.forward, t * d.dx + e.dy + p * a.dt),
             Eq(e.forward, p * d.dx + e.dy + t * b.dt)]
    eqns += [Eq(f.forward, t * p * e.forward.dx + p * d.forward.dy)]

    op = Operator(eqns)

    arrays = [sym for sym in FindSymbols().visit(op) if sym.is_Array]
    assert len(arrays) == 2
    for array in arrays:
        assert array._mem_heap and not array._mem_external
def test_special_symbols(self):
    """
    Check that the special symbols an Operator may generate (e.g.,
    `nthreads`) are singletons, courtesy of the caching infrastructure.
    """
    grid = Grid(shape=(4, 4, 4))
    f = TimeFunction(name='f', grid=grid)
    sf = SparseTimeFunction(name='sf', grid=grid, npoint=1, nt=10)

    eqns = [Eq(f.forward, f + 1.)] + sf.inject(field=f.forward, expr=sf)

    opt = ('advanced', {'par-nested': 0, 'openmp': True})

    # Two distinct Operators built out of the same equations and options
    op0 = Operator(eqns, opt=opt)
    op1 = Operator(eqns, opt=opt)

    def nthreads_inputs(op):
        return [i for i in op.input if isinstance(i, NThreadsBase)]

    # Exactly three such symbols are expected among the inputs of each
    # Operator
    nthreads0, nthreads_nested0, nthreads_nonaffine0 = nthreads_inputs(op0)
    nthreads1, nthreads_nested1, nthreads_nonaffine1 = nthreads_inputs(op1)

    assert nthreads0 is nthreads1
    assert nthreads_nested0 is nthreads_nested1
    assert nthreads_nonaffine0 is nthreads_nonaffine1

    # Same constructor arguments -> same object
    tid0 = ThreadID(op0.nthreads)
    tid1 = ThreadID(op0.nthreads)
    assert tid0 is tid1

    did0 = DeviceID()
    did1 = DeviceID()
    assert did0 is did1

    # Same name and size -> same object; a different size -> a new object
    npt0 = NPThreads(name='npt', size=3)
    npt1 = NPThreads(name='npt', size=3)
    npt2 = NPThreads(name='npt', size=4)
    assert npt0 is npt1
    assert npt0 is not npt2
def test_empty_arrays(self):
    """
    MFE for issue #1641.

    Operators involving sparse functions with no points at all must run
    and leave the target field untouched.
    """
    grid = Grid(shape=(4, 4), extent=(3.0, 3.0))

    f = TimeFunction(name='f', grid=grid, space_order=0)
    f.data[:] = 1.

    sf1 = SparseTimeFunction(name='sf1', grid=grid, npoint=0, nt=10)
    sf2 = SparseTimeFunction(name='sf2', grid=grid, npoint=0, nt=10)
    assert sf1.size == 0
    assert sf2.size == 0

    injection = sf1.inject(field=f, expr=sf1 + sf2 + 1.)
    op = Operator(injection)
    op.apply()

    assert np.all(f.data == 1.)

    # Again, but with a MatrixSparseTimeFunction
    empty_matrix = scipy.sparse.coo_matrix((0, 0), dtype=np.float32)
    sf = MatrixSparseTimeFunction(name="s", grid=grid, r=2,
                                  matrix=empty_matrix, nt=10)
    assert sf.size == 0

    interpolation = sf.interpolate(f)
    op = Operator(interpolation)

    # The Operator run is wrapped by the manual scatter/gather calls
    sf.manual_scatter()
    op(time_m=0, time_M=9)
    sf.manual_gather()

    assert np.all(f.data == 1.)
def solver(I, V, f, c, L, dt, C, T, user_action=None):
    """
    Solve u_tt=c^2*u_xx + f on (0,L)x(0,T].

    Parameters
    ----------
    I : callable
        Initial condition. Called as ``I(x)`` with the array of spatial mesh
        points; the result is written to all time slots of ``u.data``.
    V : callable, 0 or None
        Initial velocity, called as ``V(x)`` with the array of spatial mesh
        points. ``None`` or ``0`` selects a zero initial velocity.
    f : callable, 0 or None
        Source term, called as ``f(x, t_n)`` with the array of spatial mesh
        points and a scalar time. ``None`` or ``0`` selects a zero source.
    c : float
        Wave velocity; enters the PDE as ``1/c**2``.
    L : float
        Length of the spatial domain; the mesh spans ``[0, L]``.
    dt : float
        Requested time step. It is replaced by the actual spacing of the
        time mesh before being handed to the operators.
    C : float
        Courant number; the spatial step is derived as ``dx = dt*c/C``.
    T : float
        End time; the number of time steps is ``round(T/dt)``.
    user_action : optional
        Accepted for interface compatibility; it is never called by this
        implementation.

    Returns
    -------
    tuple
        ``(u.data[-1], x, t, cpu_time)``: the last time slot of ``u.data``,
        the spatial mesh, the time mesh and the elapsed time. Despite its
        name, ``cpu_time`` is measured with ``time.perf_counter``, i.e. it
        is wall-clock time, not CPU time.
    """
    Nt = int(round(T / dt))
    t = np.linspace(0, Nt * dt, Nt + 1)  # Mesh points in time
    dx = dt * c / float(C)
    Nx = int(round(L / dx))
    x = np.linspace(0, L, Nx + 1)  # Mesh points in space

    # Make sure dt is compatible with the time mesh actually in use
    dt = t[1] - t[0]

    # Fall back to zero-valued functions if f and V are not provided
    if f is None or f == 0:
        def f(x, t):
            return 0

    if V is None or V == 0:
        def V(x):
            return 0

    t0 = time.perf_counter()  # Start of the (wall-clock) time measurement

    # Set up a 1D grid with one point per spatial mesh point
    grid = Grid(shape=(Nx + 1,), extent=(L,))
    t_s = grid.stepping_dim

    # Create and initialise u
    u = TimeFunction(name='u', grid=grid, time_order=2, space_order=2)
    u.data[:, :] = I(x[:])

    # The wave equation we are trying to solve
    pde = (1 / c**2) * u.dt2 - u.dx2

    # Source term and injection into equation
    dt_symbolic = grid.time_dim.spacing
    src = SparseTimeFunction(name='f', grid=grid, npoint=Nx + 1, nt=Nt + 1)

    # Only the first Nt of the Nt + 1 time samples are filled in here
    for i in range(Nt):
        src.data[i] = f(x, t[i])

    # One sparse point on top of each spatial mesh point
    src.coordinates.data[:, 0] = x
    src_term = src.inject(field=u.forward, expr=src * (dt_symbolic**2))
    stencil = Eq(u.forward, solve(pde, u.forward))

    # Set up special stencil for initial timestep with substitution for
    # u.backward
    # NOTE(review): `npoint` and `nt` do not look like Function parameters;
    # presumably they are ignored -- confirm before dropping them
    v = Function(name='v', grid=grid, npoint=Nx + 1, nt=1)
    v.data[:] = V(x[:])
    stencil_init = stencil.subs(u.backward, u.forward - dt_symbolic * v)

    # Boundary conditions: u is set to zero at both ends of the domain
    bc = [Eq(u[t_s + 1, 0], 0)]
    bc += [Eq(u[t_s + 1, Nx], 0)]

    # Create and apply operators
    op_init = Operator([stencil_init] + src_term + bc)
    op = Operator([stencil] + src_term + bc)

    op_init.apply(time_M=1, dt=dt)
    op.apply(time_m=1, time_M=Nt, dt=dt)

    cpu_time = time.perf_counter() - t0

    return u.data[-1], x, t, cpu_time