def test_capture_vector_temporaries(self):
    """
    Check the handling of vector temporaries in an offloaded stencil equation.

    All such temporaries are expected to be: ::

        * mapped to a YASK grid, directly in Python-land,
        * so no memory needs to be allocated in C-land, and
        * passed down to the generated code, and
        * re-initialized to 0. at each operator application
    """
    grid = Grid(shape=(4, 4, 4))
    u = TimeFunction(name='yu4D', grid=grid, space_order=0)
    v = Function(name='yv3D', grid=grid, space_order=0)
    op = Operator([Eq(u.forward, u + cos(v)*2. + cos(v)*cos(v)*3.)])

    def r1_allocated():
        # Looked up afresh on every call rather than cached across `apply`
        first_soln = list(op.yk_solns.values())[0]
        return first_soln.grids['r1'].is_storage_allocated()

    # Sanity check of the generated code
    code = str(op)
    assert 'posix_memalign' not in code
    assert 'run_solution' in code

    # No data has been allocated for the temporaries yet
    assert r1_allocated() is False

    op.apply(yu4D=u, yv3D=v, time=0)

    # Temporary data has already been released after execution
    assert r1_allocated() is False
    assert np.all(v.data == 0.)
    assert np.all(u.data[1] == 5.)
def test_subsampling(self):
    """
    Exercise (time) subsampling support.

    Two different YASK kernels need to be generated, which stresses the
    compiler.
    """
    nt = 9
    factor = 4

    grid = Grid(shape=(8, 8))
    time = grid.time_dim

    u = TimeFunction(name='u', grid=grid)
    u.data_with_halo[:] = 0.

    # Set up the subsampled function: one snapshot every `factor` timesteps
    nsamples = (nt+factor-1)//factor
    times = ConditionalDimension('t_sub', parent=time, factor=factor)
    usave = TimeFunction(name='usave', grid=grid, save=nsamples, time_dim=times)

    op = Operator([Eq(u.forward, u + 1.), Eq(usave, u)])
    op.apply(time=nt-1)

    # Check numerical correctness
    for snapshot, value in enumerate((0., 4., 8.)):
        assert np.all(usave.data[snapshot] == value)

    # Check code generation
    kernels = FindNodes(ForeignExpression).visit(op)
    assert len(kernels) == 2
    assert all('run_solution' in str(k) for k in kernels)
def test_subsampled_fd(self):
    """
    Check the symbolic interface on space-subsampled functions.
    """
    nt = 19
    grid = Grid(shape=(12, 12), extent=(11, 11))

    u = TimeFunction(name='u', grid=grid, save=nt, space_order=2)
    assert grid.time_dim in u.indices

    # Subsampled spatial dimensions (factor 2) and the grid built on them
    sub_dims = tuple(ConditionalDimension(d.name+'sub', parent=d, factor=2)
                     for d in u.grid.dimensions)
    grid2 = Grid((6, 6), dimensions=sub_dims)
    u2 = TimeFunction(name='u2', grid=grid2, save=nt, space_order=1)

    # Fill every time slice, halo included, with the same linear ramp
    ncols = u2.data_with_halo.shape[2]
    for i in range(nt):
        for j in range(ncols):
            u2.data_with_halo[i, :, j] = np.arange(ncols)

    op = Operator([Eq(u.forward, u + 1.), Eq(u2.forward, u2.dx)],
                  dse="advanced")
    op.apply(time_M=nt-2)

    # Verify that u2[1, x,y]= du2/dx[0, x, y]
    assert np.allclose(u.data[-1], nt-1)
    assert np.allclose(u2.data[1], 0.5)
def test_fd_space_staggered(self, space_order, stagger):
    """
    Compare the staggered finite-difference first derivative against the
    analytic derivative of a polynomial.

    For a given order p, the finite difference scheme should be exact for
    polynomials of order p.

    :param space_order: space order of the finite difference stencil
    :param stagger: staggering of the differentiated function; compared
                    against ``left`` and ``right``, anything else means
                    no staggering
    """
    clear_cache()

    # Dummy axis dimension
    nx = 100
    xx = np.linspace(-1, 1, nx)
    dx = xx[1] - xx[0]

    # Symbolic data
    grid = Grid(shape=(nx,), dtype=np.float32)
    x = grid.dimensions[0]

    # Location of the staggered function
    if stagger == left:
        off, side = -.5, -x
        xx2 = xx - off * dx
    elif stagger == right:
        off, side = .5, x
        xx2 = xx[:-1] - off * dx
    else:
        off, side = 0, NODE
        xx2 = xx

    u = Function(name="u", grid=grid, space_order=space_order,
                 staggered=(side,))
    du = Function(name="du", grid=grid, space_order=space_order)

    # Polynomial for which the FD scheme is expected to be exact
    coeffs = np.ones((space_order-1,), dtype=np.float32)
    polynome = sum([coeffs[i]*x**i for i in range(0, space_order-1)])

    # Fill original data with the polynomial sampled at the staggered points
    u.data[:] = np.array([polynome.subs(x, xi) for xi in xx2], np.float32)

    # True derivative of the polynomial, sampled at the grid nodes
    Dpolynome = diff(polynome)
    Dpolyvalues = np.array([Dpolynome.subs(x, xi) for xi in xx], np.float32)

    # FD derivative, symbolic
    u_deriv = generic_derivative(u, deriv_order=1, fd_order=space_order,
                                 dim=x, stagger=stagger)

    # Compute numerical FD
    op = Operator(Eq(du, u_deriv), subs={x.spacing: dx})
    op.apply()

    # Check exactness of the numerical derivative away from the borders
    space_border = space_order
    interior = slice(space_border, -space_border)
    error = abs(du.data[interior] - Dpolyvalues[interior])
    assert np.isclose(np.mean(error), 0., atol=1e-3)
def test_mixed_blocking_nthreads():
    """
    Autotune an OpenMP-enabled Operator built with the 'advanced' DLE mode
    and check the recorded autotuning state, including that ``nthreads`` is
    among the tuned arguments.
    """
    grid = Grid(shape=(64, 64, 64))
    f = TimeFunction(name='f', grid=grid)

    op = Operator(Eq(f.forward, f + 1.), dle=('advanced', {'openmp': True}))
    op.apply(time=100, autotune=True)

    report = op._state['autotuning'][0]
    assert report['runs'] == 6
    assert report['tpr'] == options['squeezer'] + 1
    assert len(report['tuned']) == 3
    assert 'nthreads' in report['tuned']
def test_mode_destructive():
    """Test autotuning in destructive mode."""
    grid = Grid(shape=(64, 64, 64))
    f = TimeFunction(name='f', grid=grid, time_order=0)

    increment = Eq(f, f + 1.)
    op = Operator(increment, dle=('advanced', {'openmp': False}))
    op.apply(time=100, autotune=('basic', 'destructive'))

    # AT is expected to have executed 30 timesteps (6 block shapes, 5 timesteps
    # each), while the operator itself runs for 101 timesteps. In destructive
    # mode both contribute, so f.data[0] is incremented 131 times overall
    assert np.all(f.data == 131)
def test_acoustic_wo_src_wo_rec(self):
    """
    Run the acoustic wave equation with neither sources nor receivers and
    check that it does not crash and leaves the wavefield identically zero.
    """
    dt = self.model.critical_dt
    wavefield = self.u
    wavefield.data[:] = 0.0

    op = Operator(self.eqn, subs=self.model.spacing_map)
    assert 'run_solution' in str(op)

    op.apply(u=wavefield, m=self.m, damp=self.damp, time=10, dt=dt)

    assert np.linalg.norm(wavefield.data[:]) == 0.0
def test_operator_timefunction():
    """
    Pickle round-trip of an Operator over a saved TimeFunction: the restored
    Operator must generate the same code and still be applicable.
    """
    grid = Grid(shape=(3, 3, 3))
    f = TimeFunction(name='f', grid=grid, save=3)

    op = Operator(Eq(f.forward, f + 1))
    op.apply(time=0)

    restored = pickle.loads(pickle.dumps(op))
    assert str(op) == str(restored)

    # One more step, this time through the unpickled Operator
    restored.apply(time_m=1, time_M=1, f=f)
    assert np.all(f.data[2] == 2)
def test_operator_function():
    """
    Pickle round-trip of an Operator over a plain Function: the restored
    Operator must generate the same code and still be applicable.
    """
    grid = Grid(shape=(3, 3, 3))
    f = Function(name='f', grid=grid)

    op = Operator(Eq(f, f + 1))
    op.apply()

    restored = pickle.loads(pickle.dumps(op))
    assert str(op) == str(restored)

    # Second increment, this time through the unpickled Operator
    restored.apply(f=f)
    assert np.all(f.data == 2)
def test_multiple_threads():
    """
    Test autotuning when different ``num_threads`` for a given OpenMP parallel
    region are attempted.
    """
    grid = Grid(shape=(64, 64, 64))
    v = TimeFunction(name='v', grid=grid)

    op = Operator(Eq(v.forward, v + 1), dle=('blocking', {'openmp': True}))
    op.apply(time_M=0, autotune='max')

    report = op._state['autotuning'][0]
    assert report['runs'] == 60  # Would be 30 with `aggressive`
    assert report['tpr'] == options['squeezer'] + 1
    assert len(report['tuned']) == 3
def run_simulation(save=False, dx=0.01, dy=0.01, a=0.5, timesteps=100):
    """
    Run a 2D diffusion problem on a ``int(1/dx)`` by ``int(1/dy)`` grid and
    return ``u.data[timesteps - 1]``.

    :param save: if truthy, ``u`` is created with ``save=timesteps``;
                 otherwise with ``save=None``
    :param dx: grid spacing along the first dimension
    :param dy: grid spacing along the second dimension
    :param a: diffusion coefficient
    :param timesteps: the Operator is applied with ``time=timesteps-2``
    """
    shape = (int(1 / dx), int(1 / dy))
    dx2, dy2 = dx**2, dy**2
    dt = dx2 * dy2 / (2 * a * (dx2 + dy2))

    if save:
        nsaved = timesteps
    else:
        nsaved = None

    grid = Grid(shape=shape)
    u = TimeFunction(name='u', grid=grid, save=nsaved, initializer=initializer,
                     time_order=1, space_order=2)

    # Solve the diffusion equation for the forward-in-time value
    update = solve(Eq(u.dt, a * (u.dx2 + u.dy2)), u.forward)

    op = Operator(Eq(u.forward, update))
    op.apply(time=timesteps-2, dt=dt)

    return u.data[timesteps - 1]
def test_mode_runtime_backward():
    """Test autotuning in runtime mode on an equation stepping backward."""
    grid = Grid(shape=(64, 64, 64))
    f = TimeFunction(name='f', grid=grid)

    op = Operator(Eq(f.backward, f + 1.), dle=('advanced', {'openmp': False}))
    summary = op.apply(time=101, autotune=('basic', 'runtime'))

    # AT is expected to have attempted 6 block shapes
    report = op._state['autotuning'][0]
    assert report['runs'] == 6

    # AT is expected to have executed 30 timesteps
    assert summary['section0'].itershapes[0][0] == 101-30

    for buffer, value in enumerate((101, 100)):
        assert np.all(f.data[buffer] == value)
def test_acoustic_w_src_wo_rec(self):
    """
    Test that the acoustic wave equation runs without crashing in absence
    of receivers, and that the resulting wavefield norm is within 1% of the
    expected value.
    """
    dt = self.model.critical_dt
    self.u.data[:] = 0.0

    # Work on a copy: `eqns = self.eqn` followed by `eqns += ...` would extend
    # `self.eqn` itself in place whenever it is a list, leaking the source
    # term into any other test reading the same attribute
    eqns = list(self.eqn)
    eqns += self.src.inject(field=self.u.forward,
                            expr=self.src * dt**2 / self.m)

    op = Operator(eqns, subs=self.model.spacing_map)
    assert 'run_solution' in str(op)

    op.apply(u=self.u, m=self.m, damp=self.damp, src=self.src, dt=dt)

    exp_u = 154.05
    assert np.isclose(np.linalg.norm(self.u.data[:]), exp_u, atol=exp_u*1.e-2)
def test_discarding_runs():
    """
    Autotune the same Operator in 'aggressive' mode with two user-supplied
    ``nthreads`` values and check the number of runs recorded for each.
    """
    grid = Grid(shape=(64, 64, 64))
    f = TimeFunction(name='f', grid=grid)

    op = Operator(Eq(f.forward, f + 1.), dle=('advanced', {'openmp': True}))

    # With 1 < 4 threads, the AT eventually tries many more combinations
    for entry, (nthreads, runs) in enumerate(((4, 20), (1, 30))):
        op.apply(time=100, nthreads=nthreads, autotune='aggressive')

        report = op._state['autotuning'][entry]
        assert report['runs'] == runs
        assert report['tpr'] == options['squeezer'] + 1
        assert len(report['tuned']) == 3
        assert report['tuned']['nthreads'] == nthreads
def test_collapsing(self):
    """
    Build a blocked, OpenMP-parallel Operator and check that it compiles,
    computes the right values, and carries the expected ``collapse`` pragmas.
    """
    grid = Grid(shape=(3, 3, 3))
    u = TimeFunction(name='u', grid=grid)

    op = Operator(Eq(u.forward, u + 1), dle=('blocking', 'openmp'))

    # Does it compile? Honoring the OpenMP specification isn't trivial
    assert op.cfunction

    # Does it produce the right result
    op.apply(t_M=9)
    assert np.all(u.data[0] == 10)

    loops = FindNodes(Iteration).visit(op._func_table['bf0'])
    inner_pragma = loops[0].pragmas[0].value
    outer_pragma = loops[2].pragmas[0].value

    assert inner_pragma == 'omp for collapse(2) schedule(static,1)'
    assert outer_pragma ==\
        ('omp parallel for collapse(2) schedule(static,1) num_threads(%d)'
         % nhyperthreads())
def test_makeit_ssa(exprs, exp_u, exp_v):
    """
    A test building Operators with non-trivial sequences of input expressions
    that push hard on the `makeit_ssa` utility function.

    Each entry of ``exprs`` is a string that gets ``eval``-ed inside this
    function, so it can refer to the local names defined below; ``exprs`` is
    overwritten in place with the evaluated objects. After applying the
    resulting Operator, ``u.data`` and ``v.data`` are compared against
    ``exp_u`` and ``exp_v``.
    """
    grid = Grid(shape=(4, 4))
    # The names below look unused but are referenced by the eval-ed strings
    x, y = grid.dimensions  # noqa
    u = Function(name='u', grid=grid)  # noqa
    v = Function(name='v', grid=grid)  # noqa
    s = Scalar(name='s')  # noqa

    # List comprehension would need explicit locals/globals mappings to eval
    for i, e in enumerate(list(exprs)):
        exprs[i] = eval(e)

    op = Operator(exprs)
    op.apply()

    assert np.all(u.data == exp_u)
    assert np.all(v.data == exp_v)
def test_mpi_operator():
    """
    Pickle round-trip of an Operator whose stencil triggers MPI halo
    exchanges: the restored Operator must generate the same code and produce
    the same values as the original one.
    """
    grid = Grid(shape=(4,))
    f = TimeFunction(name='f', grid=grid)
    g = TimeFunction(name='g', grid=grid)

    # Using `sum` creates a stencil in `x`, which in turn will
    # trigger the generation of code for MPI halo exchange
    op = Operator(Eq(f.forward, f.sum() + 1))
    op.apply(time=2)

    restored = pickle.loads(pickle.dumps(op))
    assert str(op) == str(restored)

    # Same computation through the unpickled Operator, writing to `g`
    restored.apply(time=2, f=g)

    expected = ([2., 3., 3., 3.], [3., 6., 7., 7.])
    for buffer, values in enumerate(expected):
        assert np.all(f.data[buffer] == values)
    for buffer in range(2):
        assert np.all(g.data[buffer] == f.data[buffer])
def test_const_change(self):
    """
    Test that Constant.data can be set as required.
    """
    n = 5
    t = Constant(name='t', dtype=np.int32)

    grid = Grid(shape=(2, 2))
    x, y = grid.dimensions

    f = TimeFunction(name='f', grid=grid, save=n+1)
    f.data[:] = 0

    # Advance `f` so that df/dt == 1
    update = solve(Eq(f.dt-1), f.forward)
    op = Operator([Eq(f.forward, update)])
    op.apply(time_m=0, time_M=n-1, dt=1)

    # A second Operator copies the time slice selected by `t` into `check`
    check = Function(name='check', grid=grid)
    op_test = Operator([Eq(check, f[t, x, y])])

    for step in range(n+1):
        t.data = step  # Ensure constant is being updated correctly
        op_test.apply(t=t)
        assert np.amax(check.data[:], axis=None) == step
        assert np.amin(check.data[:], axis=None) == step
def test_constants(self):
    """
    Check that :class:`Constant` objects are treated correctly.
    """
    grid = Grid(shape=(4, 4, 4))
    c = Constant(name='c', value=2., dtype=grid.dtype)
    p = SparseTimeFunction(name='points', grid=grid, nt=1, npoint=1)
    u = TimeFunction(name='yu4D', grid=grid, space_order=0)
    u.data[:] = 0.

    eqns = [Eq(u.forward, u + c), Eq(p[0, 0], 1. + c)]
    op = Operator(eqns)
    assert 'run_solution' in str(op)

    op.apply(yu4D=u, c=c, time=9)

    # Check YASK did its job and could read constant grids w/o problems
    assert np.all(u.data[0] == 20.)
    # Check the Constant could be read correctly even in Devito-land, i.e.,
    # outside of run_solution
    assert p.data[0][0] == 3.

    # Check re-executing with another constant gives the correct result
    other = Constant(name='c', value=5.)
    op.apply(yu4D=u, c=other, time=2)

    assert np.all(u.data[0] == 30.)
    assert np.all(u.data[1] == 35.)
    assert p.data[0][0] == 6.
def _new_operator3(shape, blockshape=None, dle=None):
    """
    Build and run an Operator over a grid of the given ``shape``.

    The time derivative of ``u`` is equated to a combination of its second
    derivatives (``dx2``, ``dy2``) and left first derivatives (``dxl``,
    ``dyl``). Block sizes are obtained from ``get_blocksizes`` and passed to
    ``apply``.

    :param shape: shape of the grid
    :param blockshape: block shape handed to ``get_blocksizes``
    :param dle: DLE mode handed to the Operator and to ``get_blocksizes``
    :returns: the tuple ``(u.data[1, :], op)``
    """
    blockshape = as_tuple(blockshape)
    grid = Grid(shape=shape)

    spacing = 0.1
    a = 0.5
    c = 0.5
    dx2, dy2 = spacing**2, spacing**2
    dt = dx2 * dy2 / (2 * a * (dx2 + dy2))

    # Allocate the grid and set initial condition
    # Note: This should be made simpler through the use of defaults
    u = TimeFunction(name='u', grid=grid, time_order=1, space_order=(2, 2, 2))
    npoints = reduce(mul, shape)
    u.data[0, :] = np.arange(npoints, dtype=np.int32).reshape(shape)

    # Derive the stencil according to devito conventions
    rhs = a * (u.dx2 + u.dy2) - c * (u.dxl + u.dyl)
    stencil = solve(Eq(u.dt, rhs), u.forward)
    op = Operator(Eq(u.forward, stencil), dle=dle)

    blocksizes = get_blocksizes(op, dle, grid, blockshape)
    op.apply(u=u, t=10, dt=dt, **blocksizes)

    return u.data[1, :], op
def execute_devito(ui, spacing=0.01, a=0.5, timesteps=500):
    """
    Execute diffusion stencil using the devito Operator API.

    :param ui: 2D array holding the initial condition; its shape defines
               the grid
    :param spacing: grid spacing, used for both dimensions
    :param a: diffusion coefficient
    :param timesteps: value passed as ``t`` when applying the Operator
    :returns: the tuple ``(u.data[1, :], runtime)``, with ``runtime`` the
              elapsed time of ``op.apply`` in seconds
    """
    nx, ny = ui.shape
    dx2, dy2 = spacing**2, spacing**2
    dt = dx2 * dy2 / (2 * a * (dx2 + dy2))

    # Allocate the grid and set initial condition
    # Note: This should be made simpler through the use of defaults
    grid = Grid(shape=(nx, ny))
    u = TimeFunction(name='u', grid=grid, time_order=1, space_order=2)
    u.data[0, :] = ui[:]

    # Derive the stencil according to devito conventions
    eqn = Eq(u.dt, a * (u.dx2 + u.dy2))
    stencil = solve(eqn, u.forward)
    op = Operator(Eq(u.forward, stencil))

    # Execute the generated Devito stencil operator. `perf_counter` is used
    # instead of `time.time()` as it is monotonic and high-resolution, hence
    # unaffected by system clock adjustments during the run
    tstart = time.perf_counter()
    op.apply(u=u, t=timesteps, dt=dt)
    runtime = time.perf_counter() - tstart

    log("Devito: Diffusion with dx=%0.4f, dy=%0.4f, executed %d timesteps in %f seconds"
        % (spacing, spacing, timesteps, runtime))

    return u.data[1, :], runtime
def test_acoustic_w_src_w_rec(self):
    """
    Test that the acoustic wave equation forward operator produces the correct
    results when running a 3D model also used in ``test_adjointA.py``.
    """
    dt = self.model.critical_dt
    self.u.data[:] = 0.0

    # Work on a copy: `eqns = self.eqn` followed by `eqns += ...` would extend
    # `self.eqn` itself in place whenever it is a list, leaking the source and
    # receiver terms into any other test reading the same attribute
    eqns = list(self.eqn)
    eqns += self.src.inject(field=self.u.forward,
                            expr=self.src * dt**2 / self.m)
    eqns += self.rec.interpolate(expr=self.u)

    op = Operator(eqns, subs=self.model.spacing_map)
    assert 'run_solution' in str(op)

    op.apply(u=self.u, m=self.m, damp=self.damp, src=self.src, rec=self.rec,
             dt=dt)

    # The expected norms have been computed "by hand" looking at the output
    # of test_adjointA's forward operator w/o using the YASK backend.
    exp_u = 154.05
    exp_rec = 212.15
    assert np.isclose(np.linalg.norm(self.u.data[:]), exp_u, atol=exp_u*1.e-2)
    assert np.isclose(np.linalg.norm(self.rec.data.reshape(-1)), exp_rec,
                      atol=exp_rec*1.e-2)
def test_multiple_blocking():
    """
    Test that if there are more than one blocked Iteration nests, then
    the autotuner works "incrementally" -- it starts determining the best block
    shape for the first Iteration nest, then it moves on to the second one,
    then the third, etc. IOW, the autotuner must not be attempting the
    cartesian product of all possible block shapes across the various
    blocked nests.
    """
    grid = Grid(shape=(64, 64, 64))
    u = TimeFunction(name='u', grid=grid, space_order=2)
    v = TimeFunction(name='v', grid=grid)

    def make_operator(openmp):
        # Fresh equations on every call, as two separate Operators are built
        eqns = [Eq(u.forward, u + 1), Eq(v.forward, u.forward.dx2 + v + 1)]
        return Operator(eqns, dle=('blocking', {'openmp': openmp}))

    def check(report, runs, ntuned):
        assert report['runs'] == runs
        assert report['tpr'] == options['squeezer'] + 1
        assert len(report['tuned']) == ntuned

    op = make_operator(False)

    # First of all, make sure there are indeed two different loop nests
    for name in ('bf0', 'bf1'):
        assert name in op._func_table

    # 'basic' mode: 6 runs for each Iteration nest
    op.apply(time_M=0, autotune='basic')
    check(op._state['autotuning'][0], 12, 4)

    # 'aggressive' mode
    op.apply(time_M=0, autotune='aggressive')
    check(op._state['autotuning'][1], 60, 4)

    # With OpenMP, we tune over one more argument (`nthreads`), though the AT
    # will only attempt one value
    op = make_operator(True)
    op.apply(time_M=0, autotune='basic')
    check(op._state['autotuning'][0], 12, 5)
def test_dynamic_nthreads(self):
    """
    Check that ``nthreads`` shows up in the generated code, is accepted as
    an ``apply`` keyword, and takes either its default or the user-supplied
    value.
    """
    grid = Grid(shape=(16, 16, 16))
    f = TimeFunction(name='f', grid=grid)

    op = Operator(Eq(f.forward, f + 1.), dle='openmp')

    # Check num_threads appears in the generated code
    # Not very elegant, but it does the trick
    code = str(op)
    assert 'num_threads(nthreads)' in code

    # Check `op` accepts the `nthreads` kwarg
    op.apply(time=0)
    op.apply(time_m=1, time_M=1, nthreads=4)
    assert np.all(f.data[0] == 2.)

    # Check the actual value assumed by `nthreads`
    default_args = op.arguments(time=0)
    user_args = op.arguments(time=0, nthreads=123)
    assert default_args['nthreads'] == NThreads.default_value()
    assert user_args['nthreads'] == 123  # user supplied
def test_multiple_middle(self):
    """
    Test Operator with two basic 'middle' subdomains defined.
    """
    class sd0(SubDomain):
        name = 'd0'

        def define(self, dimensions):
            x, y = dimensions
            return {x: ('middle', 1, 6), y: ('middle', 1, 1)}

    class sd1(SubDomain):
        name = 'd1'

        def define(self, dimensions):
            x, y = dimensions
            return {x: ('middle', 6, 1), y: ('middle', 1, 1)}

    grid = Grid(shape=(10, 10), subdomains=(sd0(), sd1()))
    f = Function(name='f', grid=grid, dtype=np.int32)

    # One increment per subdomain
    eqns = [Eq(f, f+1, subdomain=grid.subdomains['d0']),
            Eq(f, f+2, subdomain=grid.subdomains['d1'])]
    op = Operator(eqns)
    op()

    # 'd0' covers rows 1-3 and 'd1' rows 6-8; both span columns 1-8
    expected = np.zeros((10, 10), dtype=np.int32)
    expected[1:4, 1:9] = 1
    expected[6:9, 1:9] = 2

    assert (np.array(f.data) == expected).all()
def initialize_damp(damp, nbl, spacing, mask=False):
    """
    Initialise damping field with an absorbing boundary layer.

    Parameters
    ----------
    damp : Function
        The damping field for absorbing boundary condition.
    nbl : int
        Number of points in the damping layer.
    spacing :
        Grid spacing coefficient. Not used in the body of this function.
    mask : bool, optional
        whether the dampening is a mask or layer.
        mask => 1 inside the domain and decreases in the layer
        not mask => 0 inside the domain and increase in the layer
    """
    dampcoeff = 1.5 * np.log(1.0 / 0.001) / (nbl)

    def profile(pos):
        # Damping value at relative position `pos`; negated in mask mode
        val = dampcoeff * (pos - sin(2 * np.pi * pos) / (2 * np.pi))
        return -val if mask else val

    # In mask mode the field is first set to 1 everywhere
    eqs = []
    if mask:
        eqs.append(Eq(damp, 1.0))

    for d in damp.dimensions:
        # left
        dim_l = SubDimension.left(name='abc_%s_l' % d.name, parent=d,
                                  thickness=nbl)
        pos_l = Abs((nbl - (dim_l - d.symbolic_min) + 1) / float(nbl))
        eqs.append(Inc(damp.subs({d: dim_l}), profile(pos_l) / d.spacing))

        # right
        dim_r = SubDimension.right(name='abc_%s_r' % d.name, parent=d,
                                   thickness=nbl)
        pos_r = Abs((nbl - (d.symbolic_max - dim_r) + 1) / float(nbl))
        eqs.append(Inc(damp.subs({d: dim_r}), profile(pos_r) / d.spacing))

    # TODO: Figure out why yask doesn't like it with dse/dle
    op = Operator(eqs, name='initdamp', dse='noop', dle='noop')
    op()
def test_multiple_loop_nests(self):
    """
    Compute a simple stencil S, preceded by an "initialization loop" I and
    followed by a "random loop" R.

    * S is the trivial equation ``u[t+1,x,y,z] = u[t,x,y,z] + 1``;
    * I initializes ``u`` to 0;
    * R adds 2 to another field ``v`` along the ``z`` dimension but only over
      the planes ``[x=0, y=2]`` and ``[x=0, y=5]``.

    Out of these three loop nests, only S should be "offloaded" to YASK; indeed,
    I is outside the time loop, while R does not loop over space dimensions.
    This test checks that S is the only loop nest "offloaded" to YASK, and
    that the numerical output is correct.
    """
    common = dict(shape=(12, 12, 12), dimensions=(x, y, z), space_order=0)
    u = TimeData(name='yu4D', **common)
    v = TimeData(name='yv4D', **common)
    v.data[:] = 0.

    # I: zero both time slices of `u`
    eqs = [Eq(u.indexed[i, x, y, z], 0) for i in (0, 1)]
    # S: the stencil expected to be offloaded
    eqs.append(Eq(u.forward, u + 1.))
    # R: increments of `v` restricted to two planes
    eqs.extend(Eq(v.indexed[t + 1, 0, j, z], v.indexed[t + 1, 0, j, z] + 2.)
               for j in (2, 5))

    op = Operator(eqs, subs={t.spacing: 1})
    op(yu4D=u, yv4D=v, t=1)

    assert 'run_solution' in str(op)
    assert len(retrieve_iteration_tree(op)) == 3

    assert np.all(u.data[0] == 0.)
    assert np.all(u.data[1] == 1.)
    assert np.all(v.data[0] == 0.)
    for j in (2, 5):
        assert np.all(v.data[1, 0, j] == 2.)
def BornOperator(model, source, receiver, space_order=4, kernel='OT2',
                 **kwargs):
    """
    Constructor method for the Linearized Born operator in an acoustic media

    :param model: :class:`Model` object containing the physical parameters
    :param source: :class:`PointData` object containing the source geometry
    :param receiver: :class:`PointData` object containing the acquisition
                     geometry
    :param space_order: Space discretization order
    :param kernel: Kernel selector, forwarded to ``iso_stencil``
    :param kwargs: Extra keyword arguments, forwarded to :class:`Operator`
    """
    grid = model.grid
    m, damp = model.m, model.damp

    # Create source and receiver symbols
    src = PointSource(name='src', grid=grid, time_range=source.time_range,
                      npoint=source.npoint)
    rec = Receiver(name='rec', grid=grid, time_range=receiver.time_range,
                   npoint=receiver.npoint)

    # Create wavefields and a dm field
    u = TimeFunction(name="u", grid=grid, save=None, time_order=2,
                     space_order=space_order)
    U = TimeFunction(name="U", grid=grid, save=None, time_order=2,
                     space_order=space_order)
    dm = Function(name="dm", grid=grid, space_order=0)

    s = grid.stepping_dim.spacing
    background = iso_stencil(u, m, s, damp, kernel)
    # `U` is driven by the term -dm*u.dt2 built from the other wavefield
    linearized = iso_stencil(U, m, s, damp, kernel, q=-dm*u.dt2)

    # Add source term expression for u
    src_term = src.inject(field=u.forward, expr=src * s**2 / m,
                          offset=model.nbpml)

    # Create receiver interpolation expression from U
    rec_term = rec.interpolate(expr=U, offset=model.nbpml)

    # Substitute spacing terms to reduce flops
    return Operator(background + src_term + linearized + rec_term,
                    subs=model.spacing_map, name='Born', **kwargs)
def test_cire():
    """
    Build the same equation with and without the ``linearize`` option and
    check both the generated code and that the two Operators compute the
    same values.
    """
    grid = Grid(shape=(4, 4, 4))
    u = TimeFunction(name='u', grid=grid, space_order=2)
    u1 = TimeFunction(name='u', grid=grid, space_order=2)

    eqn = Eq(u.forward, u.dy.dy + 1.)

    plain_opts = {'cire-mingain': 0}
    linear_opts = {'linearize': True, 'cire-mingain': 0}
    op0 = Operator(eqn, opt=('advanced', plain_opts))
    op1 = Operator(eqn, opt=('advanced', linear_opts))

    # Check generated code
    assert 'uL0' not in str(op0)
    assert 'uL0' in str(op1)

    op0.apply(time_M=10)
    op1.apply(time_M=10, u=u1)

    assert np.all(u.data == u1.data)
def test_drop_redundants_after_fusion(self):
    """
    Test for detection of redundant aliases that get exposed after
    Cluster fusion.
    """
    grid = Grid(shape=(10, 10))

    t = cos(Function(name="t", grid=grid))
    p = sin(Function(name="p", grid=grid))

    a, b, c, d, e, f = (TimeFunction(name=name, grid=grid)
                        for name in ("a", "b", "c", "d", "e", "f"))

    s1 = SparseTimeFunction(name="s1", grid=grid, npoint=1, nt=2)

    eqns = [Eq(a.forward, t * a.dx + p * b.dy),
            Eq(b.forward, p * b.dx + p * t * a.dy)]

    # Sparse injections into both freshly updated fields
    eqns.extend(s1.inject(field=a.forward, expr=s1))
    eqns.extend(s1.inject(field=b.forward, expr=s1))

    eqns.extend([Eq(c.forward, t * p * a.forward.dx + b.forward.dy),
                 Eq(d.forward, t * d.dx + e.dy + p * a.dt),
                 Eq(e.forward, p * d.dx + e.dy + t * b.dt)])

    eqns.append(Eq(f.forward, t * p * e.forward.dx + p * d.forward.dy))

    op = Operator(eqns)

    arrays = [sym for sym in FindSymbols().visit(op) if sym.is_Array]
    assert len(arrays) == 2
    assert all(arr._mem_heap and not arr._mem_external for arr in arrays)
def test_iteration_parallelism_2d(self, exprs, atomic, parallel):
    """
    Tests detection of PARALLEL_* properties.

    Each entry of ``exprs`` is a string that gets ``eval``-ed inside this
    method, so it can refer to the local names defined below; ``exprs`` is
    overwritten in place with the evaluated objects. ``atomic`` and
    ``parallel`` hold the names of the dimensions whose Iterations are
    expected to be, respectively, ParallelAtomic and Parallel; every other
    Iteration is expected not to be.
    """
    grid = Grid(shape=(10, 10))
    # The names below look unused but are referenced by the eval-ed strings
    time = grid.time_dim  # noqa
    t = grid.stepping_dim  # noqa
    x, y = grid.dimensions  # noqa

    p = Dimension(name='p')
    d = Dimension(name='d')
    rx = Dimension(name='rx')
    ry = Dimension(name='ry')

    u = Function(name='u', grid=grid)  # noqa
    v = TimeFunction(name='v', grid=grid, save=None)  # noqa
    w = TimeFunction(name='w', grid=grid, save=None)  # noqa

    cx = Function(name='coeff_x', dimensions=(p, rx), shape=(1, 2))  # noqa
    cy = Function(name='coeff_y', dimensions=(p, ry), shape=(1, 2))  # noqa

    gp = Function(name='gridpoints', dimensions=(p, d), shape=(1, 2))  # noqa
    src = Function(name='src', dimensions=(p, ), shape=(1, ))  # noqa
    rcv = Function(name='rcv', dimensions=(time, p), shape=(100, 1), space_order=0)  # noqa

    # List comprehension would need explicit locals/globals mappings to eval
    for i, e in enumerate(list(exprs)):
        exprs[i] = eval(e)

    op = Operator(exprs, opt='openmp')

    iters = FindNodes(Iteration).visit(op)
    # Iterations are matched against the expectations by dimension name
    assert all(i.is_ParallelAtomic for i in iters if i.dim.name in atomic)
    assert all(not i.is_ParallelAtomic for i in iters if i.dim.name not in atomic)
    assert all(i.is_Parallel for i in iters if i.dim.name in parallel)
    assert all(not i.is_Parallel for i in iters if i.dim.name not in parallel)
def ForwardOperator(model, geometry, space_order=4, save=False, kernel='OT2',
                    **kwargs):
    """
    Construct a forward modelling operator in an acoustic medium with density.

    Parameters
    ----------
    model : Model
        Object containing the physical parameters.
    geometry : AcquisitionGeometry
        Geometry object that contains the source (SparseTimeFunction) and
        receivers (SparseTimeFunction) and their position.
    space_order : int, optional
        Space discretization order.
    save : int or Buffer, optional
        Saving flag, True saves all time steps. False saves three timesteps.
        Defaults to False.
    kernel : str, optional
        Accepted in the signature but not used in the body of this function.
    **kwargs
        Extra keyword arguments, forwarded to the Operator.
    """
    m, damp, irho = model.m, model.damp, model.irho

    # Create symbols for forward wavefield, source and receivers
    if save:
        nsaved = geometry.nt
    else:
        nsaved = None
    u = TimeFunction(name='u', grid=model.grid, save=nsaved, time_order=2,
                     space_order=space_order)

    sparse_args = dict(grid=geometry.grid, time_range=geometry.time_axis)
    src = PointSource(name='src', npoint=geometry.nsrc, **sparse_args)
    rec = Receiver(name='rec', npoint=geometry.nrec, **sparse_args)

    s = model.grid.stepping_dim.spacing
    stencil = density_stencil(u, m, s, damp, irho)

    # Construct expression to inject source values
    src_term = src.inject(field=u.forward, expr=src*s**2/(irho*m))

    # Create interpolation expression for receivers
    rec_term = rec.interpolate(expr=u)

    # Substitute spacing terms to reduce flops
    return Operator(stencil + src_term + rec_term, subs=model.spacing_map,
                    name='Forward', **kwargs)
def test_misc_dims(self):
    """
    Tests grid-independent :class:`Function`s, which require YASK's "misc"
    dimensions.
    """
    dx = Dimension(name='dx')
    grid = Grid(shape=(10, 10))
    x, y = grid.dimensions
    time = grid.time_dim

    u = TimeFunction(name='u', grid=grid, time_order=1, space_order=4, save=4)
    c = Function(name='c', dimensions=(x, dx), shape=(10, 5))

    # Five-point stencil along `x`, with per-row coefficients read from `c`
    step = Eq(u.forward, (
        u[time, x-2, y] * c[x, 0] +
        u[time, x-1, y] * c[x, 1] +
        u[time, x, y] * c[x, 2] +
        u[time, x+1, y] * c[x, 3] +
        u[time, x+2, y] * c[x, 4]))

    # Row `i` of `c` holds the base coefficients shifted by `i`
    bases = (1.0, 1.0, 3.0, 6.0, 5.0)
    for i in range(10):
        for k, base in enumerate(bases):
            c.data[i, k] = base+i

    u.data[:] = 0.0
    u.data[0, 2, :] = 2.0

    op = Operator(step)
    assert 'run_solution' in str(op)

    op(time_m=0, time_M=0)

    for row, value in enumerate((10.0, 14.0, 10.0, 8.0, 10.0)):
        assert np.all(u.data[1, row, :] == value)
    assert np.all(u.data[1, 5:10, :] == 0.0)
def test_at_w_mpi():
    """Make sure autotuning works in presence of MPI. MPI ranks work
    in isolation to determine the best block size, locally."""
    grid = Grid(shape=(8, 8))
    t = grid.stepping_dim
    x, y = grid.dimensions

    f = TimeFunction(name='f', grid=grid, time_order=1)
    f.data_with_halo[:] = 1.

    stencil = Eq(f.forward, f[t, x, y-1] + f[t, x, y+1])
    op = Operator(stencil,
                  dle=('advanced', {'openmp': False, 'blockinner': True}))

    op.apply(time=-1, autotune=('basic', 'runtime'))
    # Nothing really happened, as not enough timesteps
    for buffer in (0, 1):
        assert np.all(f.data_ro_domain[buffer] == 1.)

    # The 'destructive' mode writes directly to `f` for whatever timesteps required
    # to perform the autotuning. Eventually, the result is complete garbage; note
    # also that this autotuning mode disables the halo exchanges
    op.apply(time=-1, autotune=('basic', 'destructive'))
    assert np.all(f._data_ro_with_inhalo.sum() == 904)

    # Check the halo hasn't been touched during AT
    glb_pos_map = grid.distributor.glb_pos_map
    on_left = LEFT in glb_pos_map[y]
    halo_col = -1 if on_left else 0
    assert np.all(f._data_ro_with_inhalo[:, :, halo_col] == 1)

    # Finally, try running w/o AT, just to be sure nothing was broken
    f.data_with_halo[:] = 1.
    op.apply(time=2)
    if on_left:
        checks = [(0, 5.), (1, 7.), (slice(2, 4), 8.)]
    else:
        checks = [(slice(4, 6), 8), (6, 7), (7, 5)]
    for cols, value in checks:
        assert np.all(f.data_ro_domain[1, :, cols] == value)
def test_tti_v2_rewrite_aggressive_opcounts(space_order, expected):
    """
    Build a two-field TTI-like system with the 'aggressive' DSE mode and
    check the operation counts recorded by the profiler for its two sections.
    """
    grid = Grid(shape=(3, 3, 3))
    s = 0.00067

    u = TimeFunction(name='u', grid=grid, space_order=space_order)
    v = TimeFunction(name='v', grid=grid, space_order=space_order)
    f = Function(name='f', grid=grid)
    g = Function(name='g', grid=grid)
    m = Function(name='m', grid=grid)
    e = Function(name='e', grid=grid)
    d = Function(name='d', grid=grid)

    ang0 = cos(f)
    ang1 = sin(f)
    ang2 = cos(g)
    ang3 = sin(g)

    def rotated(w):
        # Same combination of second derivatives for either wavefield
        return (ang1 * ang1 * ang2 * ang2 * w.dx2 +
                ang1 * ang1 * ang3 * ang3 * w.dy2 +
                ang0 * ang0 * w.dz2 +
                2 * ang1 * ang1 * ang3 * ang2 * w.dxdy +
                2 * ang0 * ang1 * ang3 * w.dydz +
                2 * ang0 * ang1 * ang2 * w.dxdz)

    H1u = rotated(u)
    H2u = -H1u + u.laplace

    H1v = rotated(v)
    H2v = -H1v + v.laplace

    eqns = [Eq(u.forward, (2 * u - u.backward) + s**2 / m * (e * H2u + H1v)),
            Eq(v.forward, (2 * v - v.backward) + s**2 / m * (d * H2v + H1v))]

    op = Operator(eqns, dse='aggressive')

    sections = list(op._profiler._sections.values())
    assert len(sections) == 2
    first, second = sections
    assert first.sops == 4
    assert second.sops == expected
def test_adjoint_inject_interpolate(shape, coords, npoints=19):
    """
    Check that injection and interpolation pass a dot-product test, that is
    ``< P x, y > - < x, P^T y >`` is relatively close to zero.
    """
    a = unit_box(shape=shape)
    a.data[:] = 0.
    c = unit_box(shape=shape, name='c')
    c.data[:] = 27.

    # Inject receiver
    p = points(a.grid, ranges=coords, npoints=npoints)
    p.data[:] = 1.2
    inject = p.inject(field=a, expr=p)

    # Read receiver
    p2 = points(a.grid, name='points2', ranges=coords, npoints=npoints)
    interpolate = p2.interpolate(expr=c)

    op = Operator(inject + interpolate)
    op(a=a, c=c)

    # < P x, y > - < x, P^T y>
    # Px => p2
    # y => p
    # x => c
    # P^T y => a
    def flat(func):
        return func.data.reshape(-1)

    term1 = np.dot(flat(p2), flat(p))
    term2 = np.dot(flat(c), flat(a))
    assert np.isclose((term1 - term2) / term1, 0., atol=1.e-6)
def test_function_wo(self):
    """
    Check the OpenMP target data pragmas wrapping ``op.body[2]`` for an
    Operator that also writes to a Function.
    """
    grid = Grid(shape=(3, 3, 3))
    i = Dimension(name='i')

    f = Function(name='f', shape=(1, ), dimensions=(i, ), grid=grid)
    u = TimeFunction(name='u', grid=grid)

    op = Operator([Eq(u.forward, u + 1), Eq(f[0], u[0, 0, 0, 0])],
                  dle=('noop', {'openmp': True}))

    section = op.body[2]
    assert len(section.header) == 1
    assert len(section.footer) == 1

    # `u` is mapped to the device on entry ...
    assert section.header[0].value ==\
        ('omp target enter data map(to: u[0:u_vec->size[0]]'
         '[0:u_vec->size[1]][0:u_vec->size[2]][0:u_vec->size[3]])')

    # ... then copied back and released on exit
    update, release = section.footer[0].contents[:2]
    assert update.value ==\
        ('omp target update from(u[0:u_vec->size[0]]'
         '[0:u_vec->size[1]][0:u_vec->size[2]][0:u_vec->size[3]])')
    assert release.value ==\
        ('omp target exit data map(release: u[0:u_vec->size[0]]'
         '[0:u_vec->size[1]][0:u_vec->size[2]][0:u_vec->size[3]])')
def AdjointOperator(model, geometry, space_order=4, kernel='sls', time_order=2,
                    **kwargs):
    """
    Construct an adjoint modelling operator in a viscoacoustic medium.

    Parameters
    ----------
    model : Model
        Object containing the physical parameters.
    geometry : AcquisitionGeometry
        Geometry object that contains the source (SparseTimeFunction) and
        receivers (SparseTimeFunction) and their position.
    space_order : int, optional
        Space discretization order.
    kernel : str, optional
        Key into ``kernels`` selecting the visco-acoustic equation:
        sls (Standard Linear Solid) :
        1st order - Blanch and Symes (1995) / Dutta and Schuster (2014)
        viscoacoustic equation
        2nd order - Bai et al. (2014) viscoacoustic equation
        ren - Ren et al. (2014) viscoacoustic equation
        deng_mcmechan - Deng and McMechan (2007) viscoacoustic equation
        Defaults to sls 2nd order.
    time_order : int, optional
        Time discretization order. When equal to 1, a VectorTimeFunction
        named ``va`` is created and stored in ``kwargs`` under ``'v'``.
    **kwargs
        Forwarded to both the selected kernel and the Operator.
    """
    if time_order == 1:
        kwargs['v'] = VectorTimeFunction(name="va", grid=model.grid,
                                         time_order=time_order,
                                         space_order=space_order)

    pa = TimeFunction(name="pa", grid=model.grid, time_order=time_order,
                      space_order=space_order, staggered=NODE)

    # Stencil equations from the selected kernel
    build_stencil = kernels[kernel]
    stencil = build_stencil(model, geometry, pa, forward=False, **kwargs)

    src_term, rec_term = src_rec(pa, model, geometry, forward=False)

    # Substitute spacing terms to reduce flops
    all_eqs = stencil + src_term + rec_term
    return Operator(all_eqs, subs=model.spacing_map, name='Adjoint', **kwargs)
def test_basic(self):
    """
    Check the OpenACC pragmas of a single-equation operator built with
    ``platform='nvidiaX'`` and ``language='openacc'``: the parallel-loop
    pragma on the second loop of the only iteration tree, and the
    enter/exit data directives around ``op.body[1]``.
    """
    grid = Grid(shape=(3, 3, 3))
    u = TimeFunction(name='u', grid=grid)

    op = Operator(Eq(u.forward, u + 1),
                  platform='nvidiaX', language='openacc')

    trees = retrieve_iteration_tree(op)
    assert len(trees) == 1
    loop_pragma = trees[0][1].pragmas[0]
    assert loop_pragma.value ==\
        'acc parallel loop collapse(3) present(u)'

    header = op.body[1].header
    footer = op.body[1].footer
    assert header[0].value ==\
        ('acc enter data copyin(u[0:u_vec->size[0]]'
         '[0:u_vec->size[1]][0:u_vec->size[2]][0:u_vec->size[3]])')
    assert str(footer[0]) == ''
    assert footer[1].contents[0].value ==\
        ('acc exit data copyout(u[0:u_vec->size[0]]'
         '[0:u_vec->size[1]][0:u_vec->size[2]][0:u_vec->size[3]])')
    assert footer[1].contents[1].value ==\
        ('acc exit data delete(u[0:u_vec->size[0]]'
         '[0:u_vec->size[1]][0:u_vec->size[2]][0:u_vec->size[3]])')
def GradientOperator(model, source, receiver, space_order=4, save=True,
                     kernel='OT2', **kwargs):
    """
    Constructor method for the gradient operator in an acoustic media

    :param model: :class:`Model` object containing the physical parameters
    :param source: :class:`PointData` object containing the source geometry
    :param receiver: :class:`PointData` object containing the acquisition geometry
    :param space_order: Space discretization order
    :param save: If true, the forward wavefield ``u`` is created with
                 ``save=source.nt``; otherwise with ``save=None``
    :param kernel: Time-stepping kernel, either ``'OT2'`` or ``'OT4'``
    :param kwargs: Forwarded to the :class:`Operator` constructor
    :raises ValueError: if ``kernel`` is neither ``'OT2'`` nor ``'OT4'``
    """
    m, damp = model.m, model.damp

    # Gradient symbol and wavefield symbols
    grad = Function(name='grad', grid=model.grid)
    u = TimeFunction(name='u', grid=model.grid, save=source.nt if save else None,
                     time_order=2, space_order=space_order)
    v = TimeFunction(name='v', grid=model.grid, save=None,
                     time_order=2, space_order=space_order)
    rec = Receiver(name='rec', grid=model.grid, ntime=receiver.nt,
                   npoint=receiver.npoint)

    s = model.grid.stepping_dim.spacing
    eqn = iso_stencil(v, m, s, damp, kernel, forward=False)

    if kernel == 'OT2':
        gradient_update = Eq(grad, grad - u.dt2 * v)
    elif kernel == 'OT4':
        gradient_update = Eq(grad, grad - (u.dt2 + s**2 / 12.0 *
                                           u.laplace2(m**(-2))) * v)
    else:
        # Logging alone would let execution continue and fail later with a
        # NameError on `gradient_update`; fail here with a clear error
        message = "Unrecognized kernel, has to be OT2 or OT4"
        error(message)
        raise ValueError(message)

    # Add expression for receiver injection
    receivers = rec.inject(field=v.backward, expr=rec * s**2 / m,
                           offset=model.nbpml)

    # Substitute spacing terms to reduce flops
    return Operator(eqn + receivers + [gradient_update], subs=model.spacing_map,
                    name='Gradient', **kwargs)
def test_subdomain_dim(self):
    """
    Check that lowering an equation defined on a SubDomain replaces every
    occurrence of the grid dimensions, including those appearing as plain
    terms of the expression, with the subdomain's dimensions.
    """
    class sd0(SubDomain):
        name = 'd0'

        def define(self, dimensions):
            x, y = dimensions
            return {x: ('middle', 1, 6), y: ('middle', 1, 1)}

    subdomain = sd0()
    grid = Grid(shape=(10, 10), subdomains=(subdomain, ))
    x, y = grid.dimensions
    x1, y1 = subdomain.dimensions

    f = Function(name='f', grid=grid, dtype=np.int32)
    eq0 = Eq(f, x * f + y, subdomain=grid.subdomains['d0'])

    with timed_region('x'):
        lowered = Operator._lower_exprs([eq0])
    expr = lowered[0]

    assert expr.rhs == x1 * f[x1 + 1, y1 + 1] + y1
def initialize_damp(damp, padsizes, spacing, abc_type="damp", fs=False):
    """
    Initialize damping field with an absorbing boundary layer.

    The field is first set to a constant, then a profile is accumulated
    (via ``Inc``) over a left and a right SubDimension of each dimension
    of ``damp``. The resulting equations are run immediately in an
    Operator named ``initdamp``.

    Parameters
    ----------
    damp : Function
        The damping field for absorbing boundary condition.
    padsizes : iterable of (int, int)
        Left and right layer thickness, one pair per dimension of ``damp``.
        A side with thickness 0 is skipped.
    spacing :
        Grid spacing coefficient. Unused in this function (``d.spacing``
        of each dimension is used instead); kept for interface
        compatibility.
    abc_type : str, optional
        ``"mask"`` => field starts at 1 and decreases in the layer;
        anything else => field starts at 0 and increases in the layer.
    fs : bool, optional
        If True, the left layer of the last dimension is not damped
        (presumably a free surface -- confirm with callers).
    """
    is_mask = abc_type == "mask"
    last_dim = damp.dimensions[-1]

    eqs = [Eq(damp, 1.0 if is_mask else 0.0)]
    for (nbl, nbr), d in zip(padsizes, damp.dimensions):
        # left -- skipped for a zero-thickness layer (would divide by
        # zero) and for the last dimension when `fs` is set
        if nbl > 0 and (not fs or d is not last_dim):
            dampcoeff = 1.5 * np.log(1.0 / 0.001) / (nbl)
            dim_l = SubDimension.left(name='abc_%s_l' % d.name, parent=d,
                                      thickness=nbl)
            pos = Abs((nbl - (dim_l - d.symbolic_min) + 1) / float(nbl))
            val = dampcoeff * (pos - sin(2*np.pi*pos)/(2*np.pi))
            val = -val if is_mask else val
            eqs += [Inc(damp.subs({d: dim_l}), val/d.spacing)]
        # right -- skipped for a zero-thickness layer
        if nbr > 0:
            dampcoeff = 1.5 * np.log(1.0 / 0.001) / (nbr)
            dim_r = SubDimension.right(name='abc_%s_r' % d.name, parent=d,
                                       thickness=nbr)
            pos = Abs((nbr - (d.symbolic_max - dim_r) + 1) / float(nbr))
            val = dampcoeff * (pos - sin(2*np.pi*pos)/(2*np.pi))
            val = -val if is_mask else val
            eqs += [Inc(damp.subs({d: dim_r}), val/d.spacing)]

    Operator(eqs, name='initdamp')()
def ForwardOperator(model, geometry, space_order=4, save=False, **kwargs):
    """
    Construct method for the forward modelling operator in an elastic media.

    Parameters
    ----------
    model : Model
        Object containing the physical parameters.
    geometry : AcquisitionGeometry
        Geometry object that contains the source (SparseTimeFunction) and
        receivers (SparseTimeFunction) and their position.
    space_order : int, optional
        Space discretization order.
    save : int or Buffer
        Saving flag. When truthy, ``geometry.nt`` is passed to the kernel
        as the number of time steps to save; otherwise ``None`` is passed.
        Defaults to False.
    **kwargs
        Forwarded to the Operator constructor.
    """
    # The kernel is selected by the number of grid dimensions
    build_pde = kernels[model.grid.dim]
    nsave = geometry.nt if save else None
    pde = build_pde(model, space_order, nsave, geometry)

    # Substitute spacing terms to reduce flops
    return Operator(pde, subs=model.spacing_map, name='Forward', **kwargs)
def test_function_wo(self):
    """
    Check the OpenMP map/unmap pragmas exposed through ``op.body.maps`` and
    ``op.body.unmaps`` when a Function is only written to: one map-to for
    ``u``, then an update-from and a conditional release for ``u``.
    """
    grid = Grid(shape=(3, 3, 3))
    i = Dimension(name='i')

    f = Function(name='f', shape=(1, ), dimensions=(i, ), grid=grid)
    u = TimeFunction(name='u', grid=grid)

    op = Operator([Eq(u.forward, u + 1),
                   Eq(f[0], u[0, 0, 0, 0])],
                  opt='noop', language='openmp')

    maps = op.body.maps
    assert len(maps) == 1
    assert maps[0].pragmas[0].value ==\
        ('omp target enter data map(to: u[0:u_vec->size[0]]'
         '[0:u_vec->size[1]][0:u_vec->size[2]][0:u_vec->size[3]])')

    unmaps = op.body.unmaps
    assert len(unmaps) == 2
    assert unmaps[0].pragmas[0].value ==\
        ('omp target update from(u[0:u_vec->size[0]]'
         '[0:u_vec->size[1]][0:u_vec->size[2]][0:u_vec->size[3]])')
    assert unmaps[1].pragmas[0].value ==\
        ('omp target exit data map(release: u[0:u_vec->size[0]]'
         '[0:u_vec->size[1]][0:u_vec->size[2]][0:u_vec->size[3]]) if(devicerm)')
def test_catch_largest_time_invariant(self):
    """
    Make sure the DSE extracts the largest time-invariant sub-expressions
    such that its operation count exceeds a certain threshold.

    Two Array temporaries are expected, both flagged ``_mem_heap`` and
    not ``_mem_external``.
    """
    grid = Grid((10, 10))
    a = Function(name="a", grid=grid, space_order=4)
    b = Function(name="b", grid=grid, space_order=4)
    c = Function(name="c", grid=grid, space_order=4)
    d = Function(name="d", grid=grid, space_order=4)
    e = TimeFunction(name="e", grid=grid, space_order=4)

    first_term = (sqrt((a - 2*b)/c) * e.dx).dy
    second_term = (sqrt((d - 2*c)/a) * e.dy).dx
    deriv = first_term + second_term

    op = Operator(Eq(e.forward, deriv + e))

    # We expect two temporary Arrays, one for each `sqrt` subexpr
    arrays = [sym for sym in FindSymbols().visit(op) if sym.is_Array]
    assert len(arrays) == 2
    for array in arrays:
        assert array._mem_heap and not array._mem_external
def test_blocking(self, opt):
    """
    Check loop blocking with ``platform='nvidiaX'`` and
    ``language='openmp'``: the single iteration tree has 7 loops, loops
    1..6 iterate over block dimensions, the steps of loops 1..3 are the
    operator parameters at positions 3, 6 and 9, and loop 1 carries the
    offloading pragma.
    """
    grid = Grid(shape=(3, 3, 3))
    u = TimeFunction(name='u', grid=grid)

    op = Operator(Eq(u.forward, u + 1),
                  platform='nvidiaX', language='openmp', opt=opt)

    trees = retrieve_iteration_tree(op)
    assert len(trees) == 1
    tree = trees[0]
    assert len(tree) == 7
    assert all(loop.dim.is_Block for loop in tree[1:7])

    for param_index, loop_index in ((3, 1), (6, 2), (9, 3)):
        assert op.parameters[param_index] is tree[loop_index].step

    assert tree[1].pragmas[0].value ==\
        'omp target teams distribute parallel for collapse(3)'
def test_streaming_postponed_deletion(self):
    """
    Check, for an OpenACC operator built with
    ``opt=('streaming', 'orchestrate')``, that two Sections are generated
    and that the ``exit data delete`` pragma for ``usave`` appears in the
    footer of the second Section.
    """
    grid = Grid(shape=(10, 10, 10))

    u = TimeFunction(name='u', grid=grid)
    v = TimeFunction(name='v', grid=grid)
    usave = TimeFunction(name='usave', grid=grid, save=10)

    # `usave` is read by both equations
    eqns = [Eq(u.forward, u + usave),
            Eq(v.forward, v + u.forward.dx + usave)]

    op = Operator(eqns, platform='nvidiaX', language='openacc',
                  opt=('streaming', 'orchestrate'))

    sections = FindNodes(Section).visit(op)
    assert len(sections) == 2

    footer = sections[1].body[0].body[0].footer
    assert str(footer[1]) ==\
        ('#pragma acc exit data delete(usave[time:1][0:usave_vec->size[1]]'
         '[0:usave_vec->size[2]][0:usave_vec->size[3]])')
def test_scheduling_after_rewrite():
    """Tests loop scheduling after DSE-induced expression hoisting."""
    grid = Grid((10, 10))
    u1 = TimeFunction(name="u1", grid=grid, save=10, time_order=2)
    u2 = TimeFunction(name="u2", grid=grid, time_order=2)
    sf1 = SparseFunction(name='sf1', grid=grid, npoint=1, ntime=10)
    const = Function(name="const", grid=grid, space_order=2)

    # `u1.forward` is written by the stencil and by the sparse injection,
    # and then read (through `u1.dt2`) by the update of `u2`
    update_u1 = Eq(u1.forward, u1 + sin(const))
    injection = sf1.inject(u1.forward, expr=sf1)
    update_u2 = Eq(u2.forward, u2 - u1.dt2 + sin(const))

    op = Operator([update_u1] + injection + [update_u2])
    trees = retrieve_iteration_tree(op)

    # Check loop nest structure
    assert len(trees) == 4
    invariant, first, second, third = trees
    # The first nest is time invariant: it iterates over the space
    # dimensions only
    assert all(loop.dim == dim
               for loop, dim in zip(invariant, grid.dimensions))
    # The remaining nests are rooted at the time dimension
    assert first[0].dim == second[0].dim == third[0].dim == grid.time_dim
def test_inject_from_field(shape, coords, result, npoints=19):
    """Test point injection from a second field along a line
    through the middle of the grid.

    Field ``b`` (all ones) is injected into field ``a`` (all zeros) at the
    sparse points ``p``. A window of ``a`` is then compared against
    ``result`` with a relative tolerance of 1e-5.
    """
    a = unit_box(shape=shape)
    # Read the spacing before the data is zeroed
    spacing = a.data[tuple([1 for _ in shape])]
    a.data[:] = 0.
    b = DenseData(name='b', shape=a.data.shape)
    b.data[:] = 1.
    p = points(ranges=coords, npoints=npoints)

    expr = p.inject(field=a, expr=b)
    Operator(expr, subs={x.spacing: spacing, y.spacing: spacing,
                         z.spacing: spacing})(a=a, b=b)

    indices = [slice(4, 6, 1) for _ in coords]
    indices[0] = slice(1, -1, 1)
    # Multidimensional indexing needs a tuple: NumPy rejects a list of slices
    assert np.allclose(a.data[tuple(indices)], result, rtol=1.e-5)
def test_basic(self):
    """
    Check the OpenMP offloading pragmas of a single-equation operator: the
    parallel-loop pragma on the second loop of the only iteration tree,
    and the enter/update/exit data directives around ``op.body[1]``.

    NOTE(review): the Operator is built without explicit platform or
    language arguments, so offloading is presumably enabled by
    configuration outside this test -- confirm.
    """
    grid = Grid(shape=(3, 3, 3))
    u = TimeFunction(name='u', grid=grid)

    op = Operator(Eq(u.forward, u + 1))

    trees = retrieve_iteration_tree(op)
    assert len(trees) == 1
    loop_pragma = trees[0][1].pragmas[0]
    assert loop_pragma.value ==\
        'omp target teams distribute parallel for collapse(3)'

    header = op.body[1].header
    footer = op.body[1].footer
    assert header[0].value ==\
        ('omp target enter data map(to: u[0:u_vec->size[0]]'
         '[0:u_vec->size[1]][0:u_vec->size[2]][0:u_vec->size[3]])')
    assert str(footer[0]) == ''
    assert footer[1].contents[0].value ==\
        ('omp target update from(u[0:u_vec->size[0]]'
         '[0:u_vec->size[1]][0:u_vec->size[2]][0:u_vec->size[3]])')
    assert footer[1].contents[1].value ==\
        ('omp target exit data map(release: u[0:u_vec->size[0]]'
         '[0:u_vec->size[1]][0:u_vec->size[2]][0:u_vec->size[3]])')
def test_parameters(self):
    """
    Tests that we can actually generate code for a trivial operator
    using constant and array data objects.

    The operator must expose exactly five parameters, in a fixed order and
    with the expected kind, and the generated code must contain the
    expected update statement.
    """
    grid = Grid(shape=(3,))
    a_dense = Function(name='a_dense', grid=grid)
    const = Constant(name='constant')

    op = Operator(Eq(a_dense, a_dense + 2.*const))

    # (name, attribute that must be truthy), in parameter order
    expected = [('a_dense', 'is_Tensor'),
                ('constant', 'is_Scalar'),
                ('timers', 'is_Object'),
                ('x_M', 'is_Scalar'),
                ('x_m', 'is_Scalar')]

    assert len(op.parameters) == 5
    for param, (name, kind) in zip(op.parameters, expected):
        assert param.name == name
        assert getattr(param, kind)

    assert 'a_dense[x + 1] = 2.0F*constant + a_dense[x + 1]' in str(op)
def test_array_rw(self):
    """
    Check the header and footer of ``op.body[1]`` for the array ``r1``:
    declaration, host allocation and device allocation in the header;
    device deletion and host free in the footer.
    """
    grid = Grid(shape=(3, 3, 3))
    f = Function(name='f', grid=grid)
    u = TimeFunction(name='u', grid=grid, space_order=2)

    op = Operator(Eq(u.forward, u*cos(f*2)))

    header = op.body[1].header
    assert len(header) == 7
    assert str(header[0]) == 'float (*r1)[y_size][z_size];'
    assert header[1].text ==\
        'posix_memalign((void**)&r1, 64, sizeof(float[x_size][y_size][z_size]))'
    assert header[2].value ==\
        'omp target enter data map(alloc: r1[0:x_size][0:y_size][0:z_size])'

    footer = op.body[1].footer
    assert len(footer) == 6
    assert str(footer[0]) == ''
    assert footer[1].value ==\
        'omp target exit data map(delete: r1[0:x_size][0:y_size][0:z_size])'
    assert footer[2].text == 'free(r1)'
def test_timeparallel_reduction(self):
    """
    Check the iteration properties and pragmas of a reduction into
    ``f[0]``: the root loop is sequential and carries no pragma, the inner
    loops are ParallelRelaxed but not Parallel, and the first inner loop
    carries a single offloading pragma with a reduction clause.
    """
    grid = Grid(shape=(3, 3, 3))
    i = Dimension(name='i')

    f = Function(name='f', shape=(1, ), dimensions=(i, ), grid=grid)
    u = TimeFunction(name='u', grid=grid)

    op = Operator(Inc(f[0], u + 1), opt='noop')

    trees = retrieve_iteration_tree(op)
    assert len(trees) == 1
    tree = trees[0]

    assert tree.root.is_Sequential
    for loop in tree[1:]:
        assert loop.is_ParallelRelaxed and not loop.is_Parallel

    # The time loop is not in OpenMP canonical form, so it won't be parallelized
    assert not tree.root.pragmas

    inner_pragmas = tree[1].pragmas
    assert len(inner_pragmas) == 1
    assert inner_pragmas[0].value ==\
        ('omp target teams distribute parallel for collapse(3)'
         ' reduction(+:f[0])')
def test_expr_like_lowering(self):
    """
    Test the lowering of an expr-like ConditionalDimension's condition.

    The condition passed to the ConditionalDimension is built from
    Functions, so the Operator has to indexify and lower it. After
    applying the Operator, ``f`` must equal ``g1 + g2`` everywhere.
    """
    grid = Grid(shape=(3, 3))
    x, y = grid.dimensions

    g1 = Function(name='g1', grid=grid)
    g2 = Function(name='g2', grid=grid)
    g1.data[:] = 0.49
    g2.data[:] = 0.49

    condition = Le((g1 + g2), 1.01 * (g1 + g2))
    ci = ConditionalDimension(name='ci', parent=y, condition=condition)

    f = Function(name='f', shape=grid.shape, dimensions=(x, ci))

    op = Operator(Eq(f, g1 + g2))
    op.apply()

    assert np.all(f.data[:] == g1.data[:] + g2.data[:])
def test_no_fusion_convoluted(self):
    """
    Conceptually like `test_no_fusion_simple`, but with more expressions
    and non-trivial data flow.

    Each of the callables ``bf0``, ``bf1`` and ``bf2`` must contain three
    expressions; in ``bf0`` and ``bf2`` the second and third expressions
    must reuse the output of the first one.
    """
    grid = Grid(shape=(4, 4, 4))
    time = grid.time_dim

    f = TimeFunction(name='f', grid=grid)
    g = Function(name='g', grid=grid)
    h = Function(name='h', grid=grid)

    ctime = ConditionalDimension(name='ctime', parent=time, condition=time > 4)

    op = Operator([
        Eq(f.forward, f + 1),
        Eq(h, f + 1),
        Eq(g, f + 1, implicit_dims=[ctime]),
        Eq(f.forward, f + 1, implicit_dims=[ctime]),
        Eq(f.forward, f + 1),
        Eq(g, f + 1),
    ])

    def expressions_in(callable_name):
        # All Expression nodes within the named entry of the function table
        root = op._func_table[callable_name].root
        return FindNodes(Expression).visit(root)

    exprs = expressions_in('bf0')
    assert len(exprs) == 3
    assert exprs[1].expr.rhs is exprs[0].output
    assert exprs[2].expr.rhs is exprs[0].output

    exprs = expressions_in('bf1')
    assert len(exprs) == 3

    exprs = expressions_in('bf2')
    assert len(exprs) == 3
    assert exprs[1].expr.rhs is exprs[0].output
    assert exprs[2].expr.rhs is exprs[0].output
def mat_vec(A, x, b, optimize):
    """
    ``Ax = b``.

    Build an Operator that increments ``b`` by ``A*x`` (with
    ``dle=optimize``), run it, and log a completion message.
    """
    increment = Inc(b, A*x)
    Operator(increment, dle=optimize).apply()
    info('Executed `Ax = b`')
def transpose_mat_vec(A, x, b, optimize):
    """
    ``A -> A^T, A^Tx = b``.

    Build an Operator that increments ``b`` by ``A[j, i]*x``, where
    ``i, j`` are the indices of ``A`` (i.e. ``A`` is accessed with its
    indices swapped), run it, and log a completion message.
    """
    row, col = A.indices
    increment = Inc(b, A[col, row]*x)
    Operator([increment], dle=optimize).apply()
    info('Executed `A^Tx = b`')
def mat_mat(A, B, C, optimize):
    """
    ``AB = C``.

    Build an Operator that increments ``C`` by ``A*B`` (with
    ``dle=optimize``), run it, and log a completion message.
    """
    increment = Inc(C, A*B)
    Operator(increment, dle=optimize).apply()
    info('Executed `AB = C`')
def mat_mat_sum(A, B, C, D, optimize):
    """
    ``AB + AC = D``.

    Build an Operator that increments ``D`` by ``A*B + A*C`` (with
    ``dle=optimize``), run it, and log a completion message.
    """
    increment = Inc(D, A*B + A*C)
    Operator(increment, dle=optimize).apply()
    info('Executed `AB + AC = D`')
def chain_contractions(A, B, C, D, E, F, optimize):
    """
    ``AB + AC = D, DE = F``.

    Build a single Operator from two increments, in this order: ``D`` by
    ``A*B + A*C``, then ``F`` by ``D*E``. Run it (with ``dle=optimize``)
    and log a completion message.
    """
    first = Inc(D, A*B + A*C)
    second = Inc(F, D*E)
    Operator([first, second], dle=optimize).apply()
    info('Executed `AB + AC = D, DE = F`')