def test_capture_vector_temporaries(self):
    """
    Check the handling of vector temporaries in an offloaded stencil equation.

    The test asserts that:

        * the generated code contains no ``posix_memalign`` call and does
          contain ``run_solution``;
        * the ``r1`` grid of the first solution has no storage allocated,
          both before and after the Operator is applied;
        * ``v`` is still all zeros and ``u.data[1]`` equals 5 afterwards.
    """
    grid = Grid(shape=(4, 4, 4))
    u = TimeFunction(name='yu4D', grid=grid, space_order=0)
    v = Function(name='yv3D', grid=grid, space_order=0)
    rhs = u + cos(v)*2. + cos(v)*cos(v)*3.
    op = Operator([Eq(u.forward, rhs)])

    def r1_allocated():
        # Look the grid up afresh on every query
        first_soln = list(op.yk_solns.values())[0]
        return first_soln.grids['r1'].is_storage_allocated()

    # Sanity check of the generated code
    code = str(op)
    assert 'posix_memalign' not in code
    assert 'run_solution' in code

    # Nothing has been allocated for the temporary before running
    assert r1_allocated() is False

    op.apply(yu4D=u, yv3D=v, time=0)

    # ... and nothing is left allocated once execution has finished
    assert r1_allocated() is False
    assert np.all(v.data == 0.)
    assert np.all(u.data[1] == 5.)
def test_subsampling(self):
    """
    Test (time) subsampling support.

    Two equations are compiled together: one advancing ``u`` and one saving
    ``u`` into ``usave`` every ``factor`` timesteps. The generated code is
    expected to contain two ``ForeignExpression`` nodes, each mentioning
    ``run_solution``.
    """
    nt = 9
    factor = 4
    nsamples = (nt+factor-1)//factor

    grid = Grid(shape=(8, 8))
    time_dim = grid.time_dim

    u = TimeFunction(name='u', grid=grid)
    u.data_with_halo[:] = 0.

    # The subsampled function lives on a ConditionalDimension
    t_sub = ConditionalDimension('t_sub', parent=time_dim, factor=factor)
    usave = TimeFunction(name='usave', grid=grid, save=nsamples, time_dim=t_sub)

    op = Operator([Eq(u.forward, u + 1.), Eq(usave, u)])
    op.apply(time=nt-1)

    # Numerical correctness: one snapshot every `factor` timesteps
    for slot, expected in enumerate((0., 4., 8.)):
        assert np.all(usave.data[slot] == expected)

    # Code generation
    foreign = FindNodes(ForeignExpression).visit(op)
    assert len(foreign) == 2
    for node in foreign:
        assert 'run_solution' in str(node)
def test_subsampled_fd(self):
    """
    Test the symbolic interface for space-subsampled functions.

    ``u2`` is defined on a grid whose dimensions are ConditionalDimensions
    (factor 2) of the dimensions of ``u``'s grid; its ``dx`` derivative is
    then used on the right-hand side of an equation.
    """
    nt = 19
    grid = Grid(shape=(12, 12), extent=(11, 11))
    u = TimeFunction(name='u', grid=grid, save=nt, space_order=2)
    assert grid.time_dim in u.indices

    # Subsampled spatial dimensions and the grid built on top of them
    sub_dims = tuple(ConditionalDimension(d.name+'sub', parent=d, factor=2)
                     for d in u.grid.dimensions)
    grid2 = Grid((6, 6), dimensions=sub_dims)
    u2 = TimeFunction(name='u2', grid=grid2, save=nt, space_order=1)

    ncols = u2.data_with_halo.shape[2]
    for step in range(nt):
        for col in range(ncols):
            u2.data_with_halo[step, :, col] = np.arange(ncols)

    op = Operator([Eq(u.forward, u + 1.), Eq(u2.forward, u2.dx)],
                  dse="advanced")
    op.apply(time_M=nt-2)

    # Verify that u2[1, x, y] = du2/dx[0, x, y]
    assert np.allclose(u.data[-1], nt-1)
    assert np.allclose(u2.data[1], 0.5)
def test_const_change(self):
    """
    Test that ``Constant.data`` can be set as required.

    ``f`` is first filled by a time-stepping Operator; a second Operator
    then copies the time slice selected by the Constant ``t`` into ``check``,
    for every value of ``t`` in ``[0, n]``.
    """
    n = 5
    t = Constant(name='t', dtype=np.int32)

    grid = Grid(shape=(2, 2))
    x, y = grid.dimensions

    f = TimeFunction(name='f', grid=grid, save=n+1)
    f.data[:] = 0

    update = Eq(f.forward, solve(Eq(f.dt-1), f.forward))
    Operator([update]).apply(time_m=0, time_M=n-1, dt=1)

    check = Function(name='check', grid=grid)
    op_test = Operator([Eq(check, f[t, x, y])])

    for value in range(n+1):
        # Ensure the constant is being updated correctly
        t.data = value
        op_test.apply(t=t)
        assert np.amax(check.data[:], axis=None) == value
        assert np.amin(check.data[:], axis=None) == value
def test_fd_space_staggered(self, space_order, stagger):
    """
    Compare the staggered finite-difference scheme against a polynomial.

    A polynomial of degree ``space_order - 2`` with unit coefficients is
    sampled at the (possibly staggered) grid points, differentiated
    numerically with ``generic_derivative`` and compared with its symbolic
    derivative away from the borders.

    :param space_order: space order of the finite difference stencil
    :param stagger: staggering of the function; compared against ``left``
                    and ``right``, anything else is treated as ``NODE``
    """
    clear_cache()

    # Dummy axis dimension
    nx = 100
    xx = np.linspace(-1, 1, nx)
    dx = xx[1] - xx[0]

    # Symbolic data
    grid = Grid(shape=(nx,), dtype=np.float32)
    x = grid.dimensions[0]

    # Location of the staggered function
    if stagger == left:
        off = -.5
        side = -x
        xx2 = xx - off * dx
    elif stagger == right:
        off = .5
        side = x
        xx2 = xx[:-1] - off * dx
    else:
        off = 0
        side = NODE
        xx2 = xx

    u = Function(name="u", grid=grid, space_order=space_order,
                 staggered=(side,))
    du = Function(name="du", grid=grid, space_order=space_order)

    # Polynomial for which the FD scheme is expected to be exact
    coeffs = np.ones((space_order-1,), dtype=np.float32)
    polynome = sum(c*x**power for power, c in enumerate(coeffs))
    polyvalues = np.array([polynome.subs(x, point) for point in xx2],
                          np.float32)

    # Fill the original data with the polynomial values
    u.data[:] = polyvalues

    # True derivative of the polynomial
    Dpolynome = diff(polynome)
    Dpolyvalues = np.array([Dpolynome.subs(x, point) for point in xx],
                           np.float32)

    # FD derivative, symbolic
    u_deriv = generic_derivative(u, deriv_order=1, fd_order=space_order,
                                 dim=x, stagger=stagger)

    # Compute the numerical FD
    op = Operator(Eq(du, u_deriv), subs={x.spacing: dx})
    op.apply()

    # Check exactness of the numerical derivative away from the borders
    interior = slice(space_order, -space_order)
    error = abs(du.data[interior] - Dpolyvalues[interior])
    assert np.isclose(np.mean(error), 0., atol=1e-3)
def test_mixed_blocking_nthreads():
    """
    Check the autotuner record after a run with ``autotune=True`` on an
    Operator built with ``dle=('advanced', {'openmp': True})``: number of
    runs, timesteps per run, and the presence of ``nthreads`` among the
    three tuned parameters.
    """
    grid = Grid(shape=(64, 64, 64))
    f = TimeFunction(name='f', grid=grid)

    update = Eq(f.forward, f + 1.)
    op = Operator(update, dle=('advanced', {'openmp': True}))
    op.apply(time=100, autotune=True)

    record = op._state['autotuning'][0]
    assert record['runs'] == 6
    assert record['tpr'] == options['squeezer'] + 1
    assert len(record['tuned']) == 3
    assert 'nthreads' in record['tuned']
def test_mode_destructive():
    """Test autotuning in destructive mode."""
    grid = Grid(shape=(64, 64, 64))
    f = TimeFunction(name='f', grid=grid, time_order=0)

    increment = Eq(f, f + 1.)
    op = Operator(increment, dle=('advanced', {'openmp': False}))
    op.apply(time=100, autotune=('basic', 'destructive'))

    # The autotuner is expected to have executed 30 timesteps (6 block
    # shapes, 5 timesteps each), on top of which the operator itself runs
    # for 101 timesteps. Overall, f.data[0] is incremented 131 times
    assert np.all(f.data == 131)
def test_acoustic_wo_src_wo_rec(self):
    """
    Test that the acoustic wave equation runs without crashing in absence
    of sources and receivers; with a zero initial field, the norm of ``u``
    is expected to remain exactly zero.
    """
    self.u.data[:] = 0.0
    timestep = self.model.critical_dt

    op = Operator(self.eqn, subs=self.model.spacing_map)
    assert 'run_solution' in str(op)

    op.apply(u=self.u, m=self.m, damp=self.damp, time=10, dt=timestep)

    assert np.linalg.norm(self.u.data[:]) == 0.0
def test_multiple_threads():
    """
    Test autotuning when different ``num_threads`` for a given OpenMP
    parallel region are attempted.
    """
    grid = Grid(shape=(64, 64, 64))
    v = TimeFunction(name='v', grid=grid)

    update = Eq(v.forward, v + 1)
    op = Operator(update, dle=('blocking', {'openmp': True}))
    op.apply(time_M=0, autotune='max')

    record = op._state['autotuning'][0]
    assert record['runs'] == 60  # Would be 30 with `aggressive`
    assert record['tpr'] == options['squeezer'] + 1
    assert len(record['tuned']) == 3
def test_operator_function():
    """
    Pickle round-trip of an Operator acting on a Function: the restored
    Operator must stringify identically to the original and still be
    applicable to ``f``.
    """
    grid = Grid(shape=(3, 3, 3))
    f = Function(name='f', grid=grid)

    op = Operator(Eq(f, f + 1))
    op.apply()

    restored = pickle.loads(pickle.dumps(op))
    assert str(op) == str(restored)

    # One increment from `op`, one from the unpickled copy
    restored.apply(f=f)
    assert np.all(f.data == 2)
def test_operator_timefunction():
    """
    Pickle round-trip of an Operator acting on a saved TimeFunction: the
    restored Operator must stringify identically to the original and be
    able to carry on the time stepping from where ``op`` stopped.
    """
    grid = Grid(shape=(3, 3, 3))
    f = TimeFunction(name='f', grid=grid, save=3)

    op = Operator(Eq(f.forward, f + 1))
    op.apply(time=0)

    restored = pickle.loads(pickle.dumps(op))
    assert str(op) == str(restored)

    restored.apply(time_m=1, time_M=1, f=f)
    assert np.all(f.data[2] == 2)
def run_simulation(save=False, dx=0.01, dy=0.01, a=0.5, timesteps=100):
    """
    Run a 2D diffusion problem ``u.dt = a * (u.dx2 + u.dy2)`` and return
    ``u.data[timesteps - 1]``.

    :param save: if True, ``u`` is created with ``save=timesteps``;
                 otherwise with ``save=None``
    :param dx: spacing used to size the grid along x and to derive ``dt``
    :param dy: spacing used to size the grid along y and to derive ``dt``
    :param a: diffusion coefficient
    :param timesteps: the Operator is applied with ``time=timesteps-2``
    """
    shape = (int(1 / dx), int(1 / dy))
    dx2, dy2 = dx**2, dy**2
    dt = dx2 * dy2 / (2 * a * (dx2 + dy2))

    grid = Grid(shape=shape)
    nsaved = timesteps if save else None
    # `initializer` is expected to be provided by the enclosing module
    u = TimeFunction(name='u', grid=grid, save=nsaved,
                     initializer=initializer, time_order=1, space_order=2)

    diffusion = Eq(u.dt, a * (u.dx2 + u.dy2))
    update = Eq(u.forward, solve(diffusion, u.forward))

    Operator(update).apply(time=timesteps-2, dt=dt)

    return u.data[timesteps - 1]
def test_acoustic_w_src_wo_rec(self):
    """
    Test that the acoustic wave equation runs without crashing in absence
    of receivers.

    The source injection is appended to the equations in ``self.eqn`` and
    the norm of the resulting wavefield is compared, with a 1% tolerance,
    against a reference value.
    """
    dt = self.model.critical_dt
    self.u.data[:] = 0.0

    # Work on a copy: `eqns = self.eqn` followed by `+=` would extend the
    # shared `self.eqn` list in place, leaking the injection equations into
    # any other test reading `self.eqn`
    eqns = list(self.eqn)
    eqns += self.src.inject(field=self.u.forward,
                            expr=self.src * dt**2 / self.m)

    op = Operator(eqns, subs=self.model.spacing_map)
    assert 'run_solution' in str(op)

    op.apply(u=self.u, m=self.m, damp=self.damp, src=self.src, dt=dt)

    exp_u = 154.05
    assert np.isclose(np.linalg.norm(self.u.data[:]), exp_u,
                      atol=exp_u*1.e-2)
def test_dynamic_nthreads(self):
    """
    Check that an OpenMP Operator exposes ``nthreads`` as a runtime
    argument: it must appear in the generated code, be accepted by
    ``apply`` and be reported by ``arguments``.
    """
    grid = Grid(shape=(16, 16, 16))
    f = TimeFunction(name='f', grid=grid)
    op = Operator(Eq(f.forward, f + 1.), dle='openmp')

    # Check num_threads appears in the generated code
    # Not very elegant, but it does the trick
    assert 'num_threads(nthreads)' in str(op)

    # Check `op` accepts the `nthreads` kwarg
    op.apply(time=0)
    op.apply(time_m=1, time_M=1, nthreads=4)
    assert np.all(f.data[0] == 2.)

    # Check the actual value assumed by `nthreads`
    default_args = op.arguments(time=0)
    assert default_args['nthreads'] == NThreads.default_value()
    custom_args = op.arguments(time=0, nthreads=123)
    assert custom_args['nthreads'] == 123  # user supplied
def test_discarding_runs():
    """
    Check the autotuner records of two consecutive ``aggressive`` runs of
    the same Operator, first with ``nthreads=4`` and then with
    ``nthreads=1``.
    """
    grid = Grid(shape=(64, 64, 64))
    f = TimeFunction(name='f', grid=grid)
    op = Operator(Eq(f.forward, f + 1.), dle=('advanced', {'openmp': True}))

    op.apply(time=100, nthreads=4, autotune='aggressive')

    first = op._state['autotuning'][0]
    assert first['runs'] == 20
    assert first['tpr'] == options['squeezer'] + 1
    assert len(first['tuned']) == 3
    assert first['tuned']['nthreads'] == 4

    # With 1 < 4 threads, the AT eventually tries many more combinations
    op.apply(time=100, nthreads=1, autotune='aggressive')

    second = op._state['autotuning'][1]
    assert second['runs'] == 30
    assert second['tpr'] == options['squeezer'] + 1
    assert len(second['tuned']) == 3
    assert second['tuned']['nthreads'] == 1
def test_collapsing(self):
    """
    Check loop collapsing for an Operator built with
    ``dle=('blocking', 'openmp')``: it must compile, produce the right
    result and carry the expected OpenMP pragmas in ``bf0``.
    """
    grid = Grid(shape=(3, 3, 3))
    u = TimeFunction(name='u', grid=grid)
    op = Operator(Eq(u.forward, u + 1), dle=('blocking', 'openmp'))

    # Does it compile? Honoring the OpenMP specification isn't trivial
    assert op.cfunction

    # Does it produce the right result
    op.apply(t_M=9)
    assert np.all(u.data[0] == 10)

    loops = FindNodes(Iteration).visit(op._func_table['bf0'])
    outer_pragma = loops[0].pragmas[0].value
    assert outer_pragma == 'omp for collapse(2) schedule(static,1)'
    nested_pragma = loops[2].pragmas[0].value
    assert nested_pragma ==\
        ('omp parallel for collapse(2) schedule(static,1) num_threads(%d)'
         % nhyperthreads())
def test_makeit_ssa(exprs, exp_u, exp_v):
    """
    A test building Operators with non-trivial sequences of input
    expressions that push hard on the `makeit_ssa` utility function.

    :param exprs: sequence of strings, each ``eval``-ed in this function's
                  scope (so they may refer to ``x``, ``y``, ``u``, ``v``
                  and ``s``)
    :param exp_u: expected content of ``u.data`` after the run
    :param exp_v: expected content of ``v.data`` after the run
    """
    grid = Grid(shape=(4, 4))
    x, y = grid.dimensions  # noqa
    u = Function(name='u', grid=grid)  # noqa
    v = Function(name='v', grid=grid)  # noqa
    s = Scalar(name='s')  # noqa

    # Evaluate into a private copy: writing the results back into the
    # caller's list would leave already-evaluated objects in the
    # parametrization data, which cannot be `eval`-ed a second time
    exprs = list(exprs)

    # List comprehension would need explicit locals/globals mappings to eval
    for i, e in enumerate(exprs):
        exprs[i] = eval(e)

    op = Operator(exprs)
    op.apply()

    assert np.all(u.data == exp_u)
    assert np.all(v.data == exp_v)
def test_mpi_operator():
    """
    Pickle round-trip of an Operator whose expression contains a stencil:
    the restored Operator must stringify identically to the original and,
    applied to ``g``, reproduce the data that ``op`` computed for ``f``.
    """
    grid = Grid(shape=(4,))
    f = TimeFunction(name='f', grid=grid)
    g = TimeFunction(name='g', grid=grid)

    # Using `sum` creates a stencil in `x`, which in turn will
    # trigger the generation of code for MPI halo exchange
    op = Operator(Eq(f.forward, f.sum() + 1))
    op.apply(time=2)

    restored = pickle.loads(pickle.dumps(op))
    assert str(op) == str(restored)

    restored.apply(time=2, f=g)

    assert np.all(f.data[0] == [2., 3., 3., 3.])
    assert np.all(f.data[1] == [3., 6., 7., 7.])
    for buf in (0, 1):
        assert np.all(g.data[buf] == f.data[buf])
def test_acoustic_w_src_w_rec(self):
    """
    Test that the acoustic wave equation forward operator produces the
    correct results when running a 3D model also used in
    ``test_adjointA.py``.

    Source injection and receiver interpolation are appended to the
    equations in ``self.eqn``; the norms of the wavefield and of the
    receiver data are compared, with a 1% tolerance, against reference
    values.
    """
    dt = self.model.critical_dt
    self.u.data[:] = 0.0

    # Work on a copy: `eqns = self.eqn` followed by `+=` would extend the
    # shared `self.eqn` list in place, leaking these extra equations into
    # any other test reading `self.eqn`
    eqns = list(self.eqn)
    eqns += self.src.inject(field=self.u.forward,
                            expr=self.src * dt**2 / self.m)
    eqns += self.rec.interpolate(expr=self.u)

    op = Operator(eqns, subs=self.model.spacing_map)
    assert 'run_solution' in str(op)

    op.apply(u=self.u, vp=self.vp, damp=self.damp, src=self.src,
             rec=self.rec, dt=dt)

    # The expected norms have been computed "by hand" looking at the output
    # of test_adjointA's forward operator w/o using the YASK backend.
    exp_u = 154.05
    exp_rec = 212.15
    assert np.isclose(np.linalg.norm(self.u.data[:]), exp_u,
                      atol=exp_u*1.e-2)
    assert np.isclose(np.linalg.norm(self.rec.data.reshape(-1)), exp_rec,
                      atol=exp_rec*1.e-2)
def _new_operator3(shape, blockshape=None, dle=None):
    """
    Build and run an Operator for the equation
    ``u.dt = a * (u.dx2 + u.dy2) - c * (u.dxl + u.dyl)``.

    :param shape: shape of the grid
    :param blockshape: forwarded (as a tuple) to ``get_blocksizes``
    :param dle: forwarded to ``Operator`` and to ``get_blocksizes``
    :returns: the tuple ``(u.data[1, :], op)``
    """
    blockshape = as_tuple(blockshape)
    grid = Grid(shape=shape)

    a = 0.5
    c = 0.5
    spacing = 0.1
    dx2, dy2 = spacing**2, spacing**2
    dt = dx2 * dy2 / (2 * a * (dx2 + dy2))

    # Allocate the grid and set initial condition
    # Note: This should be made simpler through the use of defaults
    u = TimeFunction(name='u', grid=grid, time_order=1,
                     space_order=(2, 2, 2))
    npoints = reduce(mul, shape)
    u.data[0, :] = np.arange(npoints, dtype=np.int32).reshape(shape)

    # Derive the stencil according to devito conventions
    pde = Eq(u.dt, a * (u.dx2 + u.dy2) - c * (u.dxl + u.dyl))
    update = Eq(u.forward, solve(pde, u.forward))
    op = Operator(update, dle=dle)

    blocksizes = get_blocksizes(op, dle, grid, blockshape)
    op.apply(u=u, t=10, dt=dt, **blocksizes)

    return u.data[1, :], op
def test_redo_haloupdate_due_to_antidep(self):
    """
    Two stencil equations, the second reading ``f`` at ``t + 1`` right
    after the first has written it: exactly two ``Call`` nodes are expected
    in the Operator, and the computed values are checked on ``f`` and ``g``.
    """
    grid = Grid(shape=(12, ))
    x = grid.dimensions[0]
    t = grid.stepping_dim

    f = TimeFunction(name='f', grid=grid)
    g = TimeFunction(name='g', grid=grid)

    write_f = Eq(f.forward, f[t, x - 1] + f[t, x + 1] + 1.)
    read_f = Eq(g.forward, f[t + 1, x - 1] + f[t + 1, x + 1] + g)
    op = Operator([write_f, read_f])
    op.apply(time=0)

    assert len(FindNodes(Call).visit(op)) == 2

    assert np.all(f.data_ro_domain[1] == 1.)
    glb_pos_map = f.grid.distributor.glb_pos_map
    if LEFT in glb_pos_map[x]:
        assert np.all(g.data_ro_domain[1, 1:] == 2.)
    else:
        assert np.all(g.data_ro_domain[1, :-1] == 2.)
def test_cire():
    """
    Compare an Operator built with ``'cire-mingain': 0`` against the same
    Operator with ``'linearize': True`` added: only the latter must mention
    ``uL0`` in its generated code, and both must produce the same data.
    """
    grid = Grid(shape=(4, 4, 4))

    u = TimeFunction(name='u', grid=grid, space_order=2)
    u1 = TimeFunction(name='u', grid=grid, space_order=2)

    eqn = Eq(u.forward, u.dy.dy + 1.)

    reference_opts = {'cire-mingain': 0}
    linearized_opts = {'linearize': True, 'cire-mingain': 0}
    op0 = Operator(eqn, opt=('advanced', reference_opts))
    op1 = Operator(eqn, opt=('advanced', linearized_opts))

    # Check generated code
    assert 'uL0' not in str(op0)
    assert 'uL0' in str(op1)

    op0.apply(time_M=10)
    op1.apply(time_M=10, u=u1)

    assert np.all(u.data == u1.data)
def test_collapsing(self):
    """
    Check loop collapsing for an Operator built with blocking, OpenMP and
    nested parallelism options: it must compile, produce the right result
    and carry the expected OpenMP pragmas in ``bf0``.
    """
    grid = Grid(shape=(3, 3, 3))
    u = TimeFunction(name='u', grid=grid)

    par_options = {'par-nested': 0,
                   'par-collapse-ncores': 1,
                   'par-collapse-work': 0,
                   'par-dynamic-work': 0}
    op = Operator(Eq(u.forward, u + 1),
                  opt=('blocking', 'openmp', par_options))

    # Does it compile? Honoring the OpenMP specification isn't trivial
    assert op.cfunction

    # Does it produce the right result
    op.apply(t_M=9)
    assert np.all(u.data[0] == 10)

    loops = FindNodes(Iteration).visit(op._func_table['bf0'])
    outer_pragma = loops[0].pragmas[0].value
    assert outer_pragma == 'omp for collapse(2) schedule(dynamic,1)'
    nested_pragma = loops[2].pragmas[0].value
    assert nested_pragma == ('omp parallel for collapse(2) '
                             'schedule(dynamic,1) '
                             'num_threads(nthreads_nested)')
def test_subdim_middle(self, opt):
    """
    Test that SubDimensions created through the classmethod constructors
    work correctly.

    ``x`` is replaced by a middle SubDimension of thickness 1 on both
    sides, so only the interior planes along ``x`` are expected to be
    incremented.
    """
    grid = Grid(shape=(4, 4, 4))
    x, y, z = grid.dimensions
    t = grid.stepping_dim  # noqa

    u = TimeFunction(name='u', grid=grid)  # noqa
    xi = SubDimension.middle(name='xi', parent=x,
                             thickness_left=1, thickness_right=1)

    base_eqs = [Eq(u.forward, u + 1)]
    interior_eqs = [eq.subs(x, xi) for eq in base_eqs]

    op = Operator(interior_eqs, opt=opt)

    u.data[:] = 1.0
    op.apply(time_M=1)

    # Boundary planes untouched, interior planes incremented
    assert np.all(u.data[1, 0, :, :] == 1)
    assert np.all(u.data[1, -1, :, :] == 1)
    assert np.all(u.data[1, 1:3, :, :] == 2)
def execute_devito(ui, spacing=0.01, a=0.5, timesteps=500):
    """
    Execute diffusion stencil using the devito Operator API.

    :param ui: 2D array holding the initial condition; its shape defines
               the grid
    :param spacing: spacing used along both directions to derive ``dt``
    :param a: diffusion coefficient
    :param timesteps: value passed as ``t`` to the Operator
    :returns: the tuple ``(u.data[1, :], runtime)``, with ``runtime`` the
              wall-clock time, in seconds, spent in ``op.apply``
    """
    nx, ny = ui.shape
    dx2, dy2 = spacing**2, spacing**2
    dt = dx2 * dy2 / (2 * a * (dx2 + dy2))

    # Allocate the grid and set initial condition
    # Note: This should be made simpler through the use of defaults
    grid = Grid(shape=(nx, ny))
    u = TimeFunction(name='u', grid=grid, time_order=1, space_order=2)
    u.data[0, :] = ui[:]

    # Derive the stencil according to devito conventions
    diffusion = Eq(u.dt, a * (u.dx2 + u.dy2))
    update = Eq(u.forward, solve(diffusion, u.forward))
    op = Operator(update)

    # Execute the generated Devito stencil operator
    started = time.time()
    op.apply(u=u, t=timesteps, dt=dt)
    runtime = time.time() - started

    message = ("Devito: Diffusion with dx=%0.4f, dy=%0.4f, "
               "executed %d timesteps in %f seconds")
    log(message % (spacing, spacing, timesteps, runtime))

    return u.data[1, :], runtime
def test_discarding_runs():
    """
    Check the autotuner records of two consecutive ``aggressive`` runs of
    the same Operator, first with ``nthreads=4`` and then with
    ``nthreads=1``.
    """
    grid = Grid(shape=(64, 64, 64))
    f = TimeFunction(name='f', grid=grid)

    omp_options = {'openmp': True, 'par-collapse-ncores': 1}
    op = Operator(Eq(f.forward, f + 1.), opt=('advanced', omp_options))

    op.apply(time=100, nthreads=4, autotune='aggressive')

    first = op._state['autotuning'][0]
    assert first['runs'] == 18
    assert first['tpr'] == options['squeezer'] + 1
    assert len(first['tuned']) == 3
    assert first['tuned']['nthreads'] == 4

    # With 1 < 4 threads, the AT eventually tries many more combinations
    op.apply(time=100, nthreads=1, autotune='aggressive')

    second = op._state['autotuning'][1]
    assert second['runs'] == 25
    assert second['tpr'] == options['squeezer'] + 1
    assert len(second['tuned']) == 3
    assert second['tuned']['nthreads'] == 1
def solver_adjust_w(I, w, dt, T, adjust_w=True):
    """
    Solve u'' + w**2*u = 0 for t in (0,T], u(0)=I and u'(0)=0,
    by a central finite difference method with time step dt.

    :param I: value used to initialise the whole of ``u.data``
    :param w: angular frequency
    :param dt: time step (converted to float)
    :param T: end of the time interval; ``Nt = round(T / dt)`` steps are run
    :param adjust_w: if True, the scheme uses the adjusted frequency
                     ``w * (1 - w**2 * dt**2 / 24)`` instead of ``w``
    :returns: the tuple ``(u.data, t)``, with ``t`` the ``Nt + 1`` time
              points in ``[0, Nt * dt]``
    """
    dt = float(dt)
    Nt = int(round(T / dt))

    t = Dimension('t', spacing=Constant('h_t'))
    u = TimeFunction(name='u', dimensions=(t, ), shape=(Nt + 1, ),
                     space_order=2)

    # Adjust w if required
    w_adj = w * (1 - w**2 * dt**2 / 24.) if adjust_w else w

    u.data[:] = I

    # The (possibly adjusted) frequency must be the one entering the scheme;
    # using `w` here would make `adjust_w` a no-op
    eqn = u.dt2 + (w_adj**2) * u
    stencil = Eq(u.forward, solve(eqn, u.forward))

    op = Operator(stencil)
    op.apply(h_t=dt, t_M=Nt - 1)

    return u.data, np.linspace(0, Nt * dt, Nt + 1)
def test_acoustic_w_src_w_rec(self):
    """
    Test that the acoustic wave equation forward operator produces the
    correct results when running a 3D model also used in
    ``test_adjointA.py``.

    Source injection and receiver interpolation are appended to the
    equations in ``self.eqn``; the norms of the wavefield and of the
    receiver data are compared, with a 1% tolerance, against reference
    values.
    """
    dt = self.model.critical_dt
    self.u.data[:] = 0.0

    # Work on a copy: `eqns = self.eqn` followed by `+=` would extend the
    # shared `self.eqn` list in place, leaking these extra equations into
    # any other test reading `self.eqn`
    eqns = list(self.eqn)
    eqns += self.src.inject(field=self.u.forward,
                            expr=self.src * dt**2 / self.m)
    eqns += self.rec.interpolate(expr=self.u)

    op = Operator(eqns, subs=self.model.spacing_map)
    assert 'run_solution' in str(op)

    op.apply(u=self.u, m=self.m, damp=self.damp, src=self.src,
             rec=self.rec, dt=dt)

    # The expected norms have been computed "by hand" looking at the output
    # of test_adjointA's forward operator w/o using the YASK backend.
    exp_u = 154.05
    exp_rec = 212.15
    assert np.isclose(np.linalg.norm(self.u.data[:]), exp_u,
                      atol=exp_u*1.e-2)
    assert np.isclose(np.linalg.norm(self.rec.data.reshape(-1)), exp_rec,
                      atol=exp_rec*1.e-2)
def test_constants(self):
    """
    Check that :class:`Constant` objects are treated correctly.

    The Constant ``c`` is used both in the update of ``u`` and in an
    equation writing to the sparse function ``p``; the Operator is then
    re-applied with a different Constant bound to the same name.
    """
    grid = Grid(shape=(4, 4, 4))
    c = Constant(name='c', value=2., dtype=grid.dtype)
    p = SparseTimeFunction(name='points', grid=grid, nt=1, npoint=1)
    u = TimeFunction(name='yu4D', grid=grid, space_order=0)
    u.data[:] = 0.

    update_u = Eq(u.forward, u + c)
    update_p = Eq(p[0, 0], 1. + c)
    op = Operator([update_u, update_p])
    assert 'run_solution' in str(op)

    op.apply(yu4D=u, c=c, time=9)

    # Check YASK did its job and could read constant grids w/o problems
    assert np.all(u.data[0] == 20.)
    # Check the Constant could be read correctly even in Devito-land, i.e.,
    # outside of run_solution
    assert p.data[0][0] == 3.

    # Check re-executing with another constant gives the correct result
    c2 = Constant(name='c', value=5.)
    op.apply(yu4D=u, c=c2, time=2)
    assert np.all(u.data[0] == 30.)
    assert np.all(u.data[1] == 35.)
    assert p.data[0][0] == 6.
def test_constants(self):
    """
    Check that :class:`Constant` objects are treated correctly.

    The Constant ``c`` is used both in the update of ``u`` and in an
    equation writing to the sparse function ``p``; the Operator is then
    re-applied with a different Constant bound to the same name.
    """
    grid = Grid(shape=(4, 4, 4))
    c = Constant(name='c', value=2., dtype=grid.dtype)
    p = SparseTimeFunction(name='points', grid=grid, nt=1, npoint=1)
    u = TimeFunction(name='yu4D', grid=grid, space_order=0)
    u.data[:] = 0.

    update_u = Eq(u.forward, u + c)
    update_p = Eq(p.indexed[0, 0], 1. + c)
    op = Operator([update_u, update_p])
    assert 'run_solution' in str(op)

    op.apply(yu4D=u, c=c, t=9)

    # Check YASK did its job and could read constant grids w/o problems
    assert np.all(u.data[0] == 20.)
    # Check the Constant could be read correctly even in Devito-land, i.e.,
    # outside of run_solution
    assert p.data[0][0] == 3.

    # Check re-executing with another constant gives the correct result
    c2 = Constant(name='c', value=5.)
    op.apply(yu4D=u, c=c2, t=2)
    assert np.all(u.data[0] == 30.)
    assert np.all(u.data[1] == 35.)
    assert p.data[0][0] == 6.
def test_staggered_div():
    """
    Test that div works properly on expressions.
    From @speglish issue #1248

    Two properties are checked: ``div(1*v)`` and ``div(1.*v)`` give the
    same result, and ``div(A*v)`` matches the divergence of a precomputed
    ``av = A*v``.
    """
    grid = Grid(shape=(5, 5))

    v = VectorTimeFunction(name="v", grid=grid, time_order=1, space_order=4)
    p1 = TimeFunction(name="p1", grid=grid, time_order=1, space_order=4,
                      staggered=NODE)
    p2 = TimeFunction(name="p2", grid=grid, time_order=1, space_order=4,
                      staggered=NODE)

    # Test that 1.*v and 1*v are doing the same
    v[0].data[:] = 1.
    v[1].data[:] = 1.

    eq1 = Eq(p1, div(1*v))
    eq2 = Eq(p2, div(1.*v))

    op1 = Operator([eq1])
    op2 = Operator([eq2])

    op1.apply(time_M=0)
    op2.apply(time_M=0)
    assert np.allclose(p1.data[:], p2.data[:], atol=0, rtol=1e-5)

    # Test div on expression
    v[0].data[:] = 5.
    v[1].data[:] = 5.

    # The keyword was misspelled `staggred`, so the requested staggering
    # never reached the `staggered` argument
    A = Function(name="A", grid=grid, space_order=4, staggered=NODE,
                 parameter=True)
    A._data_with_outhalo[:] = .5

    av = VectorTimeFunction(name="av", grid=grid, time_order=1,
                            space_order=4)

    # Operator with A (precomputed A*v)
    eq1 = Eq(av, A*v)
    eq2 = Eq(p1, div(av))
    op = Operator([eq1, eq2])
    op.apply(time_M=0)

    # Operator with div(A*v) directly
    eq3 = Eq(p2, div(A*v))
    op2 = Operator([eq3])
    op2.apply(time_M=0)

    assert np.allclose(p1.data[:], p2.data[:], atol=0, rtol=1e-5)
def random_walks1D_vec(x0, N, p, num_walks=1, num_times=1, random=random):
    """
    Vectorized version of random_walks1D.

    :param x0: starting position of every walk
    :param N: number of steps per walk
    :param p: threshold on the uniform samples; a sample ``<= p`` yields a
              step of -1, any other sample a step of +1
    :param num_walks: number of independent walks
    :param num_times: number of time points at which positions are recorded
    :param random: object providing ``uniform(low, high, size=...)``
    :returns: the tuple ``(position, position2, pos_hist, pos_hist_times)``,
              i.e. the positions and squared positions summed over the
              walks, the positions at the selected time points, and those
              time points as an array
    """
    # `position` and `position2` are computed from scratch after the run,
    # so no zero-initialised placeholders are needed for them

    # Histogram at num_times selected time points
    pos_hist = np.zeros((num_walks, num_times))
    pos_hist_times = [(N // num_times) * i for i in range(num_times)]

    # Create and initialise our TimeFunction r
    x_d = Dimension(name='x_d')
    t_d = Dimension(name='t_d')
    r = TimeFunction(name='r', dimensions=(x_d, t_d),
                     shape=(N + 1, num_walks))

    # Setting each walk's first element to x0
    r.data[0, :] = x0

    steps = Function(name='steps', dimensions=(x_d, t_d),
                     shape=(N + 1, num_walks))

    # Populating steps with -1 if value in rs <= p at that point and 1
    # otherwise
    rs = random.uniform(0, 1, size=N * num_walks).reshape(num_walks, N)
    for n in range(num_walks):
        steps.data[:N, n] = np.where(rs[n] <= p, -1, 1)

    # Creating and applying operator that contains equation for adding steps
    eq = Eq(r.forward, r + steps)
    op = Operator(eq)
    op.apply()

    # Summing over data to give positions
    position = np.sum(r.data, axis=1)  # Accumulated positions
    position2 = np.sum(r.data**2, axis=1)  # Accumulated positions**2

    pos_hist[:, :] = np.transpose(r.data[pos_hist_times, :])

    return position, position2, pos_hist, np.array(pos_hist_times)
def test_bcs_basic(self):
    """
    Test MPI in presence of boundary condition loops. Here, no halo
    exchange is expected (as there is no stencil in the computed
    expression) but we check that:

        * the left BC loop is computed by the leftmost rank only
        * the right BC loop is computed by the rightmost rank only
    """
    thickness = 4
    grid = Grid(shape=(20,))
    x = grid.dimensions[0]
    t = grid.stepping_dim

    u = TimeFunction(name='u', grid=grid, time_order=1)

    xleft = SubDimension.left(name='xleft', parent=x, thickness=thickness)
    xi = SubDimension.middle(name='xi', parent=x,
                             thickness_left=thickness,
                             thickness_right=thickness)
    xright = SubDimension.right(name='xright', parent=x,
                                thickness=thickness)

    centre = Eq(u[t+1, xi], 1)
    left_bc = Eq(u[t+1, xleft], u[t+1, xleft+1] + 1)
    right_bc = Eq(u[t+1, xright], u[t+1, xright-1] + 1)

    op = Operator([centre, left_bc, right_bc])
    op.apply(time_m=1, time_M=1)

    data = u.data_ro_domain
    glb_pos_map = u.grid.distributor.glb_pos_map
    if LEFT in glb_pos_map[x]:
        assert np.all(data[0, thickness:] == 1.)
        assert np.all(data[0, :thickness] == range(thickness+1, 1, -1))
    else:
        assert np.all(data[0, :-thickness] == 1.)
        assert np.all(data[0, -thickness:] == range(2, thickness+2))
def test_edge_cases(self, exprs, simd_level, expected):
    """
    Tests for issue #1695.

    :param exprs: sequence of strings, each ``eval``-ed in this function's
                  scope (so they may refer to ``t``, ``x``, ``y``, ``g``
                  and ``h1``)
    :param simd_level: if truthy, index of the Iteration expected to carry
                       an ``omp simd`` pragma
    :param expected: expected content of ``h1.data`` after the run
    """
    t, x, y = dimensions('t x y')

    g = TimeFunction(name='g', shape=(1, 3), dimensions=(t, x),
                     time_order=0, dtype=np.int32)
    g.data[0, :] = [0, 1, 2]
    h1 = TimeFunction(name='h1', shape=(1, 3), dimensions=(t, y),
                      time_order=0)
    h1.data[0, :] = 0

    # Evaluate into a private copy: writing the results back into the
    # caller's list would leave already-evaluated objects in the
    # parametrization data, which cannot be `eval`-ed a second time
    exprs = list(exprs)

    # List comprehension would need explicit locals/globals mappings to eval
    for i, e in enumerate(exprs):
        exprs[i] = eval(e)

    op = Operator(exprs, opt=('advanced', {'openmp': True}))

    iterations = FindNodes(Iteration).visit(op)
    try:
        assert 'omp for collapse' in iterations[0].pragmas[0].value
        if simd_level:
            assert 'omp simd' in iterations[simd_level].pragmas[0].value
    except Exception:
        # Deliberately broad (a failed assertion or a missing pragma both
        # lead here), but no longer swallowing KeyboardInterrupt/SystemExit
        # as a bare `except:` would.
        # E.g. gcc-5 doesn't support array reductions, so the compiler will
        # generate different legal code
        assert not Ompizer._support_array_reduction(
            configuration['compiler'])
        assert any('omp for collapse' in i.pragmas[0].value
                   for i in iterations if i.pragmas)

    op.apply()
    assert (h1.data == expected).all()
def test_different_halos():
    """
    Compare a default Operator against its ``linearize`` counterpart on an
    equation mixing functions with different space orders (8, 12 and 16):
    only the linearized code must mention ``uL0``, and both Operators must
    produce the same data.
    """
    grid = Grid(shape=(8, 8, 8))

    f = Function(name='f', grid=grid, space_order=8)
    g = Function(name='g', grid=grid, space_order=16)
    u = TimeFunction(name='u', grid=grid, space_order=12)
    u1 = TimeFunction(name='u', grid=grid, space_order=12)

    f.data[:] = 1.
    g.data[:] = 2.

    eqn = Eq(u.forward, u + f + g + 1)

    op0 = Operator(eqn)
    op1 = Operator(eqn, opt=('advanced', {'linearize': True}))

    # Check generated code
    assert 'uL0' not in str(op0)
    assert 'uL0' in str(op1)

    op0.apply(time_M=4)
    op1.apply(time_M=4, u=u1)

    assert np.all(u.data == u1.data)
def test_conddim_backwards():
    """
    Compare a ``noop`` Operator against its ``buffering`` counterpart on a
    backward-propagating equation reading a TimeFunction saved along a
    ConditionalDimension: the buffered Operator must have three iteration
    trees and one Array, and both Operators must produce the same data.
    """
    nt = 10
    grid = Grid(shape=(4, 4))
    time_dim = grid.time_dim
    x, y = grid.dimensions

    factor = Constant(name='factor', value=2, dtype=np.int32)
    time_sub = ConditionalDimension(name="time_sub", parent=time_dim,
                                    factor=factor)

    u = TimeFunction(name='u', grid=grid, time_order=0, save=nt,
                     time_dim=time_sub)
    v = TimeFunction(name='v', grid=grid)
    v1 = TimeFunction(name='v', grid=grid)

    for slot in range(u.save):
        u.data[slot, :] = slot

    eqns = [Eq(v.backward, v.backward + v + u + 1.)]

    op0 = Operator(eqns, opt='noop')
    op1 = Operator(eqns, opt='buffering')

    # Check generated code
    assert len(retrieve_iteration_tree(op1)) == 3
    arrays = [sym for sym in FindSymbols().visit(op1) if sym.is_Array]
    assert len(arrays) == 1

    op0.apply(time_m=1, time_M=9)
    op1.apply(time_m=1, time_M=9, v=v1)

    assert np.all(v.data == v1.data)
def test_mode_runtime_backward():
    """Test autotuning in runtime mode, with backward time stepping."""
    grid = Grid(shape=(96, 96, 96))
    f = TimeFunction(name='f', grid=grid)

    update = Eq(f.backward, f + 1.)
    op = Operator(update, dle=('advanced', {'openmp': False}))
    summary = op.apply(time=101, autotune=('basic', 'runtime'))

    # AT is expected to have attempted 6 block shapes
    record = op._state['autotuning'][0]
    assert record['runs'] == 6

    # AT is expected to have executed 30 timesteps
    section = summary[('section0', None)]
    assert section.itershapes[0][0] == 101-30

    assert np.all(f.data[0] == 101)
    assert np.all(f.data[1] == 100)
def test_streaming_postponed_deletion(self):
    """
    Compare a ``noop`` Operator (with ``usave`` passed as ``gpu-fit``)
    against a ``('streaming', 'orchestrate')`` one on two equations that
    both read ``usave``: the results in ``u`` and ``v`` must coincide.
    """
    nt = 10
    grid = Grid(shape=(10, 10, 10))

    u = TimeFunction(name='u', grid=grid)
    v = TimeFunction(name='v', grid=grid)
    usave = TimeFunction(name='usave', grid=grid, save=nt)
    u1 = TimeFunction(name='u', grid=grid)
    v1 = TimeFunction(name='v', grid=grid)

    for step in range(nt):
        usave.data[step, :] = step

    eqns = [Eq(u.forward, u + usave),
            Eq(v.forward, v + u.forward.dx + usave)]

    op0 = Operator(eqns, opt=('noop', {'gpu-fit': usave}))
    op1 = Operator(eqns, opt=('streaming', 'orchestrate'))

    op0.apply(time_M=nt-1)
    op1.apply(time_M=nt-1, u=u1, v=v1)

    for reference, computed in ((u, u1), (v, v1)):
        assert np.all(reference.data == computed.data)
def test_basic(self):
    """
    Basic ConditionalDimension test: ``u`` is advanced in time, ``u2``
    saves every timestep and ``usave`` saves one timestep every ``factor``.
    """
    nt = 19
    factor = 4
    nsaved = (nt+factor-1)//factor

    grid = Grid(shape=(11, 11))
    time_dim = grid.time_dim

    u = TimeFunction(name='u', grid=grid)
    assert grid.stepping_dim in u.indices

    u2 = TimeFunction(name='u2', grid=grid, save=nt)
    assert time_dim in u2.indices

    time_subsampled = ConditionalDimension('t_sub', parent=time_dim,
                                           factor=factor)
    usave = TimeFunction(name='usave', grid=grid, save=nsaved,
                         time_dim=time_subsampled)
    assert time_subsampled in usave.indices

    op = Operator([Eq(u.forward, u + 1.),
                   Eq(u2.forward, u2 + 1.),
                   Eq(usave, u)])
    op.apply(t_M=nt-2)

    assert np.allclose(u.data[(nt-1) % 3], nt-1)
    for step in range(nt):
        assert np.allclose(u2.data[step], step)
    for slot in range(nsaved):
        assert np.allclose(usave.data[slot], slot*factor)
def test_subdimleft_notparallel(self):
    """
    Tests application of an Operator consisting of a subdimension
    defined over different sub-regions, explicitly created through the
    use of :class:`SubDimension`s.

    This tests that flow direction is not being automatically inferred
    from whether the subdimension is on the left or right boundary.
    """
    thickness = 4
    grid = Grid(shape=(20, 20))
    x, y = grid.dimensions
    t = grid.stepping_dim

    u = TimeFunction(name='u', save=None, grid=grid, space_order=1,
                     time_order=0)

    xl = SubDimension.left(name='xl', parent=x, thickness=thickness)
    yi = SubDimension.middle(name='yi', parent=y,
                             thickness_left=thickness,
                             thickness_right=thickness)

    # Flows inward (i.e. forward) rather than outward
    op = Operator([Eq(u[t+1, xl, yi], u[t+1, xl-1, yi] + 1)])

    loops = FindNodes(Iteration).visit(op)
    for loop in loops:
        if loop.dim == xl:
            assert loop.is_Affine and loop.is_Sequential
    for loop in loops:
        if loop.dim == yi:
            assert loop.is_Affine and loop.is_Parallel

    op.apply(time_m=0, time_M=0)

    for offset in range(12):
        column = u.data[0, :thickness, thickness+offset]
        assert np.all(column == [1, 2, 3, 4])
    assert np.all(u.data[0, thickness:] == 0)
    assert np.all(u.data[0, :, thickness+12:] == 0)
def test_symbolic_factor(self):
    """
    Test ConditionalDimension with symbolic factor (provided as a Constant).

    The Operator is run once with the default value of ``fact`` and once
    overriding it at ``apply`` time.
    """
    g = Grid(shape=(4, 4, 4))
    u = TimeFunction(name='u', grid=g, time_order=0)

    fact = Constant(name='fact', dtype=np.int32, value=4)
    tsub = ConditionalDimension(name='tsub', parent=g.time_dim, factor=fact)
    usave = TimeFunction(name='usave', grid=g, time_dim=tsub, save=4)

    op = Operator([Eq(u, u + 1), Eq(usave, u)])

    op.apply(time=7)  # Use `fact`'s default value, 4
    for slot, expected in enumerate((1, 5)):
        assert np.all(usave.data[slot] == expected)

    u.data[:] = 0.
    op.apply(time=7, fact=2)
    for slot, expected in enumerate((1, 3, 5, 7)):
        assert np.all(usave.data[slot] == expected)
def test_two_heterogeneous_buffers(self):
    """
    Compare a ``noop`` Operator against its ``buffering`` counterpart on
    two coupled equations over saved TimeFunctions: the buffered Operator
    must have three iteration trees and two Arrays, and both Operators
    must produce the same data.
    """
    nt = 10
    grid = Grid(shape=(4, 4))

    u = TimeFunction(name='u', grid=grid, save=nt)
    u1 = TimeFunction(name='u', grid=grid, save=nt)
    v = TimeFunction(name='v', grid=grid, save=nt)
    v1 = TimeFunction(name='v', grid=grid, save=nt)

    eqns = [Eq(u.forward, u + v + 1),
            Eq(v.forward, u + v + v.backward)]

    op0 = Operator(eqns, opt='noop')
    op1 = Operator(eqns, opt='buffering')

    # Check generated code
    assert len(retrieve_iteration_tree(op1)) == 3
    arrays = [sym for sym in FindSymbols().visit(op1) if sym.is_Array]
    assert len(arrays) == 2

    op0.apply(time_M=nt - 2)
    op1.apply(time_M=nt - 2, u=u1, v=v1)

    for reference, computed in ((u, u1), (v, v1)):
        assert np.all(reference.data == computed.data)
def test_over_injection(self):
    """
    Compare a ``noop`` Operator against its ``buffering`` counterpart when
    the buffered function is also the target of a source injection and the
    argument of a receiver interpolation: both must produce the same ``u``.
    """
    nt = 10
    grid = Grid(shape=(4, 4))

    src = SparseTimeFunction(name='src', grid=grid, npoint=1, nt=nt)
    rec = SparseTimeFunction(name='rec', grid=grid, npoint=1, nt=nt)
    u = TimeFunction(name="u", grid=grid, time_order=2, space_order=2,
                     save=nt)
    u1 = TimeFunction(name="u", grid=grid, time_order=2, space_order=2,
                      save=nt)

    src.data[:] = 1.

    update = [Eq(u.forward, u + 1)]
    injection = src.inject(field=u.forward, expr=src)
    interpolation = rec.interpolate(expr=u.forward)
    eqns = update + injection + interpolation

    op0 = Operator(eqns, opt='noop')
    op1 = Operator(eqns, opt='buffering')

    # Check generated code
    ntrees = len(retrieve_iteration_tree(op1))
    assert ntrees ==\
        5 + bool(configuration['language'] != 'C')
    arrays = [sym for sym in FindSymbols().visit(op1) if sym.is_Array]
    assert len(arrays) == 1

    op0.apply(time_M=nt - 2)
    op1.apply(time_M=nt - 2, u=u1)

    assert np.all(u.data == u1.data)
def test_no_index(self):
    """
    Test behaviour when the ConditionalDimension is used as a symbol in an
    expression, rather than as an index into a Function.
    """
    nt = 19
    grid = Grid(shape=(11, 11))
    time = grid.time_dim

    u = TimeFunction(name='u', grid=grid)
    assert grid.stepping_dim in u.indices

    v = Function(name='v', grid=grid)

    factor = 4
    time_subsampled = ConditionalDimension('t_sub', parent=time, factor=factor)

    eqns = [Eq(u.forward, u + 1), Eq(v, v + u*u*time_subsampled)]
    op = Operator(eqns)
    op.apply(t_M=nt-2)

    # `np.allclose` already reduces to a single bool, no `np.all` needed.
    # NOTE(review): the modulo presumably mirrors the number of time
    # buffers of `u`; for nt=19 the resulting index is 0 -- confirm that
    # 3 is the intended value.
    assert np.allclose(u.data[(nt-1) % 3], nt-1)

    # Expected result is 1600. With u[t] = t, `v` is incremented at every
    # `factor`-th timestep by u[t]**2 times the subsampled index:
    # v = u[0]**2 * 0 + u[4]**2 * 1 + u[8]**2 * 2 + u[12]**2 * 3 + u[16]**2 * 4
    #   = 16 * 1 + 64 * 2 + 144 * 3 + 256 * 4 = 1600
    assert np.allclose(v.data, 1600)
def test_bcs(self):
    """
    Tests application of an Operator consisting of multiple equations
    defined over different sub-regions, explicitly created through the
    use of :class:`SubDimension`s.
    """
    grid = Grid(shape=(20, 20))
    x, y = grid.dimensions
    t = grid.stepping_dim
    thickness = 4

    u = TimeFunction(name='u', save=None, grid=grid, space_order=0, time_order=1)

    # Left strip, interior and right strip along `x`; interior only along `y`
    xleft = SubDimension.left(name='xleft', parent=x, thickness=thickness)
    xi = SubDimension.middle(name='xi', parent=x,
                             thickness_left=thickness, thickness_right=thickness)
    xright = SubDimension.right(name='xright', parent=x, thickness=thickness)
    yi = SubDimension.middle(name='yi', parent=y,
                             thickness_left=thickness, thickness_right=thickness)

    # The interior is set to 1; each boundary point is then its inner
    # neighbour along `x` plus 1
    centre = Eq(u[t+1, xi, yi], 1)
    left_bc = Eq(u[t+1, xleft, yi], u[t+1, xleft+1, yi] + 1)
    right_bc = Eq(u[t+1, xright, yi], u[t+1, xright-1, yi] + 1)

    op = Operator([centre, left_bc, right_bc])
    op.apply(time_m=1, time_M=1)

    inner = slice(thickness, -thickness)

    # The strips along `y` are never written to
    assert np.all(u.data[0, :, 0:thickness] == 0.)
    assert np.all(u.data[0, :, -thickness:] == 0.)

    # Left boundary along `x`
    for i in range(thickness):
        assert np.all(u.data[0, i, inner] == (thickness+1-i))

    # Right boundary along `x`
    for i in range(1, thickness + 1):
        assert np.all(u.data[0, -i, inner] == (thickness+2-i))

    # Interior
    assert np.all(u.data[0, inner, inner] == 1.)
def test_empty_arrays(self):
    """
    MFE for issue #1641: Operators involving sparse functions of size 0
    must run and leave the dense field untouched.
    """
    grid = Grid(shape=(4, 4), extent=(3.0, 3.0))

    f = TimeFunction(name='f', grid=grid, space_order=0)
    f.data[:] = 1.

    sf1 = SparseTimeFunction(name='sf1', grid=grid, npoint=0, nt=10)
    sf2 = SparseTimeFunction(name='sf2', grid=grid, npoint=0, nt=10)
    for sparse in (sf1, sf2):
        assert sparse.size == 0

    # Injection from empty sparse functions
    injection = sf1.inject(field=f, expr=sf1 + sf2 + 1.)
    op_inject = Operator(injection)
    op_inject.apply()
    assert np.all(f.data == 1.)

    # Again, but with a MatrixSparseTimeFunction
    empty_matrix = scipy.sparse.coo_matrix((0, 0), dtype=np.float32)
    msf = MatrixSparseTimeFunction(name="s", grid=grid, r=2,
                                   matrix=empty_matrix, nt=10)
    assert msf.size == 0

    interpolation = msf.interpolate(f)
    op_interp = Operator(interpolation)

    msf.manual_scatter()
    op_interp(time_m=0, time_M=9)
    msf.manual_gather()

    assert np.all(f.data == 1.)
def test_tasking_fused(self):
    """
    Check the code generated with the ('tasking', 'fuse', 'orchestrate')
    optimizations when two saved TimeFunctions are written from the same
    temporary, then check the numerical output.
    """
    nt = 10
    bundle0 = Bundle()
    grid = Grid(shape=(10, 10, 10), subdomains=bundle0)

    tmp = Function(name='tmp', grid=grid)
    u = TimeFunction(name='u', grid=grid, save=nt)
    v = TimeFunction(name='v', grid=grid, save=nt)
    w = TimeFunction(name='w', grid=grid)

    equations = [Eq(w.forward, w + 1),
                 Eq(tmp, w.forward),
                 Eq(u.forward, tmp, subdomain=bundle0),
                 Eq(v.forward, tmp, subdomain=bundle0)]

    op = Operator(equations, opt=('tasking', 'fuse', 'orchestrate'))

    # Check generated code
    assert len(retrieve_iteration_tree(op)) == 4
    locks = [s for s in FindSymbols().visit(op) if isinstance(s, Lock)]
    assert len(locks) == 1  # Only 1 because it's only `tmp` that needs protection
    assert len(op._func_table) == 2

    copy_root = op._func_table['copy_device_to_host0'].root
    exprs = FindNodes(Expression).visit(copy_root)
    assert len(exprs) == 19

    # Statements expected at given positions within the copy routine
    expected_code = {
        12: 'int id = sdata0->id;',
        13: 'const int time = sdata0->time;',
        14: 'lock0[0] = 1;',
        17: 'lock0[0] = 2;',
        18: 'sdata0->flag = 1;',
    }
    for position, code in expected_code.items():
        assert str(exprs[position]) == code

    # Between acquiring and releasing the lock, `u` and `v` get written
    assert exprs[15].write is u
    assert exprs[16].write is v

    op.apply(time_M=nt-2)

    for saved in (u, v):
        assert np.all(saved.data[nt-1] == 9)
def test_composite_full(self):
    """
    Compare a 'noop' Operator against one built with the
    ('buffering', 'tasking', 'streaming', 'orchestrate') optimizations:
    check the generated code of the latter and that the two produce
    identical data.
    """
    nt = 10
    grid = Grid(shape=(4, 4))

    u = TimeFunction(name='u', grid=grid, save=nt)
    v = TimeFunction(name='v', grid=grid, save=nt)
    # Same names as `u`/`v`, so usable as runtime overrides
    u1 = TimeFunction(name='u', grid=grid, save=nt)
    v1 = TimeFunction(name='v', grid=grid, save=nt)

    equations = [Eq(u.forward, u + v + 1),
                 Eq(v.forward, u + v + v.backward)]

    op0 = Operator(equations, opt=('noop', {'gpu-fit': (u, v)}))
    op1 = Operator(equations,
                   opt=('buffering', 'tasking', 'streaming', 'orchestrate'))

    # Check generated code
    trees = retrieve_iteration_tree(op1)
    assert len(trees) == 9
    locks = [s for s in FindSymbols().visit(op1) if isinstance(s, Lock)]
    assert len(locks) == 2

    op0.apply(time_M=nt-2)
    op1.apply(time_M=nt-2, u=u1, v=v1)

    for reference, optimized in ((u, u1), (v, v1)):
        assert np.all(reference.data == optimized.data)
def test_shifted(self):
    """
    Check saving into a subsampled TimeFunction through a time index that
    is shifted by a Constant whose value is supplied at apply time.
    """
    nt = 19
    grid = Grid(shape=(11, 11))
    time = grid.time_dim

    u = TimeFunction(name='u', grid=grid)
    assert grid.stepping_dim in u.indices

    u2 = TimeFunction(name='u2', grid=grid, save=nt)
    assert time in u2.indices

    factor = 4
    time_subsampled = ConditionalDimension('t_sub', parent=time, factor=factor)
    usave = TimeFunction(name='usave', grid=grid, save=2, time_dim=time_subsampled)
    assert time_subsampled in usave.indices

    t_sub_shift = Constant(name='t_sub_shift', dtype=np.int32)

    # `usave` is accessed at `time_subsampled - t_sub_shift`
    shifted_usave = usave.subs(time_subsampled, time_subsampled - t_sub_shift)
    equations = [Eq(u.forward, u + 1.),
                 Eq(u2.forward, u2 + 1.),
                 Eq(shifted_usave, u)]

    op = Operator(equations)

    # Starting at time_m=10, so time_subsampled - t_sub_shift is in range
    op.apply(time_m=10, time_M=nt - 2, t_sub_shift=3)

    assert np.allclose(u.data[0], 8)
    for i in range(10, nt):
        assert np.allclose(u2.data[i], i - 10)
    for i in range(2):
        assert np.allclose(usave.data[i], 2 + i * factor)
def test_mode_runtime_backward():
    """
    Test autotuning in runtime mode for an Operator that writes to the
    backward time slot.
    """
    nsteps = 101
    at_steps = 30

    grid = Grid(shape=(64, 64, 64))
    f = TimeFunction(name='f', grid=grid)

    update = Eq(f.backward, f + 1.)
    op = Operator(update, dle=('advanced', {'openmp': False}))
    summary = op.apply(time=nsteps, autotune=('basic', 'runtime'))

    # AT is expected to have attempted 6 block shapes
    at_record = op._state['autotuning'][0]
    assert at_record['runs'] == 6

    # AT is expected to have executed 30 timesteps
    timed_steps = summary['section0'].itershapes[0][0]
    assert timed_steps == nsteps - at_steps

    assert np.all(f.data[0] == nsteps)
    assert np.all(f.data[1] == nsteps - 1)
def test_no_sequential(self, expr, expected):
    """
    Tests code generation on skewed indices.

    The equation described by ``expr`` is compiled with blocking and
    skewing enabled. The test then checks that the Operator contains no
    time Iteration, that it contains exactly three Iterations (over ``x``,
    ``y`` and ``z``, in this order), and that the first generated
    expression matches ``expected``.

    Parameters
    ----------
    expr : str
        Source of the equation, evaluated with ``eval`` in the scope of this
        function; it may therefore refer to the locals defined below
        (``grid``, ``x``, ``y``, ``z``, ``u``, ``v``). Presumably supplied by
        a test parametrization not visible here.
    expected : str
        Expected string form of the first expression, with all blanks removed.
    """
    grid = Grid(shape=(3, 3, 3))
    x, y, z = grid.dimensions

    # `u` and `v` look unused, but they are looked up by name from within
    # `eval(expr)`, hence the `noqa` markers
    u = Function(name='u', grid=grid)  # noqa
    v = Function(name='v', grid=grid)  # noqa

    # NOTE(review): `eval` is only acceptable here as long as `expr` comes
    # from trusted, hard-coded test parameters
    eqn = eval(expr)
    # List comprehension would need explicit locals/globals mappings to eval
    op = Operator(eqn, opt=('blocking', {'skewing': True}))
    op.apply()

    # No Iteration over a time dimension must have been generated
    iters = FindNodes(Iteration).visit(op)
    time_iter = [i for i in iters if i.dim.is_Time]
    assert len(time_iter) == 0

    # Exactly one Iteration per space dimension, in x-y-z order
    iters = FindNodes(Iteration).visit(op)
    assert len(iters) == 3
    assert iters[0].dim is x
    assert iters[1].dim is y
    assert iters[2].dim is z

    # Compare the first expression, ignoring blanks, against the expectation
    skewed = [i.expr for i in FindNodes(Expression).visit(op)]
    assert str(skewed[0]).replace(' ', '') == expected
def test_at_w_mpi():
    """
    Make sure autotuning works in presence of MPI. MPI ranks work in
    isolation to determine the best block size, locally.
    """
    grid = Grid(shape=(8, 8))
    t = grid.stepping_dim
    x, y = grid.dimensions

    f = TimeFunction(name='f', grid=grid, time_order=1)
    f.data_with_halo[:] = 1.

    stencil = Eq(f.forward, f[t, x, y-1] + f[t, x, y+1])
    op = Operator(stencil, dle=('advanced', {'openmp': False, 'blockinner': True}))

    op.apply(time=-1, autotune=('basic', 'runtime'))
    # Nothing really happened, as not enough timesteps
    for buffer_index in (0, 1):
        assert np.all(f.data_ro_domain[buffer_index] == 1.)

    # The 'destructive' mode writes directly to `f` for whatever timesteps required
    # to perform the autotuning. Eventually, the result is complete garbage; note
    # also that this autotuning mode disables the halo exchanges
    op.apply(time=-1, autotune=('basic', 'destructive'))
    assert np.all(f._data_ro_with_inhalo.sum() == 904)

    # Check the halo hasn't been touched during AT
    glb_pos_map = grid.distributor.glb_pos_map
    on_left = LEFT in glb_pos_map[y]
    untouched_column = -1 if on_left else 0
    assert np.all(f._data_ro_with_inhalo[:, :, untouched_column] == 1)

    # Finally, try running w/o AT, just to be sure nothing was broken
    f.data_with_halo[:] = 1.
    op.apply(time=2)
    if on_left:
        expected_columns = ((0, 5.), (1, 7.), (slice(2, 4), 8.))
    else:
        expected_columns = ((slice(4, 6), 8), (6, 7), (7, 5))
    for column, value in expected_columns:
        assert np.all(f.data_ro_domain[1, :, column] == value)
def test_multiple_blocking():
    """
    Test that if there are more than one blocked Iteration nests, then
    the autotuner works "incrementally" -- it starts determining the best block
    shape for the first Iteration nest, then it moves on to the second one,
    then the third, etc. IOW, the autotuner must not be attempting the
    cartesian product of all possible block shapes across the various
    blocked nests.
    """
    grid = Grid(shape=(64, 64, 64))

    u = TimeFunction(name='u', grid=grid, space_order=2)
    v = TimeFunction(name='v', grid=grid)

    def make_operator(openmp):
        # Fresh equations at each call, blocked, with OpenMP switched on/off
        equations = [Eq(u.forward, u + 1),
                     Eq(v.forward, u.forward.dx2 + v + 1)]
        return Operator(equations, dle=('blocking', {'openmp': openmp}))

    op = make_operator(False)

    # First of all, make sure there are indeed two different loop nests
    for nest in ('bf0', 'bf1'):
        assert nest in op._func_table

    # 'basic' mode: 12 runs, that is 6 for each Iteration nest
    # 'aggressive' mode: 60 runs
    for slot, (mode, nruns) in enumerate((('basic', 12), ('aggressive', 60))):
        op.apply(time_M=0, autotune=mode)
        record = op._state['autotuning'][slot]
        assert record['runs'] == nruns
        assert record['tpr'] == options['squeezer'] + 1
        assert len(record['tuned']) == 4

    # With OpenMP, we tune over one more argument (`nthreads`), though the AT
    # will only attempt one value
    op = make_operator(True)
    op.apply(time_M=0, autotune='basic')
    record = op._state['autotuning'][0]
    assert record['runs'] == 12
    assert record['tpr'] == options['squeezer'] + 1
    assert len(record['tuned']) == 5
def mat_mat(A, B, C, optimize):
    """
    ``AB = C``.

    Build and run an Operator that increments ``C`` by ``A*B``; ``optimize``
    is forwarded as the Operator's ``dle`` argument.
    """
    contraction = Inc(C, A*B)
    Operator(contraction, dle=optimize).apply()
    info('Executed `AB = C`')
def mat_vec(A, x, b, optimize):
    """
    ``Ax = b``.

    Build and run an Operator that increments ``b`` by ``A*x``; ``optimize``
    is forwarded as the Operator's ``dle`` argument.
    """
    contraction = Inc(b, A*x)
    Operator(contraction, dle=optimize).apply()
    info('Executed `Ax = b`')
def transpose_mat_vec(A, x, b, optimize):
    """
    ``A -> A^T, A^Tx = b``.

    ``A`` is accessed with its two indices swapped, and ``b`` is incremented
    by the product of that access with ``x``; ``optimize`` is forwarded as
    the Operator's ``dle`` argument.
    """
    first, second = A.indices
    transposed = A[second, first]
    Operator([Inc(b, transposed*x)], dle=optimize).apply()
    info('Executed `A^Tx = b`')
def mat_mat_sum(A, B, C, D, optimize):
    """
    ``AB + AC = D``.

    Build and run an Operator that increments ``D`` by ``A*B + A*C``;
    ``optimize`` is forwarded as the Operator's ``dle`` argument.
    """
    contraction = Inc(D, A*B + A*C)
    Operator(contraction, dle=optimize).apply()
    info('Executed `AB + AC = D`')
def chain_contractions(A, B, C, D, E, F, optimize):
    """
    ``AB + AC = D, DE = F``.

    Build and run a single Operator made of two increments, in this order:
    ``D`` by ``A*B + A*C``, then ``F`` by ``D*E``. ``optimize`` is forwarded
    as the Operator's ``dle`` argument.
    """
    first_contraction = Inc(D, A*B + A*C)
    second_contraction = Inc(F, D*E)
    Operator([first_contraction, second_contraction], dle=optimize).apply()
    info('Executed `AB + AC = D, DE = F`')