def test_collapsing_v2(self):
    """
    MFE from issue #1478.

    One increment of ``dW`` is generated per ``(i, j)`` integer offset pair;
    the test then checks the loop structure of the resulting Operator and
    the properties of its first four Iterations.
    """
    n = 8
    m = 8
    nx, ny, nchi, ncho = 12, 12, 1, 1
    half_n = n // 2
    half_m = m // 2

    x, y = SpaceDimension("x"), SpaceDimension("y")
    ci, co = Dimension("ci"), Dimension("co")
    i, j = Dimension("i"), Dimension("j")

    grid = Grid((nx, ny), dtype=np.float32, dimensions=(x, y))

    X = Function(name="xin", dimensions=(ci, x, y), shape=(nchi, nx, ny),
                 grid=grid, space_order=half_n)
    dy = Function(name="dy", dimensions=(co, x, y), shape=(ncho, nx, ny),
                  grid=grid, space_order=half_n)
    dW = Function(name="dW", dimensions=(co, ci, i, j),
                  shape=(ncho, nchi, n, m), grid=grid)

    # Inside the comprehension `p` and `q` are plain integers, so each
    # equation addresses one fixed entry of `dW`
    eq = []
    for p in range(n):
        for q in range(m):
            target = dW[co, ci, p, q]
            shifted = X[ci, x + p - half_n, y + q - half_m]
            eq.append(Eq(target, dW[co, ci, p, q] + dy[co, x, y] * shifted))

    op = Operator(eq, opt=('advanced', {'openmp': True}))

    assert_structure(op, ['co,ci,x,y'])

    loops = FindNodes(Iteration).visit(op)
    assert loops[0].ncollapsed == 1
    assert loops[1].is_Vectorized
    assert loops[2].is_Sequential
    assert loops[3].is_Sequential
def test_at_w_mpi():
    """
    Make sure autotuning works in presence of MPI.

    MPI ranks work in isolation to determine the best block size, locally.
    """
    grid = Grid(shape=(8, 8))
    x, y = grid.dimensions
    t = grid.stepping_dim

    f = TimeFunction(name='f', grid=grid, time_order=1)
    f.data_with_halo[:] = 1.

    stencil = Eq(f.forward, f[t, x - 1, y] + f[t, x + 1, y])
    op = Operator(stencil, opt=('advanced', {'openmp': False,
                                             'blockinner': True}))

    # Not enough timesteps: nothing really happens, `f` is left untouched
    op.apply(time=-1, autotune=('basic', 'runtime'))
    for buf in (0, 1):
        assert np.all(f.data_ro_domain[buf] == 1.)

    # The 'destructive' mode writes directly to `f` for whatever timesteps
    # are required to perform the autotuning. Eventually, the result is
    # complete garbage; note also that this autotuning mode disables the
    # halo exchanges
    op.apply(time=-1, autotune=('basic', 'destructive'))
    assert np.all(f._data_ro_with_inhalo.sum() == 904)

    # Check the halo hasn't been touched during AT
    glb_pos_map = grid.distributor.glb_pos_map
    on_left = LEFT in glb_pos_map[x]
    if on_left:
        assert np.all(f._data_ro_with_inhalo[:, -1] == 1)
    else:
        assert np.all(f._data_ro_with_inhalo[:, 0] == 1)

    # Finally, try running w/o AT, just to be sure nothing was broken
    f.data_with_halo[:] = 1.
    op.apply(time=2)
    if LEFT in glb_pos_map[x]:
        assert np.all(f.data_ro_domain[1, 0] == 5.)
        assert np.all(f.data_ro_domain[1, 1] == 7.)
        assert np.all(f.data_ro_domain[1, 2:4] == 8.)
    else:
        assert np.all(f.data_ro_domain[1, 4:6] == 8)
        assert np.all(f.data_ro_domain[1, 6] == 7)
        assert np.all(f.data_ro_domain[1, 7] == 5)
def test_misc_dims(self):
    """
    Tests grid-independent :class:`Function`s, which require YASK's "misc"
    dimensions.

    A five-point stencil along ``x`` is applied to ``u``, with the weights
    read from the ``(x, dx)`` Function ``c``.
    """
    dx = Dimension(name='dx')
    grid = Grid(shape=(10, 10))
    x, y = grid.dimensions
    time = grid.time_dim

    u = TimeFunction(name='u', grid=grid, time_order=1, space_order=4, save=4)
    c = Function(name='c', dimensions=(x, dx), shape=(10, 5))

    rhs = (u[time, x - 2, y] * c[x, 0] +
           u[time, x - 1, y] * c[x, 1] +
           u[time, x, y] * c[x, 2] +
           u[time, x + 1, y] * c[x, 3] +
           u[time, x + 2, y] * c[x, 4])
    step = Eq(u.forward, rhs)

    # Row `row` of `c` holds the five weights used at x == row
    base_weights = (1.0, 1.0, 3.0, 6.0, 5.0)
    for row in range(10):
        for col, base in enumerate(base_weights):
            c.data[row, col] = base + row

    u.data[:] = 0.0
    u.data[0, 2, :] = 2.0

    op = Operator(step)
    assert 'run_solution' in str(op)

    op(time_m=0, time_M=0)

    assert (np.all(u.data[1, 0, :] == 10.0))
    assert (np.all(u.data[1, 1, :] == 14.0))
    assert (np.all(u.data[1, 2, :] == 10.0))
    assert (np.all(u.data[1, 3, :] == 8.0))
    assert (np.all(u.data[1, 4, :] == 10.0))
    assert (np.all(u.data[1, 5:10, :] == 0.0))
def test_full_shape_with_subdims(self):
    """
    Like `test_full_alias_shape_after_blocking`, but SubDomains (and
    therefore SubDimensions) are used. Nevertheless, the temporary shape
    should still be dictated by the root Dimensions.
    """
    grid = Grid(shape=(3, 3, 3))
    x, y, z = grid.dimensions  # noqa
    t = grid.stepping_dim

    f = Function(name='f', grid=grid)
    f.data_with_halo[:] = 1.
    u = TimeFunction(name='u', grid=grid, space_order=3)
    u.data_with_halo[:] = 0.

    # Leads to 3D aliases
    eqn = Eq(u.forward,
             ((u[t, x, y, z] + u[t, x+1, y+1, z+1])*3*f +
              (u[t, x+2, y+2, z+2] + u[t, x+3, y+3, z+3])*3*f + 1),
             subdomain=grid.interior)

    op0 = Operator(eqn, dse='noop', dle=('advanced', {'openmp': True}))
    op1 = Operator(eqn, dse='aggressive', dle=('advanced', {'openmp': True}))

    # Symbols the Array shape is expected to be expressed in
    xi0_blk_size = op1.parameters[9]
    yi0_blk_size = op1.parameters[15]
    z_size = op1.parameters[20]

    # Check Array shape
    symbols = FindSymbols().visit(op1._func_table['bf0'].root)
    arrays = [s for s in symbols if s.is_Array]
    assert len(arrays) == 1
    a = arrays[0]
    assert len(a.dimensions) == 3
    assert a.halo == ((1, 1), (1, 1), (1, 1))
    assert Add(*a.symbolic_shape[0].args) == xi0_blk_size + 2
    assert Add(*a.symbolic_shape[1].args) == yi0_blk_size + 2
    assert Add(*a.symbolic_shape[2].args) == z_size + 2

    # Check numerical output: the optimized Operator must reproduce the
    # output of the unoptimized one
    op0(time_M=1)
    exp = np.copy(u.data[:])
    u.data_with_halo[:] = 0.
    op1(time_M=1)
    assert np.all(u.data == exp)
def test_cache_blocking_imperfect_nest_v2(blockinner):
    """
    Test that a non-perfect Iteration nest is blocked correctly. This
    is slightly different than ``test_cache_blocking_imperfect_nest``
    as here only one Iteration gets blocked.
    """
    shape = (16, 16, 16)
    grid = Grid(shape=shape, dtype=np.float64)

    u = TimeFunction(name='u', grid=grid, space_order=2)
    u.data[:] = np.linspace(0, 1, reduce(mul, shape),
                            dtype=np.float64).reshape(shape)

    eq = Eq(u.forward, 0.01*u.dy.dy)

    op0 = Operator(eq, opt='noop')
    op1 = Operator(eq, opt=('cire-sops', {'blockinner': blockinner}))
    op2 = Operator(eq, opt=('advanced-fsg', {'blockinner': blockinner}))

    # First, check the generated code
    trees = retrieve_iteration_tree(op2._func_table['bf0'].root)
    assert len(trees) == 2
    first, second = trees[0], trees[1]
    assert len(first) == len(second)
    assert all(a is b for a, b in zip(first[:2], second[:2]))
    assert first[2] is not second[2]
    assert first.root.dim.is_Incr
    assert second.root.dim.is_Incr
    assert op2.parameters[6] is first.root.step

    # Then compare the numerical output of the three Operators, each one
    # starting from the same initial data
    op0(time_M=0)

    u1 = TimeFunction(name='u1', grid=grid, space_order=2)
    u1.data[:] = np.linspace(0, 1, reduce(mul, shape),
                             dtype=np.float64).reshape(shape)
    op1(time_M=0, u=u1)

    u2 = TimeFunction(name='u2', grid=grid, space_order=2)
    u2.data[:] = np.linspace(0, 1, reduce(mul, shape),
                             dtype=np.float64).reshape(shape)
    op2(time_M=0, u=u2)

    assert np.allclose(u.data, u1.data, rtol=1e-07)
    assert np.allclose(u.data, u2.data, rtol=1e-07)
def test_multiple_subnests_v1(self):
    """
    Unlike ``test_multiple_subnestes_v0``, now we use the ``cire-rotate=True``
    option, which trades some of the inner parallelism for a smaller working
    set.
    """
    grid = Grid(shape=(3, 3, 3))
    x, y, z = grid.dimensions
    t = grid.stepping_dim

    f = Function(name='f', grid=grid)
    u = TimeFunction(name='u', grid=grid, space_order=3)

    eqn = Eq(u.forward,
             ((u[t, x, y, z] + u[t, x + 1, y + 1, z + 1]) * 3 * f +
              (u[t, x + 2, y + 2, z + 2] + u[t, x + 3, y + 3, z + 3]) * 3 * f
              + 1))

    options = {
        'openmp': True,
        'cire-mincost-sops': 1,
        'cire-rotate': True,
        'par-nested': 0,
        'par-collapse-ncores': 1,
        'par-dynamic-work': 0,
    }
    op = Operator(eqn, opt=('advanced', options))

    # Pragma expected on the nested parallel loops
    nested_pragma = ('omp parallel for collapse(1) '
                     'schedule(dynamic,1) '
                     'num_threads(nthreads_nested)')

    trees = retrieve_iteration_tree(op._func_table['bf0'].root)
    assert len(trees) == 2

    # The two trees share the outermost Iteration
    assert trees[0][0] is trees[1][0]
    assert trees[0][0].pragmas[0].value ==\
        'omp for collapse(2) schedule(dynamic,1)'

    assert not trees[0][2].pragmas
    assert not trees[0][3].pragmas
    assert trees[0][4].pragmas[0].value == nested_pragma

    assert not trees[1][2].pragmas
    assert trees[1][3].pragmas[0].value == nested_pragma
def test_subdimleft_parallel(self):
    """
    Tests application of an Operator consisting of a subdimension
    defined over different sub-regions, explicitly created through the
    use of :class:`SubDimension`s.

    This tests that flow direction is not being automatically inferred
    from whether the subdimension is on the left or right boundary.
    """
    thickness = 4

    grid = Grid(shape=(20, 20))
    x, y = grid.dimensions
    t = grid.stepping_dim

    u = TimeFunction(name='u', save=None, grid=grid, space_order=0,
                     time_order=1)

    xl = SubDimension.left(name='xl', parent=x, thickness=thickness)
    yi = SubDimension.middle(name='yi', parent=y,
                             thickness_left=thickness,
                             thickness_right=thickness)

    # Can be done in parallel
    eq = Eq(u[t + 1, xl, yi], u[t, xl, yi] + 1)

    op = Operator([eq])

    subdim_loops = [it for it in FindNodes(Iteration).visit(op)
                    if it.dim in [xl, yi]]
    assert all(it.is_Affine and it.is_Parallel for it in subdim_loops)

    op.apply(time_m=0, time_M=0)

    # Only the left `x` strip, restricted to the middle of `y`, is written
    assert np.all(u.data[1, 0:thickness, 0:thickness] == 0)
    assert np.all(u.data[1, 0:thickness, -thickness:] == 0)
    assert np.all(u.data[1, 0:thickness, thickness:-thickness] == 1)
    # NOTE(review): the slice starts at `thickness + 1`, so row `thickness`
    # is not covered by this check -- confirm whether that is intentional
    assert np.all(u.data[1, thickness + 1:, :] == 0)
def test_read_only_w_offset():
    """
    Compare a 'noop' Operator against a 'buffering' one when the saved
    TimeFunction `u` is only read, at the backward, current and forward
    time indices, over a time range that starts at `time_m=4`.
    """
    nt = 10
    grid = Grid(shape=(2, 2))

    u = TimeFunction(name='u', grid=grid, save=nt)
    v = TimeFunction(name='v', grid=grid)
    v1 = TimeFunction(name='v', grid=grid)

    # Time slot `step` of `u` is filled with the value `step`
    for step in range(nt):
        u.data[step, :] = step

    eqns = [Eq(v.forward, v + u.backward + u + u.forward + 1.)]

    op0 = Operator(eqns, opt='noop')
    op1 = Operator(eqns, opt='buffering')

    time_range = {'time_M': nt - 2, 'time_m': 4}
    op0.apply(**time_range)
    op1.apply(v=v1, **time_range)

    assert np.all(v.data == v1.data)
def test_interior_w_stencil(self):
    """
    Apply a two-point stencil restricted to ``grid.interior`` on a 1D grid
    and check the values found on the rank owning the left or the right
    end of ``x``.
    """
    grid = Grid(shape=(10, ))
    x = grid.dimensions[0]
    t = grid.stepping_dim

    u = TimeFunction(name='u', grid=grid)

    stencil = Eq(u.forward, u[t, x - 1] + u[t, x + 1] + 1,
                 subdomain=grid.interior)
    op = Operator(stencil)
    op.apply(time=1)

    glb_pos_map = u.grid.distributor.glb_pos_map
    result = u.data_ro_domain
    if LEFT in glb_pos_map[x]:
        assert np.all(result[0, 1] == 2.)
        assert np.all(result[0, 2:] == 3.)
    else:
        assert np.all(result[0, -2] == 2.)
        assert np.all(result[0, :-2] == 3.)
def test_discarding_runs():
    """
    Run aggressive autotuning with 4 threads and then with 1 thread, and
    check the statistics recorded in ``op._state['autotuning']`` after
    each run.
    """
    grid = Grid(shape=(64, 64, 64))
    f = TimeFunction(name='f', grid=grid)

    op = Operator(Eq(f.forward, f + 1.),
                  opt=('advanced', {'openmp': True,
                                    'par-collapse-ncores': 1}))

    op.apply(time=100, nthreads=4, autotune='aggressive')

    record = op._state['autotuning'][0]
    assert record['runs'] == 18
    assert record['tpr'] == options['squeezer'] + 1
    assert len(record['tuned']) == 3
    assert record['tuned']['nthreads'] == 4

    # With 1 < 4 threads, the AT eventually tries many more combinations
    op.apply(time=100, nthreads=1, autotune='aggressive')

    record = op._state['autotuning'][1]
    assert record['runs'] == 25
    assert record['tpr'] == options['squeezer'] + 1
    assert len(record['tuned']) == 3
    assert record['tuned']['nthreads'] == 1
def test_trivial_eq_1d(self):
    """
    Apply a two-point stencil on a 1D grid initialised to 1 (halo included)
    and check the result on the first, the last, and any other rank.
    """
    grid = Grid(shape=(32, ))
    x = grid.dimensions[0]
    t = grid.stepping_dim

    f = TimeFunction(name='f', grid=grid)
    f.data_with_halo[:] = 1.

    op = Operator(Eq(f.forward, f[t, x - 1] + f[t, x + 1] + 1))
    op.apply(time=1)

    assert np.all(f.data_ro_domain[1] == 3.)

    distributor = f.grid.distributor
    if distributor.myrank == 0:
        # First rank: the leftmost point gets a different value
        assert f.data_ro_domain[0, 0] == 5.
        assert np.all(f.data_ro_domain[0, 1:] == 7.)
    elif distributor.myrank == distributor.nprocs - 1:
        # Last rank: the rightmost point gets a different value
        assert f.data_ro_domain[0, -1] == 5.
        assert np.all(f.data_ro_domain[0, :-1] == 7.)
    else:
        assert np.all(f.data_ro_domain[0] == 7.)
def test_argument_from_index_constant(self):
    """
    Use two integer Constants as array indices -- one selecting the time
    slot of ``u`` to read, one selecting the slot of ``snap`` to write --
    and supply their values at ``apply`` time.
    """
    nx, ny = 30, 30
    grid = Grid(shape=(nx, ny))
    x, y = grid.dimensions

    arbdim = Dimension('arb')
    u = TimeFunction(name='u', grid=grid, save=None, time_order=2,
                     space_order=0)
    snap = Function(name='snap', dimensions=(arbdim, x, y),
                    shape=(5, nx, ny), space_order=0)

    save_t = Constant(name='save_t', dtype=np.int32)
    save_slot = Constant(name='save_slot', dtype=np.int32)

    lhs = snap.subs(arbdim, save_slot)
    rhs = u.subs(grid.stepping_dim, save_t)
    op = Operator(Eq(lhs, rhs))

    u.data[:] = 0.0
    snap.data[:] = 0.0
    u.data[0, 10, 10] = 1.0

    op.apply(save_t=0, save_slot=1)

    assert snap.data[1, 10, 10] == 1.0
def test_expr_collection(self):
    """
    Test that expressions with different time dimensions are not collected.
    """
    grid = Grid((10,))

    f = TimeFunction(name="f", grid=grid, save=10)
    f2 = TimeFunction(name="f2", grid=grid, save=10)
    g = TimeFunction(name="g", grid=grid)
    g2 = TimeFunction(name="g2", grid=grid)

    w = Function(name="w", grid=grid)
    eq = Eq(w, f.dt*g + f2.dt*g2)

    with timed_region('x'):
        # Since all Function are time dependent, there should be no
        # collection, and lowering must produce the same result as with
        # the pre-evaluated expression
        lowered = Operator._lower_exprs([eq])[0]
        lowered_evaluated = Operator._lower_exprs([eq.evaluate])[0]

    assert lowered == lowered_evaluated
def test_default_functions(self):
    """
    Test the default argument derivation for functions.
    """
    grid = Grid(shape=(5, 6, 7))
    f = TimeFunction(name='f', grid=grid)
    g = Function(name='g', grid=grid)

    op = Operator(Eq(g, g + f))

    # Size and iteration bounds expected for each space dimension, plus
    # the data of the two functions
    expected = {
        'x_size': 5, 'x_m': 0, 'x_M': 4,
        'y_size': 6, 'y_m': 0, 'y_M': 5,
        'z_size': 7, 'z_m': 0, 'z_M': 6,
        'f': f.data_allocated,
        'g': g.data_allocated,
    }
    self.verify_arguments(op.arguments(time=4), expected)

    exp_parameters = [
        'f', 'g',
        'x_m', 'x_M', 'x_size',
        'y_m', 'y_M', 'y_size',
        'z_m', 'z_M', 'z_size',
        'time_m', 'time_M',
    ]
    self.verify_parameters(op.parameters, exp_parameters)
def test_streaming_two_buffers(self):
    """
    Accumulate two saved TimeFunctions into ``u`` with an Operator built
    with the ('streaming', 'orchestrate') options, then check both time
    buffers of ``u``.
    """
    nt = 10
    grid = Grid(shape=(4, 4))

    u = TimeFunction(name='u', grid=grid)
    usave = TimeFunction(name='usave', grid=grid, save=nt)
    vsave = TimeFunction(name='vsave', grid=grid, save=nt)

    # Time slot `step` of both saved functions holds the value `step`
    for step in range(nt):
        usave.data[step, :] = step
        vsave.data[step, :] = step

    eqn = Eq(u.forward, u + usave + vsave)

    op = Operator(eqn, opt=('streaming', 'orchestrate'))
    op.apply(time_M=nt-2)

    assert np.all(u.data[0] == 56)
    assert np.all(u.data[1] == 72)
def get_eq(u, a, b, conf):
    """
    Build ``Eq(u, <derivative of a> + <derivative of b>)`` according to
    ``conf``.

    The derivative of ``a`` is selected by ``conf['l']`` or ``conf['r1']``
    ('x', 'y' or '(x, y)', tested in that order); the derivative of ``b``
    is selected by ``conf['r2']`` ('y', '(x, y)', or None for no
    contribution).

    Raises
    ------
    ValueError
        If none of the recognised values is found.
    """
    # Contribution of `a`; 'x' takes precedence over 'y', which takes
    # precedence over '(x, y)'
    if conf['l'] == 'x' or conf['r1'] == 'x':
        a_deriv = a.dx
    elif conf['l'] == 'y' or conf['r1'] == 'y':
        a_deriv = a.dy
    elif conf['l'] == '(x, y)' or conf['r1'] == '(x, y)':
        a_deriv = a.dx + a.dy
    else:
        raise ValueError("Invalid configuration")

    # Contribution of `b`
    r2 = conf['r2']
    if r2 == 'y':
        b_deriv = b.dy
    elif r2 == '(x, y)':
        b_deriv = b.dx + b.dy
    elif r2 is None:
        b_deriv = 0.
    else:
        raise ValueError("Invalid configuration")

    rhs = a_deriv + b_deriv
    return Eq(u, rhs)
def test_mpi_operator():
    """
    Pickle and unpickle an Operator, then check that the reconstructed
    Operator generates the same code and computes the same values as the
    original one.
    """
    grid = Grid(shape=(4, ))
    f = TimeFunction(name='f', grid=grid)
    g = TimeFunction(name='g', grid=grid)

    # Using `sum` creates a stencil in `x`, which in turn will
    # trigger the generation of code for MPI halo exchange
    op = Operator(Eq(f.forward, f.sum() + 1))
    op.apply(time=2)

    # Round trip through pickle
    new_op = pickle.loads(pickle.dumps(op))
    assert str(op) == str(new_op)

    # The unpickled Operator is run on `g` in place of `f`
    new_op.apply(time=2, f=g)

    assert np.all(f.data[0] == [2., 3., 3., 3.])
    assert np.all(f.data[1] == [3., 6., 7., 7.])
    for buf in (0, 1):
        assert np.all(g.data[buf] == f.data[buf])
def test_dynamic_nthreads():
    """
    Check that an OpenMP Operator exposes the number of threads as a
    runtime argument, both in the generated code and through ``apply`` and
    ``arguments``.
    """
    grid = Grid(shape=(16, 16, 16))
    f = TimeFunction(name='f', grid=grid)

    op = Operator(Eq(f.forward, f + 1.), dle='openmp')

    # Check num_threads appears in the generated code
    # Not very elegant, but it does the trick
    generated = str(op)
    assert 'num_threads(nt)' in generated

    # Check `op` accepts the `nthreads` kwarg
    op.apply(time=0)
    op.apply(time_m=1, time_M=1, nthreads=4)
    assert np.all(f.data[0] == 2.)

    # Check the actual value assumed by `nthreads`
    from devito.dle.backends.parallelizer import ncores
    default_args = op.arguments(time=0)
    assert default_args['nthreads'] == ncores()  # default value
    user_args = op.arguments(time=0, nthreads=123)
    assert user_args['nthreads'] == 123  # user supplied
def solver(I, w, dt, T):
    """
    Solve u'' + w**2*u = 0 for t in (0,T], u(0)=I and u'(0)=0,
    by a central finite difference method with time step dt.

    Returns the data of ``u`` together with the array of the ``Nt + 1``
    time points spanning ``[0, Nt*dt]``.
    """
    dt = float(dt)
    Nt = int(round(T / dt))

    # Time axis whose spacing `h_t` is bound to `dt` when the Operator runs
    t = Dimension('t', spacing=Constant('h_t'))

    u = TimeFunction(name='u', dimensions=(t, ), shape=(Nt + 1, ),
                     space_order=2)
    u.data[:] = I

    # Discretise the ODE and rearrange it into an update for `u.forward`
    eqn = u.dt2 + (w**2) * u
    update = solve(eqn, u.forward)
    stencil = Eq(u.forward, update)

    op = Operator(stencil)
    op.apply(h_t=dt, t_M=Nt - 1)

    time_points = np.linspace(0, Nt * dt, Nt + 1)
    return u.data, time_points
def test_collapsing(self):
    """
    Build an Operator with the ('blocking', 'openmp') passes, then check
    that it compiles, that it computes the right values, and that the
    expected OpenMP pragmas decorate its Iterations.
    """
    grid = Grid(shape=(3, 3, 3))
    u = TimeFunction(name='u', grid=grid)

    op = Operator(Eq(u.forward, u + 1), dle=('blocking', 'openmp'))

    # Does it compile? Honoring the OpenMP specification isn't trivial
    assert op.cfunction

    # Does it produce the right result
    op.apply(t_M=9)
    assert np.all(u.data[0] == 10)

    loops = FindNodes(Iteration).visit(op._func_table['bf0'])

    outer_pragma = loops[0].pragmas[0].value
    assert outer_pragma == 'omp for collapse(2) schedule(static,1)'

    nested_pragma = loops[2].pragmas[0].value
    assert nested_pragma ==\
        ('omp parallel for collapse(2) schedule(static,1) num_threads(%d)'
         % nhyperthreads())
def test_at_is_actually_working(shape, expected):
    """
    Check that autotuning is actually running when switched on,
    in both 2D and 3D operators.

    The log output is captured through a temporary handler writing to an
    in-memory buffer; the number of captured lines containing 'took' is the
    quantity being checked.

    Parameters
    ----------
    shape : tuple of int
        Shape of the grid the Operator is built on.
    expected : int
        Total number of 'took' lines expected in the buffer once the
        aggressive autotuning run has completed too.
    """
    grid = Grid(shape=shape)

    buffer = StringIO()
    temporary_handler = logging.StreamHandler(buffer)
    logger.addHandler(temporary_handler)

    # From here on the global logger carries an extra handler, so the
    # cleanup must run even if an assertion or the Operator itself fails
    try:
        infield = Function(name='infield', grid=grid)
        infield.data[:] = np.arange(reduce(mul, shape),
                                    dtype=np.int32).reshape(shape)
        outfield = Function(name='outfield', grid=grid)
        stencil = Eq(outfield.indexify(),
                     outfield.indexify() + infield.indexify() * 3.0)
        op = Operator(stencil, dle=('blocking', {'blockinner': True,
                                                 'blockalways': True}))

        # Expected 3 AT attempts for the given shape
        # NOTE(review): the assertion below counts 4 'took' lines --
        # presumably the 3 attempts plus the actual run; confirm
        op(infield=infield, outfield=outfield, autotune=True)
        out = [i for i in buffer.getvalue().split('\n') if 'took' in i]
        assert len(out) == 4

        # Now try the same with aggressive autotuning, which tries 9 more
        # cases
        configuration['autotuning'] = 'aggressive'
        try:
            op(infield=infield, outfield=outfield, autotune=True)
        finally:
            # Never leak the 'aggressive' setting into the other tests
            configuration['autotuning'] = \
                configuration._defaults['autotuning']
        out = [i for i in buffer.getvalue().split('\n') if 'took' in i]
        assert len(out) == expected
    finally:
        logger.removeHandler(temporary_handler)

        temporary_handler.flush()
        temporary_handler.close()
        buffer.flush()
        buffer.close()
def test_fd_space(self, derivative, space_order):
    """
    This test compares the discrete finite-difference scheme against
    polynomials. For a given order p, the finite difference scheme should
    be exact for polynomials of order p.

    :param derivative: name of the derivative to be tested
    :param space_order: space order of the finite difference stencil
    """
    clear_cache()

    # dummy axis dimension
    nx = 100
    xx = np.linspace(-1, 1, nx)
    dx = xx[1] - xx[0]

    # Symbolic data
    grid = Grid(shape=(nx, ), dtype=np.float32)
    x = grid.dimensions[0]
    u = Function(name="u", grid=grid, space_order=space_order)
    du = Function(name="du", grid=grid, space_order=space_order)

    # Define polynomial with exact fd
    coeffs = np.ones((space_order, ), dtype=np.float32)
    polynome = sum([coeffs[k] * x**k for k in range(space_order)])
    polyvalues = np.array([polynome.subs(x, point) for point in xx],
                          np.float32)

    # Fill original data with the polynomial values
    u.data[:] = polyvalues

    # True derivative of the polynome
    if derivative == 'dx2':
        Dpolynome = diff(diff(polynome))
    else:
        Dpolynome = diff(polynome)
    Dpolyvalues = np.array([Dpolynome.subs(x, point) for point in xx],
                           np.float32)

    # FD derivative, symbolic
    u_deriv = getattr(u, derivative)

    # Compute numerical FD
    stencil = Eq(du, u_deriv)
    op = Operator(stencil, subs={x.spacing: dx})
    op.apply()

    # Check exactness of the numerical derivative except inside space_brd
    space_border = space_order
    inner = slice(space_border, -space_border)
    error = abs(du.data[inner] - Dpolyvalues[inner])
    assert np.isclose(np.mean(error), 0., atol=1e-3)
def test_concurrent_executing_operators():
    """
    Run one compiled Operator many times concurrently from a thread pool.

    Each execution supplies its own grid and TimeFunction (whose name is
    always 'u') filled with a thread-dependent value, and checks the result
    it gets back, so that any sharing of data between concurrent executions
    shows up as a failed assertion.
    """
    rng = np.random.default_rng()

    # build a simple operator and force it to compile
    grid = Grid(shape=(50, 50, 50))
    u = TimeFunction(name='u', grid=grid)
    op = Operator(Eq(u.forward, u + 1))

    # this forces the compile
    op.cfunction

    def do_run(op):
        # choose a new size
        shape = (rng.integers(20, 22), 30, rng.integers(20, 22))

        # make concurrent executions put a different value in the array
        # so we can be sure they aren't sharing an object even though the
        # name is reused
        val = current_thread().ident % 100000

        grid_private = Grid(shape=shape)
        u_private = TimeFunction(name='u', grid=grid_private)
        u_private.data[:] = val

        op(u=u_private, time_m=1, time_M=100)
        assert np.all(u_private.data[1, :, :, :] == val + 100)

    info("First running serially to demonstrate it works")
    do_run(op)

    info("Now creating thread pool")
    # The context manager shuts the pool down (joining its worker threads)
    # on exit, including when one of the results below raises
    with ThreadPoolExecutor(max_workers=16) as tpe:
        info("Running operator in threadpool")
        futures = [tpe.submit(do_run, op) for _ in range(1000)]

        # Get results - exceptions will be raised here if there are any
        for f in futures:
            f.result()
def initialize_damp(damp, padsizes, spacing, abc_type="damp", fs=False):
    """
    Initialize damping field with an absorbing boundary layer.

    The field is first set to a constant, then a profile is accumulated
    over a left and a right layer along each Dimension of ``damp`` by an
    Operator named 'initdamp', which is built and immediately executed.

    Parameters
    ----------
    damp : Function
        The damping field for absorbing boundary condition.
    padsizes : iterable of (int, int)
        One ``(nbl, nbr)`` pair per Dimension of ``damp``: the number of
        points in the left and in the right layer, respectively.
    spacing :
        Grid spacing coefficient. Not used in the body of this function;
        the profile is divided by the ``spacing`` attribute of each
        Dimension instead.
    abc_type : str, optional
        Whether the dampening is a mask or a layer.
        "mask" => 1 inside the domain and decreases in the layer
        anything else => 0 inside the domain and increases in the layer
    fs : bool, optional
        If True, the left layer is not built along the last Dimension of
        ``damp`` (presumably the free-surface side -- TODO confirm).
    """

    # Constant background value, on top of which the layers are accumulated
    eqs = [Eq(damp, 1.0 if abc_type == "mask" else 0.0)]
    for (nbl, nbr), d in zip(padsizes, damp.dimensions):
        # NOTE(review): only the left layer is taken to be guarded by `fs`;
        # confirm against the intended nesting
        if not fs or d is not damp.dimensions[-1]:
            dampcoeff = 1.5 * np.log(1.0 / 0.001) / (nbl)
            # left
            dim_l = SubDimension.left(name='abc_%s_l' % d.name, parent=d,
                                      thickness=nbl)
            # Normalised position within the left layer
            pos = Abs((nbl - (dim_l - d.symbolic_min) + 1) / float(nbl))
            val = dampcoeff * (pos - sin(2 * np.pi * pos) / (2 * np.pi))
            # A mask is decremented, a layer is incremented
            val = -val if abc_type == "mask" else val
            eqs += [Inc(damp.subs({d: dim_l}), val / d.spacing)]
        # right
        dampcoeff = 1.5 * np.log(1.0 / 0.001) / (nbr)
        dim_r = SubDimension.right(name='abc_%s_r' % d.name, parent=d,
                                   thickness=nbr)
        # Normalised position within the right layer
        pos = Abs((nbr - (d.symbolic_max - dim_r) + 1) / float(nbr))
        val = dampcoeff * (pos - sin(2 * np.pi * pos) / (2 * np.pi))
        val = -val if abc_type == "mask" else val
        eqs += [Inc(damp.subs({d: dim_r}), val / d.spacing)]

    Operator(eqs, name='initdamp')()
def test_full_shape_after_blocking(self):
    """
    Check the shape of the Array used to store a DSE-captured aliasing
    expression. The shape is impacted by loop blocking, which reduces the
    required write-to space.
    """
    grid = Grid(shape=(3, 3, 3))
    x, y, z = grid.dimensions  # noqa
    t = grid.stepping_dim

    f = Function(name='f', grid=grid)
    f.data_with_halo[:] = 1.
    u = TimeFunction(name='u', grid=grid, space_order=3)
    u.data_with_halo[:] = 0.

    # Leads to 3D aliases
    eqn = Eq(u.forward,
             ((u[t, x, y, z] + u[t, x+1, y+1, z+1])*3*f +
              (u[t, x+2, y+2, z+2] + u[t, x+3, y+3, z+3])*3*f + 1))

    op0 = Operator(eqn, dse='noop', dle=('advanced', {'openmp': True}))
    op1 = Operator(eqn, dse='aggressive', dle=('advanced', {'openmp': True}))

    # Symbols the Array shape is expected to be expressed in
    x0_blk_size = op1.parameters[-3]
    y0_blk_size = op1.parameters[-2]
    z_size = op1.parameters[4]

    # Check Array shape
    symbols = FindSymbols().visit(op1._func_table['bf0'].root)
    arrays = [s for s in symbols if s.is_Array]
    assert len(arrays) == 1
    a = arrays[0]
    assert len(a.dimensions) == 3
    assert a.halo == ((1, 1), (1, 1), (1, 1))
    assert Add(*a.symbolic_shape[0].args) == x0_blk_size + 2
    assert Add(*a.symbolic_shape[1].args) == y0_blk_size + 2
    assert Add(*a.symbolic_shape[2].args) == z_size + 2

    # Check numerical output: the optimized Operator must reproduce the
    # output of the unoptimized one
    op0(time_M=1)
    exp = np.copy(u.data[:])
    u.data_with_halo[:] = 0.
    op1(time_M=1)
    assert np.all(u.data == exp)
def test_misc_dims(self):
    """
    Test MPI in presence of Functions with mixed distributed/replicated
    Dimensions, with only a strict subset of the Grid dimensions used.
    """
    dx = Dimension(name='dx')
    grid = Grid(shape=(4, 4))
    x, y = grid.dimensions
    glb_pos_map = grid.distributor.glb_pos_map
    time = grid.time_dim

    u = TimeFunction(name='u', grid=grid, time_order=1, space_order=2, save=4)
    c = Function(name='c', grid=grid, dimensions=(x, dx), shape=(4, 5))

    rhs = (u[time, x-2, y] * c[x, 0] +
           u[time, x-1, y] * c[x, 1] +
           u[time, x, y] * c[x, 2] +
           u[time, x+1, y] * c[x, 3] +
           u[time, x+2, y] * c[x, 4])
    step = Eq(u.forward, rhs)

    # Row `row` of `c` holds the five weights used at x == row
    base_weights = (1.0, 1.0, 3.0, 6.0, 5.0)
    for row in range(4):
        for col, base in enumerate(base_weights):
            c.data[row, col] = base + row

    u.data[:] = 0.0
    u.data[0, 2, :] = 2.0

    op = Operator(step)
    op(time_m=0, time_M=0)

    if LEFT in glb_pos_map[x]:
        assert(np.all(u.data[1, 0, :] == 10.0))
        assert(np.all(u.data[1, 1, :] == 14.0))
    else:
        assert(np.all(u.data[1, 2, :] == 10.0))
        assert(np.all(u.data[1, 3, :] == 8.0))
def test_flow_detection(self):
    """
    Test detection of spatial flow directions inside a time loop.

    The stencil uses values at the new timestep as well as those at the
    previous one, which forces an evaluation order onto x. The update is

        u[t+1, x] = 2*u[t, x] + 3*u[t, x+1] + 4*u[t+1, x+1]

    so the flow dependency should traverse x in the negative direction.
    Starting from a single 1 at x=5 (and zeros elsewhere), the new timestep
    is expected to hold 2 at x=5, 11 at x=4, 44 at x=3, and then four times
    the right neighbour's value at every point further to the left.
    """
    grid = Grid(shape=(10, 10))
    x, y = grid.dimensions

    u = TimeFunction(name='u', grid=grid, save=2, time_order=1, space_order=0)

    right_old = u.subs(x, x+x.spacing)
    right_new = u.forward.subs(x, x+x.spacing)
    step = Eq(u.forward, 2*u + 3*right_old + 4*right_new)
    op = Operator(step)

    u.data[:] = 0.0
    u.data[0, 5, 5] = 1.0

    op.apply(time_M=0)

    # Column y == 5: values grow while moving towards x == 0
    assert u.data[1, 5, 5] == 2
    assert u.data[1, 4, 5] == 11
    assert u.data[1, 3, 5] == 44
    assert u.data[1, 2, 5] == 4*44
    assert u.data[1, 1, 5] == 4*4*44
    assert u.data[1, 0, 5] == 4*4*4*44

    # Everything else is untouched
    assert np.all(u.data[1, 6:, :] == 0)
    assert np.all(u.data[1, :, 0:5] == 0)
    assert np.all(u.data[1, :, 6:] == 0)
def test_everything():
    """
    Compare a 'noop' Operator against a 'buffering' one on an equation that
    combines SubDimensions, a ConditionalDimension with a Constant factor,
    and time-shifted accesses to subsampled, saved TimeFunctions.
    """
    nt = 50
    grid = Grid(shape=(6, 6))
    x, y = grid.dimensions
    time = grid.time_dim

    xi = SubDimension.middle(name='xi', parent=x,
                             thickness_left=2, thickness_right=2)
    yi = SubDimension.middle(name='yi', parent=y,
                             thickness_left=2, thickness_right=2)

    factor = Constant(name='factor', value=5, dtype=np.int32)
    t_sub = ConditionalDimension('t_sub', parent=time, factor=factor)
    save_shift = Constant(name='save_shift', dtype=np.int32)

    u = TimeFunction(name='u', grid=grid, time_order=0)
    u1 = TimeFunction(name='u', grid=grid, time_order=0)
    va = TimeFunction(name='va', grid=grid, time_order=0,
                      save=(int(nt//factor.data)), time_dim=t_sub)
    vb = TimeFunction(name='vb', grid=grid, time_order=0,
                      save=(int(nt//factor.data)), time_dim=t_sub)

    for slot in range(va.save):
        va.data[slot, :] = slot
        vb.data[slot, :] = slot*2 - 1

    # `va` accessed at the shifted index and at its two neighbours
    vas = va.subs(t_sub, t_sub - save_shift)
    vasb = va.subs(t_sub, t_sub - 1 - save_shift)
    vasf = va.subs(t_sub, t_sub + 1 - save_shift)

    eqns = [Eq(u.forward, u + (vasb + vas + vasf)*2. + vb)]

    # Restrict the equations to the middle SubDimensions
    to_subdims = {x: xi, y: yi}
    eqns = [e.xreplace(to_subdims) for e in eqns]

    op0 = Operator(eqns, opt='noop')
    op1 = Operator(eqns, opt='buffering')

    # Check generated code
    buffers = [s for s in FindSymbols().visit(op1) if s.is_Array]
    assert len(buffers) == 2

    op0.apply(time_m=15, time_M=35, save_shift=0)
    op1.apply(time_m=15, time_M=35, save_shift=0, u=u1)

    assert np.all(u.data == u1.data)
def random_walksdD_vec(x0, N, p, num_walks=1, num_times=1, random=np.random):
    """
    Vectorized version of random_walksdD.

    Parameters
    ----------
    x0 : sequence
        Initial position; its length gives the number of space dimensions.
    N : int
        Number of steps of each walk.
    p : float
        Threshold on the uniform draws: a draw ``<= p`` gives a step of -1,
        any other draw a step of +1.
    num_walks : int, optional
        Number of walks.
    num_times : int, optional
        Number of time points at which the walks are sampled for the
        histogram.
    random : optional
        Object providing ``uniform(low, high, size=...)``; defaults to the
        ``np.random`` module.

    Returns
    -------
    tuple
        ``(position, position2, pos_hist, pos_hist_times)``: the sum over
        axis 1 of the data of ``r``, the sum over axis 1 of its square, the
        sampled positions of each walk, and the sampled time indices as an
        array.
    """
    d = len(x0)

    # Histogram at num_times selected time points
    pos_hist = np.zeros((num_walks, num_times, d))
    pos_hist_times = [(N // num_times) * i for i in range(num_times)]

    # Create and initialise our TimeFunction r
    x_d = Dimension(name='x_d')
    t_d = Dimension(name='t_d')
    d_d = Dimension(name='d_d')
    r = TimeFunction(name='r', dimensions=(x_d, t_d, d_d),
                     shape=(N + 1, num_walks, d))

    # Setting each walk's first element to x0
    r.data[0] = x0

    steps = Function(name='steps', dimensions=(x_d, t_d, d_d),
                     shape=(N + 1, num_walks, d))

    # Populating steps with -1 if value in rs <= p at that point and 1
    # otherwise
    rs = random.uniform(0, 1, size=N * num_walks * d).reshape(num_walks, N, d)
    for n in range(num_walks):
        steps.data[:N, n] = np.where(rs[n] <= p, -1, 1)

    # Creating and applying operator that contains equation for adding steps
    eq = Eq(r.forward, r + steps)
    op = Operator(eq)
    op.apply()

    # Summing over data to give positions. No zero arrays are allocated
    # up front for these two: they would be discarded by the assignments
    # below
    position = np.sum(r.data, axis=1)      # Accumulated positions
    position2 = np.sum(r.data**2, axis=1)  # Accumulated positions**2

    for k in range(num_walks):
        pos_hist[k, :] = r.data[pos_hist_times, k]

    return position, position2, pos_hist, np.array(pos_hist_times)
def test_interior(self):
    """
    Tests application of an Operator consisting of a single equation
    over the ``interior`` subdomain.
    """
    grid = Grid(shape=(4, 4, 4))
    x, y, z = grid.dimensions

    interior = grid.interior

    u = TimeFunction(name='u', grid=grid)

    eqn = [Eq(u.forward, u + 2, subdomain=interior)]

    op = Operator(eqn, dle='noop')
    op.apply(time_M=2)

    result = u.data[1]

    # The interior points have been updated ...
    assert np.all(result[1:-1, 1:-1, 1:-1] == 6.)
    # ... while the checked boundary planes are still zero
    assert np.all(result[:, 0] == 0.)
    assert np.all(result[:, -1] == 0.)
    assert np.all(result[:, :, 0] == 0.)
    assert np.all(result[:, :, -1] == 0.)