def test_adjoint_F(self, mkey, shape, kernel, space_order, nbpml):
    """
    Adjoint test for the forward modeling operator.

    The forward modeling operator F generates a shot record (measurements)
    from a source, while the adjoint of F generates measurements at the
    source location from data. This test uses the conventional dot test:
    <F x, y> = <x, F^T y>
    """
    tn = 500.  # Final time

    # Create solver from preset
    solver = acoustic_setup(shape=shape, spacing=[15. for _ in shape], kernel=kernel,
                            nbpml=nbpml, tn=tn, space_order=space_order,
                            **(presets[mkey]), dtype=np.float64)

    # Create adjoint receiver symbol, located at the source positions
    srca = Receiver(name='srca', grid=solver.model.grid,
                    time_range=solver.geometry.time_axis,
                    coordinates=solver.geometry.src_positions)

    # Run forward and adjoint operators
    rec, _, _ = solver.forward(save=False)
    solver.adjoint(rec=rec, srca=srca)

    # Adjoint test: verify <Ax,y> matches <x, A^Ty> closely
    term1 = np.dot(srca.data.reshape(-1), solver.geometry.src.data)
    term2 = linalg.norm(rec.data.reshape(-1)) ** 2
    rel_diff = (term1 - term2) / term1
    info('<Ax,y>: %f, <x, A^Ty>: %f, difference: %4.4e, ratio: %f'
         % (term1, term2, rel_diff, term1 / term2))
    # The reference value is 0, so a relative tolerance contributes nothing
    # (rtol * |0| == 0); the bound has to be expressed as an absolute one.
    assert np.isclose(rel_diff, 0., atol=1.e-10)
def print_defaults():
    """
    Log, for every environment variable accepted by Devito, its accepted
    values and its default value.
    """
    from devito.logger import info

    for env_name, key in env_vars_mapper.items():
        accepted = configuration._accepted[key]
        default = configuration._defaults[key]
        info('%s: %s. Default: %s' % (env_name, accepted, default))
def run(shape=(50, 50), spacing=(20.0, 20.0), tn=1000.0,
        space_order=4, nbpml=40, autotune=False, constant=False, **kwargs):
    """
    Build the elastic solver via ``elastic_setup`` and run one forward
    propagation.

    Extra keyword arguments are forwarded to ``elastic_setup``. Returns the
    eight values produced by ``solver.forward`` as a tuple, in the order
    ``(rec1, rec2, vx, vz, txx, tzz, txz, summary)``.
    """
    solver = elastic_setup(
        shape=shape,
        spacing=spacing,
        nbpml=nbpml,
        tn=tn,
        space_order=space_order,
        constant=constant,
        **kwargs
    )

    info("Applying Forward")
    # Unpack explicitly so that an unexpected number of outputs fails loudly
    (rec_a, rec_b, vel_x, vel_z,
     tau_xx, tau_zz, tau_xz, perf_summary) = solver.forward(autotune=autotune)

    return rec_a, rec_b, vel_x, vel_z, tau_xx, tau_zz, tau_xz, perf_summary
def _profile_output(self, args):
    """
    Produce a performance summary of the profiled sections.

    Logs the total run time of the operator (sum of all section timings),
    then one line per profiled section, and returns the summary object
    obtained from the profiler.
    """
    summary = self._profiler.summary(args, self._dtype)
    info("Operator `%s` run in %.2f s" % (self.name,
                                          sum(summary.timings.values())))
    for k, v in summary.items():
        itershapes = [",".join(str(i) for i in its) for its in v.itershapes]
        if len(itershapes) > 1:
            name = "%s<%s>" % (k, ",".join("<%s>" % i for i in itershapes))
        elif len(itershapes) == 1:
            name = "%s<%s>" % (k, itershapes[0])
        else:
            # No iteration shape available: fall back to the bare section
            # key rather than emitting the literal text "None"
            name = str(k)
        gpointss = ", %.2f GPts/s" % v.gpointss if v.gpointss else ''
        perf("* %s with OI=%.2f computed in %.3f s [%.2f GFlops/s%s]" %
             (name, v.oi, v.time, v.gflopss, gpointss))
    return summary
def test_adjoint_J(self, shape, space_order):
    """
    Adjoint test for the FWI Jacobian operator.

    The Jacobian operator J generates a linearized shot record
    (measurements) from a model perturbation dm, while the adjoint of J
    generates the FWI gradient from an adjoint source (usually the data
    residual). This test uses the conventional dot test:
    <J x, y> = <x, J^T y>
    """
    tn = 500.  # Final time
    # NOTE(review): true division yields a float under Python 3 — confirm
    # that the setup helpers accept a non-integer number of PML points.
    nbpml = 10 + space_order / 2
    spacing = tuple([10.]*len(shape))

    # Create solver from preset
    solver = acoustic_setup(shape=shape, spacing=spacing,
                            nbpml=nbpml, tn=tn, space_order=space_order,
                            preset='layers-isotropic', dtype=np.float64)

    # Create initial model (m0) with a constant velocity throughout
    model0 = demo_model('layers-isotropic', ratio=3, vp_top=1.5, vp_bottom=1.5,
                        spacing=spacing, space_order=space_order, shape=shape,
                        nbpml=nbpml, dtype=np.float64, grid=solver.model.grid)

    # Compute the full wavefield u0
    _, u0, _ = solver.forward(save=True, m=model0.m)

    # Compute initial born perturbation from m - m0
    dm = (solver.model.m.data - model0.m.data)
    du, _, _, _ = solver.born(dm, m=model0.m)

    # Compute gradient from initial perturbation
    im, _ = solver.gradient(du, u0, m=model0.m)

    # Adjoint test: verify <Ax,y> matches <x, A^Ty> closely
    term1 = np.dot(im.data.reshape(-1), dm.reshape(-1))
    term2 = linalg.norm(du.data.reshape(-1))**2
    rel_diff = (term1 - term2) / term1
    info('<Jx,y>: %f, <x, J^Ty>: %f, difference: %4.4e, ratio: %f'
         % (term1, term2, rel_diff, term1 / term2))
    # The reference value is 0, so a relative tolerance contributes nothing
    # (rtol * |0| == 0); the bound has to be expressed as an absolute one.
    assert np.isclose(rel_diff, 0., atol=1.e-10)
def run(shape=(50, 50, 50), spacing=(20.0, 20.0, 20.0), tn=1000.0,
        space_order=4, kernel='OT2', nbpml=40, full_run=False,
        autotune=False, constant=False, checkpointing=False, **kwargs):
    """
    Run the acoustic example: forward modelling and, when ``full_run`` is
    set, the adjoint, Born and gradient operators as well.

    Returns ``(summary.gflopss, summary.oi, summary.timings, [rec, u.data])``
    of the first forward run when ``full_run`` is false, ``None`` otherwise.
    """
    solver = acoustic_setup(shape=shape, spacing=spacing, nbpml=nbpml, tn=tn,
                            space_order=space_order, kernel=kernel,
                            constant=constant, **kwargs)

    # Model perturbation built from a smoothed copy of the model parameter
    smoothed = smooth10(solver.model.m.data, solver.model.shape_domain)
    dm = np.float32(smoothed**2 - solver.model.m.data)

    info("Applying Forward")
    # The full time history only has to be stored when the gradient is
    # computed without checkpointing; with checkpointing enabled, PyRevolve
    # manages the time history itself.
    save = full_run and not checkpointing
    rec, u, summary = solver.forward(save=save, autotune=autotune)

    if constant:
        # Re-run with m overridden, first by a Constant, then by a scalar
        const_m = Constant(name="m", value=.25, dtype=np.float32)
        for override in (const_m, .25):
            solver.forward(save=save, m=override)

    if full_run:
        info("Applying Adjoint")
        solver.adjoint(rec, autotune=autotune)
        info("Applying Born")
        solver.born(dm, autotune=autotune)
        info("Applying Gradient")
        solver.gradient(rec, u, autotune=autotune, checkpointing=checkpointing)
        return None

    return summary.gflopss, summary.oi, summary.timings, [rec, u.data]
def run(shape=(50, 50), spacing=(20.0, 20.0), tn=1.0,
        space_order=4, nbpml=40, autotune=False, constant=True, save=False,
        **kwargs):
    """
    Build the poroelastic solver via ``poroelastic_setup`` and run one
    forward propagation.

    Extra keyword arguments are forwarded to ``poroelastic_setup``. Returns
    the eleven values produced by ``solver.forward`` as a tuple, in the order
    ``(rec1, rec2, vx, vz, qx, qz, txx, tzz, txz, p, summary)``.
    """
    solver = poroelastic_setup(shape=shape, spacing=spacing, nbpml=nbpml, tn=tn,
                               space_order=space_order, constant=constant,
                               **kwargs)
    info("Applying Forward")
    rec1, rec2, vx, vz, qx, qz, txx, tzz, txz, p, summary = solver.forward(
        autotune=autotune, save=save)

    # The interactive IPython `embed()` hook that used to sit here was a
    # debugging leftover: it blocked every (including non-interactive) call.
    return rec1, rec2, vx, vz, qx, qz, txx, tzz, txz, p, summary
def test_adjoint_F(self, mkey, shape, kernel, space_order):
    """
    Dot-product (adjoint) test for the forward modeling operator.

    F maps a source to a shot record (measurements); its adjoint maps data
    back to measurements at the source location. The test checks the
    identity <F x, y> = <x, F^T y>.
    """
    final_time = 500.

    # Solver built from the preset selected by `mkey`
    solver = acoustic_setup(
        shape=shape,
        spacing=[15. for _ in shape],
        kernel=kernel,
        nbl=10,
        tn=final_time,
        space_order=space_order,
        **(presets[mkey]),
        dtype=np.float64
    )
    geometry = solver.geometry

    # Adjoint receiver symbol, placed at the source positions
    srca = Receiver(name='srca', grid=solver.model.grid,
                    time_range=geometry.time_axis,
                    coordinates=geometry.src_positions)

    # Forward run followed by the adjoint run on the recorded data
    rec = solver.forward(save=False)[0]
    solver.adjoint(rec=rec, srca=srca)

    # Both sides of the dot test
    lhs = np.dot(srca.data.reshape(-1), solver.geometry.src.data)
    rhs = norm(rec)**2
    relative_gap = (lhs - rhs) / lhs
    info('<Ax,y>: %f, <x, A^Ty>: %f, difference: %4.4e, ratio: %f'
         % (lhs, rhs, relative_gap, lhs / rhs))
    assert np.isclose(relative_gap, 0., atol=1.e-12)
def run(shape=(50, 50), spacing=(20.0, 20.0), tn=1000.0,
        space_order=4, nbl=40, autotune=False, constant=False,
        kernel='blanch_symes', **kwargs):
    """
    Build the viscoacoustic solver via ``viscoacoustic_setup`` and run one
    forward propagation.

    Returns ``(summary.gflopss, summary.oi, summary.timings, [rec])``.
    """
    solver = viscoacoustic_setup(
        shape=shape,
        spacing=spacing,
        nbl=nbl,
        tn=tn,
        space_order=space_order,
        constant=constant,
        kernel=kernel,
        **kwargs
    )

    info("Applying Forward")
    # The second output of the forward run is not used by this example
    rec, _, summary = solver.forward(autotune=autotune)

    performance = (summary.gflopss, summary.oi, summary.timings, [rec])
    return performance
def acoustic(shape=(50, 50, 50), spacing=(20.0, 20.0, 20.0), tn=1000.0,
             time_order=2, space_order=4, nbpml=40, full_run=False,
             autotune=False, constant=False, skew=0, iterations=0, **kwargs):
    """
    Run the acoustic example with a given skew factor.

    Stores ``skew`` in ``configuration['skew_factor']``, runs the forward
    operator with ``save=iterations + 1`` and, when ``full_run`` is set, the
    adjoint, Born and gradient operators. Returns ``u.data`` when
    ``full_run`` is false, ``None`` otherwise.
    """
    configuration['skew_factor'] = skew

    solver = acoustic_setup(shape=shape, spacing=spacing, nbpml=nbpml, tn=tn,
                            space_order=space_order, time_order=time_order,
                            constant=constant, **kwargs)

    # Model perturbation built from a smoothed copy of the model parameter
    smoothed = smooth10(solver.model.m.data, solver.model.shape_domain)
    dm = np.float32(smoothed**2 - solver.model.m.data)

    info("Applying Forward")
    rec, u, summary = solver.forward(save=iterations + 1, autotune=autotune)

    if constant:
        # Re-run with m overridden, first by a Constant, then by a scalar
        const_m = Constant(name="m", value=.25, dtype=np.float32)
        for override in (const_m, .25):
            solver.forward(save=full_run, m=override)

    if full_run:
        info("Applying Adjoint")
        solver.adjoint(rec, autotune=autotune)
        info("Applying Born")
        solver.born(dm, autotune=autotune)
        info("Applying Gradient")
        solver.gradient(rec, u, autotune=autotune)
        return None

    return u.data
def run(shape=(50, 50, 50), spacing=(20.0, 20.0, 20.0), tn=1000.0,
        space_order=4, kernel='OT2', nbpml=40, full_run=False,
        autotune=False, constant=False, **kwargs):
    """
    Run the acoustic example: forward modelling and, when ``full_run`` is
    set, the adjoint, Born and gradient operators as well.

    Returns ``(summary.gflopss, summary.oi, summary.timings, [rec, u.data])``
    of the first forward run when ``full_run`` is false, ``None`` otherwise.
    """
    solver = acoustic_setup(shape=shape, spacing=spacing, nbpml=nbpml, tn=tn,
                            space_order=space_order, kernel=kernel,
                            constant=constant, **kwargs)

    # Model perturbation built from a smoothed copy of the model parameter
    smoothed = smooth10(solver.model.m.data, solver.model.shape_domain)
    dm = np.float32(smoothed**2 - solver.model.m.data)

    info("Applying Forward")
    # The wavefield history is saved only for a full run
    rec, u, summary = solver.forward(save=full_run, autotune=autotune)

    if constant:
        # Re-run with m overridden, first by a Constant, then by a scalar
        const_m = Constant(name="m", value=.25, dtype=np.float32)
        for override in (const_m, .25):
            solver.forward(save=full_run, m=override)

    if full_run:
        info("Applying Adjoint")
        solver.adjoint(rec, autotune=autotune)
        info("Applying Born")
        solver.born(dm, autotune=autotune)
        info("Applying Gradient")
        solver.gradient(rec, u, autotune=autotune)
        return None

    return summary.gflopss, summary.oi, summary.timings, [rec, u.data]
def run(dimensions=(50, 50, 50), spacing=(20.0, 20.0, 20.0), tn=1000.0,
        time_order=2, space_order=4, nbpml=40, full_run=False, **kwargs):
    """
    Run the example: forward modelling and, when ``full_run`` is set, the
    adjoint, Born and gradient operators as well.

    ``kwargs`` is forwarded both to ``setup`` and to every operator call.
    Returns ``(summary.gflopss, summary.oi, summary.timings, [rec, u.data])``
    when ``full_run`` is false, ``None`` otherwise.
    """
    solver = setup(dimensions=dimensions, spacing=spacing, nbpml=nbpml, tn=tn,
                   space_order=space_order, time_order=time_order, **kwargs)

    # Model perturbation built from a smoothed copy of the model parameter
    smoothed = smooth10(solver.model.m.data, solver.model.shape_domain)
    dm = np.float32(smoothed**2 - solver.model.m.data)

    info("Applying Forward")
    rec, u, summary = solver.forward(save=full_run, **kwargs)

    if full_run:
        info("Applying Adjoint")
        solver.adjoint(rec, **kwargs)
        info("Applying Born")
        solver.born(dm, **kwargs)
        info("Applying Gradient")
        solver.gradient(rec, u, **kwargs)
        return None

    return summary.gflopss, summary.oi, summary.timings, [rec, u.data]
def run(shape=(50, 50, 50), spacing=(20.0, 20.0, 20.0), tn=1000.0,
        space_order=4, kernel='OT2', nbl=40, full_run=False, fs=False,
        autotune=False, preset='layers-isotropic', checkpointing=False,
        **kwargs):
    """
    Run the acoustic example: forward modelling and, when ``full_run`` is
    set, the adjoint, Jacobian and adjoint-Jacobian operators as well.

    Always returns
    ``(summary.gflopss, summary.oi, summary.timings, [rec, u.data])`` taken
    from the first forward run.
    """
    solver = acoustic_setup(shape=shape, spacing=spacing, nbl=nbl, tn=tn,
                            space_order=space_order, kernel=kernel, fs=fs,
                            preset=preset, **kwargs)

    info("Applying Forward")
    # The full time history only has to be stored when the gradient is
    # computed without checkpointing; with checkpointing enabled, PyRevolve
    # manages the time history itself.
    save = full_run and not checkpointing
    rec, u, summary = solver.forward(save=save, autotune=autotune)

    if preset == 'constant':
        # Re-run with vp overridden, first by a Constant, then by a scalar
        const_vp = Constant(name="v", value=2.0, dtype=np.float32)
        for override in (const_vp, 2.0):
            solver.forward(save=save, vp=override)

    if full_run:
        # Smooth the velocity and derive the squared-slowness perturbation
        initial_vp = Function(name='v0', grid=solver.model.grid,
                              space_order=space_order)
        smooth(initial_vp, solver.model.vp)
        dm = np.float32(initial_vp.data**(-2) - solver.model.vp.data**(-2))

        info("Applying Adjoint")
        solver.adjoint(rec, autotune=autotune)
        info("Applying Born")
        solver.jacobian(dm, autotune=autotune)
        info("Applying Gradient")
        solver.jacobian_adjoint(rec, u, autotune=autotune,
                                checkpointing=checkpointing)

    return summary.gflopss, summary.oi, summary.timings, [rec, u.data]
def check_control(result, no_bl):
    """
    Bisect over the leading index to narrow down where `result` stops being
    close to `no_bl`, logging the comparison made at each probed index.

    NOTE(review): `untile` is not a parameter of this function; it is
    presumably a module-level object — confirm, or it should likely be
    `no_bl`.
    """
    info("result nonzero-count: %d" % np.count_nonzero(result.data))
    info("untile nonzero-count: %d" % np.count_nonzero(untile.data))
    assert len(result.data) == len(untile.data)

    probe = len(result.data) - 1
    lo, hi = 0, probe
    while lo < hi - 1:
        comp = compare(result[probe], no_bl[probe])
        max_diff, equal, close = comp[0], comp[1], comp[2]
        info("t=%d: max diff: %f, np.eq: %s, close: %s"
             % (probe, max_diff, equal, close))
        if not close or np.isnan(max_diff):
            # Mismatch (or NaN) at `probe`: look at earlier indices
            hi = probe
        else:
            # Still matching at `probe`: look at later indices
            lo = probe
        probe = (lo + hi) // 2
def run(shape=(50, 50, 50), spacing=(20.0, 20.0, 20.0), tn=1000.0,
        space_order=4, kernel='OT2', nbpml=40, full_run=False,
        autotune=False, preset='layers-isotropic', checkpointing=False,
        **kwargs):
    """
    Run the acoustic example: forward modelling and, when ``full_run`` is
    set, the adjoint, Born and gradient operators as well.

    Returns ``(summary.gflopss, summary.oi, summary.timings, [rec, u.data])``
    of the first forward run when ``full_run`` is false, ``None`` otherwise.
    """
    solver = acoustic_setup(shape=shape, spacing=spacing, nbpml=nbpml, tn=tn,
                            space_order=space_order, kernel=kernel,
                            preset=preset, **kwargs)

    info("Applying Forward")
    # The full time history only has to be stored when the gradient is
    # computed without checkpointing; with checkpointing enabled, PyRevolve
    # manages the time history itself.
    save = full_run and not checkpointing
    rec, u, summary = solver.forward(save=save, autotune=autotune)

    if preset == 'constant':
        # Re-run with m overridden, first by a Constant, then by a scalar
        const_m = Constant(name="m", value=.25, dtype=np.float32)
        for override in (const_m, .25):
            solver.forward(save=save, m=override)

    if not full_run:
        return summary.gflopss, summary.oi, summary.timings, [rec, u.data]

    # Smooth the model parameter and derive the perturbation from it
    initial_vp = Function(name='v0', grid=solver.model.grid,
                          space_order=space_order)
    smooth(initial_vp, solver.model.m)
    dm = np.float32(initial_vp.data**2 - solver.model.m.data)

    info("Applying Adjoint")
    solver.adjoint(rec, autotune=autotune)
    info("Applying Born")
    solver.born(dm, autotune=autotune)
    info("Applying Gradient")
    solver.gradient(rec, u, autotune=autotune, checkpointing=checkpointing)
    return None
def run(shape=(50, 50, 50), spacing=(10.0, 10.0, 10.0), tn=1000.0,
        space_order=4, nbl=40, full_run=False, autotune=False, **kwargs):
    """
    Run the example built by ``acoustic_ssa_setup``: forward modelling and,
    when ``full_run`` is set, the adjoint, Jacobian and adjoint-Jacobian
    operators as well.

    Always returns
    ``(summary.gflopss, summary.oi, summary.timings, [rec, u.data])`` taken
    from the forward run.
    """
    solver = acoustic_ssa_setup(shape=shape, spacing=spacing, nbl=nbl, tn=tn,
                                space_order=space_order, **kwargs)

    info("Applying Forward")
    # The wavefield history is saved only for a full run
    rec, u, summary = solver.forward(save=full_run, autotune=autotune)

    if full_run:
        # Smooth the velocity; the perturbation is the difference between
        # the model velocity and its smoothed version
        initial_vp = Function(name='v0', grid=solver.model.grid,
                              space_order=space_order)
        smooth(initial_vp, solver.model.vp)
        dm = solver.model.vp - initial_vp

        info("Applying Adjoint")
        solver.adjoint(rec, autotune=autotune)
        info("Applying Born")
        solver.jacobian(dm, autotune=autotune)
        info("Applying Gradient")
        solver.jacobian_adjoint(rec, u, autotune=autotune)

    return summary.gflopss, summary.oi, summary.timings, [rec, u.data]
def save_rec(recx, recy, recz, src_coords, recloc, nt, dt):
    """
    Resample the three receiver components and write each one to its own
    SEG-Y file named ``<recloc>rec{x,y,z}_<rank>.segy``.

    Nothing is written when ``recx.data`` is empty.
    """
    rank = recx.grid.distributor.comm.Get_rank()

    if recx.data.size == 0:
        return

    # Receiver coordinates are taken from the resampled x component only
    recx_save, coords = resample(recx, nt)
    recy_save, _ = resample(recy, nt)
    recz_save, _ = resample(recz, nt)

    def write_component(traces, filename):
        # All components share the same source/receiver header values
        segy_write(traces, [src_coords[0]], [src_coords[-1]],
                   coords[:, 0], coords[:, -1], dt, filename,
                   sourceY=[src_coords[1]], groupY=coords[:, 1])

    info("From rank %s, shot record of size %s, number of rec locations %s"
         % (rank, recx_save.shape, coords.shape))

    info("From rank %s, writing %s in recx, maximum value is %s"
         % (rank, recx_save.shape, np.max(recx_save)))
    write_component(recx_save, "%srecx_%s.segy" % (recloc, rank))

    info("From rank %s, writing %s in recy" % (rank, recy_save.shape))
    write_component(recy_save, "%srecy_%s.segy" % (recloc, rank))

    info("From rank %s, writing %s in recz" % (rank, recz_save.shape))
    write_component(recz_save, "%srecz_%s.segy" % (recloc, rank))
def test_concurrent_executing_operators():
    """
    Check that one compiled Operator can be applied concurrently from many
    threads, each with its own data, without the runs interfering.
    """
    rng = np.random.default_rng()

    # Build a simple operator and force it to compile
    grid = Grid(shape=(50, 50, 50))
    u = TimeFunction(name='u', grid=grid)
    op = Operator(Eq(u.forward, u + 1))

    # Accessing the attribute is what forces the compilation
    op.cfunction

    def do_run(op):
        # Choose a new size
        shape = (rng.integers(20, 22), 30, rng.integers(20, 22))

        # Make concurrent executions put a different value in the array
        # so we can be sure they aren't sharing an object even though the
        # name is reused
        val = current_thread().ident % 100000

        grid_private = Grid(shape=shape)
        u_private = TimeFunction(name='u', grid=grid_private)
        u_private.data[:] = val

        op(u=u_private, time_m=1, time_M=100)
        assert np.all(u_private.data[1, :, :, :] == val + 100)

    info("First running serially to demonstrate it works")
    do_run(op)

    info("Now creating thread pool")
    # The context manager shuts the pool down (joining its worker threads)
    # even when one of the runs raises, so no threads leak out of the test
    with ThreadPoolExecutor(max_workers=16) as tpe:
        info("Running operator in threadpool")
        futures = [tpe.submit(do_run, op) for _ in range(1000)]

        # Get results - exceptions will be raised here if there are any
        for f in futures:
            f.result()
def run(space_order=4, kernel='OT2', nbpml=40, autotune=False, filename='',
        chunk=1000000, algo=None, shuffle="SHUFFLE", **kwargs):
    """
    Run a forward and a reverse (gradient) pass on the solver built by
    ``overthrust_setup``, driven by a ``Revolver`` with compressed
    checkpoints.

    ``chunk``, ``algo`` and ``shuffle`` are passed to the Revolver as its
    compression parameters. ``autotune`` is accepted but not used by this
    function. The norms of the wavefield, the receiver data and the gradient
    are printed; nothing is returned.
    """
    solver = overthrust_setup(filename=filename, nbpml=nbpml,
                              space_order=space_order, kernel=kernel, **kwargs)
    m = solver.model.m
    dt = solver.dt

    u = TimeFunction(name='u', grid=solver.model.grid, time_order=2,
                     space_order=solver.space_order)
    v = TimeFunction(name='v', grid=solver.model.grid, time_order=2,
                     space_order=solver.space_order)
    rec = Receiver(name='rec', grid=solver.model.grid,
                   time_range=solver.receiver.time_range,
                   coordinates=solver.receiver.coordinates.data)
    grad = Function(name='grad', grid=solver.model.grid)

    # Only the forward wavefield `u` is checkpointed
    cp = CompressionCheckpoint([u])
    n_checkpoints = 60
    wrap_fw = CheckpointOperator(solver.op_fwd(save=False), src=solver.source,
                                 u=u, m=m, dt=dt, rec=rec)
    wrap_rev = CheckpointOperator(solver.op_grad(save=False), u=u, v=v, m=m,
                                  rec=rec, dt=dt, grad=grad)

    wrp = Revolver(cp, wrap_fw, wrap_rev, n_checkpoints,
                   rec.data.shape[0] - 2, compression='blosc',
                   compression_params={CHUNK_SIZE: chunk, CNAME: algo,
                                       SHUFFLE: shuffle})

    info("Applying Forward")
    solver.forward(time=100)
    info("Again")
    wrp.apply_forward()
    print(np.linalg.norm(u.data))
    print(np.linalg.norm(rec.data))

    info("Applying Gradient")
    wrp.apply_reverse()
    # The norm is a float: '%d' would truncate it (e.g. 0.7 -> 0) and raise
    # on NaN/inf, so it is formatted as a float instead
    print("Gradient is: %f" % np.linalg.norm(grad.data))
def autotune(operator, arguments, parameters, tunable):
    """
    Acting as a high-order function, take as input an operator and a list of
    operator arguments to perform empirical autotuning. Some of the operator
    arguments are marked as tunable.

    Each candidate block shape is timed by calling ``operator.cfunction`` on
    a private copy of the arguments; the fastest one is selected.

    Returns a new ``OrderedDict`` of arguments in which the tunable block
    sizes are replaced by the best ones found and the profiler struct is
    reset. The input ``arguments`` is returned unchanged if the loop
    structure has more than one time-stepping iteration or if no legal block
    size could be timed.
    """
    # We get passed all the arguments, but the cfunction only requires a subset
    at_arguments = OrderedDict([(p.name, arguments[p.name]) for p in parameters])

    # User-provided output data must not be altered, so the runs below work
    # on copies
    output = [i.name for i in operator.output]
    for k, v in arguments.items():
        if k in output:
            at_arguments[k] = v.copy()

    iterations = FindNodes(Iteration).visit(operator.body)
    dim_mapper = {i.dim.name: i.dim for i in iterations}

    # Shrink the iteration space of time-stepping dimension so that auto-tuner
    # runs will finish quickly
    steppers = [i for i in iterations if i.dim.is_Time]
    if len(steppers) == 0:
        timesteps = 1
    elif len(steppers) == 1:
        stepper = steppers[0]
        start = at_arguments[stepper.dim.min_name]
        timesteps = stepper.extent(start=start, finish=options['at_squeezer']) - 1
        if timesteps < 0:
            timesteps = options['at_squeezer'] - timesteps
            perf("AT: Number of timesteps adjusted to %d" % timesteps)
        at_arguments[stepper.dim.min_name] = start
        at_arguments[stepper.dim.max_name] = timesteps
        if stepper.dim.is_Stepping:
            # Keep the parent dimension's bounds in sync
            at_arguments[stepper.dim.parent.min_name] = start
            at_arguments[stepper.dim.parent.max_name] = timesteps
    else:
        warning("AT: Couldn't understand loop structure; giving up")
        return arguments

    # Attempted block sizes ...
    mapper = OrderedDict([(i.argument.symbolic_size.name, i) for i in tunable])
    # ... Defaults (basic mode): the same value for every tunable dimension
    blocksizes = [OrderedDict([(i, v) for i in mapper])
                  for v in options['at_blocksize']]
    # ... Always try the entire iteration space (degenerate block)
    itershape = [mapper[i].iteration.symbolic_extent.subs(arguments) for i in mapper]
    blocksizes.append(OrderedDict([(i, mapper[i].iteration.extent(0, j-1))
                                   for i, j in zip(mapper, itershape)]))
    # ... More attempts if auto-tuning in aggressive mode
    if configuration['autotuning'].level == 'aggressive':
        blocksizes = more_heuristic_attempts(blocksizes)

    # How many temporaries are allocated on the stack?
    # Will drop block sizes that might lead to a stack overflow
    functions = FindSymbols('symbolics').visit(operator.body +
                                               operator.elemental_functions)
    stack_shapes = [i.symbolic_shape for i in functions if i.is_Array and i._mem_stack]
    stack_space = sum(reduce(mul, i, 1) for i in stack_shapes)*operator._dtype().itemsize

    # Note: there is only a single loop over 'blocksize' because only
    # square blocks are tested
    timings = OrderedDict()
    for bs in blocksizes:
        illegal = False
        for k, v in at_arguments.items():
            if k in bs:
                val = bs[k]
                start = mapper[k].original_dim.symbolic_start.subs(arguments)
                end = mapper[k].original_dim.symbolic_end.subs(arguments)

                if val <= mapper[k].iteration.extent(start, end):
                    at_arguments[k] = val
                else:
                    # Block size cannot be larger than actual dimension
                    illegal = True
                    break
        if illegal:
            continue

        # Make sure we remain within stack bounds, otherwise skip block size
        dim_sizes = {}
        for k, v in at_arguments.items():
            if k in bs:
                dim_sizes[mapper[k].argument.symbolic_size] = bs[k]
            elif k in dim_mapper:
                dim_sizes[dim_mapper[k].symbolic_size] = v
        try:
            bs_stack_space = stack_space.xreplace(dim_sizes)
        except AttributeError:
            # `stack_space` has no `xreplace` (e.g. it is a plain number),
            # so there is nothing to substitute
            bs_stack_space = stack_space
        try:
            if int(bs_stack_space) > options['at_stack_limit']:
                continue
        except TypeError:
            # We should never get here
            warning("AT: Couldn't determine stack size; skipping block size %s" % str(bs))
            continue

        # Use AT-specific profiler structs
        timer = operator.profiler.new()
        at_arguments[operator.profiler.name] = timer

        operator.cfunction(*list(at_arguments.values()))
        # Total elapsed time is the sum over all fields of the profiler struct
        elapsed = sum(getattr(timer._obj, i) for i, _ in timer._obj._fields_)
        timings[tuple(bs.items())] = elapsed
        perf("AT: Block shape <%s> took %f (s) in %d timesteps" %
             (','.join('%d' % i for i in bs.values()), elapsed, timesteps))

    try:
        best = dict(min(timings, key=timings.get))
        info("Auto-tuned block shape: %s" % best)
    except ValueError:
        # `min` raises ValueError on an empty `timings`, i.e. every candidate
        # block size was skipped
        info("Auto-tuning request, but couldn't find legal block sizes")
        return arguments

    # Build the new argument list
    tuned = OrderedDict()
    for k, v in arguments.items():
        tuned[k] = best[k] if k in mapper else v

    # Reset the profiling struct
    assert operator.profiler.name in tuned
    tuned[operator.profiler.name] = operator.profiler.new()

    return tuned
def test_adjoint_F(self, mkey, shape, kernel, space_order, nbpml):
    """
    Adjoint test for the forward modeling operator.

    The forward modeling operator F generates a shot record (measurements)
    from a source, while the adjoint of F generates measurements at the
    source location from data. This test uses the conventional dot test:
    <F x, y> = <x, F^T y>
    """
    t0 = 0.0  # Start time
    tn = 500.  # Final time
    nrec = 130  # Number of receivers

    # Create model from preset
    model = demo_model(spacing=[15. for _ in shape], dtype=np.float64,
                       space_order=space_order, shape=shape, nbpml=nbpml,
                       **(presets[mkey]))

    # Derive timestepping from model spacing; the 'OT4' kernel uses a time
    # step scaled by 1.73
    dt = model.critical_dt * (1.73 if kernel == 'OT4' else 1.0)
    time_range = TimeAxis(start=t0, stop=tn, step=dt)

    # Define source geometry (center of domain, just below surface)
    src = RickerSource(name='src', grid=model.grid, f0=0.01,
                       time_range=time_range)
    src.coordinates.data[0, :] = np.array(model.domain_size) * .5
    src.coordinates.data[0, -1] = 30.

    # Define receiver geometry (same as source, but spread across x)
    rec = Receiver(name='rec', grid=model.grid, time_range=time_range,
                   npoint=nrec)
    rec.coordinates.data[:, 0] = np.linspace(0., model.domain_size[0], num=nrec)
    rec.coordinates.data[:, 1:] = src.coordinates.data[0, 1:]

    # Create solver object to provide relevant operators
    solver = AcousticWaveSolver(model, source=src, receiver=rec,
                                kernel=kernel, space_order=space_order)

    # Create adjoint receiver symbol, located at the source position
    srca = Receiver(name='srca', grid=model.grid,
                    time_range=solver.source.time_range,
                    coordinates=solver.source.coordinates.data)

    # Run forward and adjoint operators
    rec, _, _ = solver.forward(save=False)
    solver.adjoint(rec=rec, srca=srca)

    # Adjoint test: verify <Ax,y> matches <x, A^Ty> closely
    term1 = np.dot(srca.data.reshape(-1), solver.source.data)
    term2 = linalg.norm(rec.data.reshape(-1))**2
    rel_diff = (term1 - term2) / term1
    info('<Ax,y>: %f, <x, A^Ty>: %f, difference: %4.4e, ratio: %f'
         % (term1, term2, rel_diff, term1 / term2))
    # The reference value is 0, so a relative tolerance contributes nothing
    # (rtol * |0| == 0); the bound has to be expressed as an absolute one.
    assert np.isclose(rel_diff, 0., atol=1.e-10)
def print_defaults():
    """
    Log, for every environment variable accepted by Devito, its accepted
    values and its default value.
    """
    template = '%s: %s. Default: %s'
    for env_name, key in env_vars_mapper.items():
        fields = (env_name, configuration._accepted[key],
                  configuration._defaults[key])
        info(template % fields)
def _emit_apply_profiling(self, args):
    """
    Log a performance summary of the profiled sections and return it.

    The total run time is always logged; the detailed global/local metrics
    and the performance arguments are only emitted when the 'PERF' log level
    is enabled.
    """
    def fround(value):
        # Round up to 2 decimal places
        return ceil(value * 100) / 100

    info("Operator `%s` ran in %.2f s" % (self.name,
                                          fround(self._profiler.py_timers['apply'])))

    summary = self._profiler.summary(args, self._dtype, reduce_over='apply')

    if not is_log_enabled_for('PERF'):
        # Nobody would see the details: skip building them
        return summary

    indent = ""
    if summary.globals:
        # With MPI enabled, the global performance indicators represent
        # "cross-rank" performance data
        global_metrics = []

        vanilla = summary.globals.get('vanilla')
        if vanilla is not None:
            global_metrics.append("OI=%.2f" % fround(vanilla.oi))
            global_metrics.append("%.2f GFlops/s" % fround(vanilla.gflopss))

        fdlike = summary.globals.get('fdlike')
        if fdlike is not None:
            global_metrics.append("%.2f GPts/s" % fround(fdlike.gpointss))

        if global_metrics:
            perf("Global performance: [%s]" % ', '.join(global_metrics))

        perf("Local performance:")
        indent = " " * 2

    # Local, i.e. "per-rank", performance. Without MPI, this is the only
    # thing that will be emitted
    for key, entry in summary.items():
        rank = "" if key.rank is None else "[rank%d]" % key.rank

        metrics = ["OI=%.2f" % fround(entry.oi),
                   "%.2f GFlops/s" % fround(entry.gflopss)]
        if entry.gpointss:
            metrics.append("%.2f GPts/s" % fround(entry.gpointss))

        shapes = [",".join(str(i) for i in its) for its in entry.itershapes]
        if not shapes:
            shape_str = ""
        elif len(shapes) == 1:
            shape_str = shapes[0]
        else:
            shape_str = ",".join("<%s>" % s for s in shapes)
        name = "%s%s<%s>" % (key.name, rank, shape_str)

        perf("%s* %s ran in %.2f s [%s]" % (indent, name, fround(entry.time),
                                            ", ".join(metrics)))
        subsections = summary.subsections.get(key.name, {})
        for sub_name, sub_time in subsections.items():
            perf("%s+ %s ran in %.2f s [%.2f%%]" %
                 (indent * 2, sub_name, sub_time,
                  fround(sub_time / entry.time * 100)))

    # Performance mode and arguments
    perf_args = {}
    for knob in self.input + self.dimensions:
        if not knob.is_PerfKnob:
            continue
        try:
            perf_args[knob.name] = args[knob.name]
        except KeyError:
            # Fall back to the first alias that is present in `args`
            alias = next((a for a in knob._arg_names if a in args), None)
            if alias is not None:
                perf_args[alias] = args[alias]
    perf("Performance[mode=%s] arguments: %s" % (self._state['optimizations'],
                                                 perf_args))

    return summary
def timer(start, message):
    """
    Log the time elapsed between `start` and now as ``H:MM:SS``, prefixed by
    `message`. `start` is subtracted from ``time.time()``.
    """
    elapsed = time.time() - start
    hours, remainder = divmod(elapsed, 3600)
    minutes, seconds = divmod(remainder, 60)
    # Fractions of a second are truncated
    fields = [int(part) for part in (hours, minutes, seconds)]
    info('{}: {:d}:{:02d}:{:02d}'.format(message, *fields))
def _profile_output(self, args):
    """
    Log a performance summary of the profiled sections and return it.

    Global indicators are emitted first when the summary provides them,
    followed by one line per profiled section and by the optimization
    configuration.
    """
    def fround(value):
        # Round up to 2 decimal places
        return ceil(value * 100) / 100

    def describe(entry):
        # "<GFlops/s>[, <GPts/s>]"; the second part only when available
        parts = ["%.2f GFlops/s" % fround(entry.gflopss)]
        if entry.gpointss:
            parts.append("%.2f GPts/s" % fround(entry.gpointss))
        return ", ".join(parts)

    info("Operator `%s` run in %.2f s" % (self.name,
                                          fround(self._profiler.py_timers['apply'])))

    summary = self._profiler.summary(args, self._dtype, reduce_over='apply')

    indent = ""
    if summary.globals:
        indent = " " * 2
        perf("Global performance indicators")

        # With MPI enabled, the 'vanilla' entry contains "cross-rank"
        # performance data
        vanilla = summary.globals.get('vanilla')
        if vanilla is not None:
            metrics = describe(vanilla)
            perf("%s* Operator `%s` with OI=%.2f computed in %.2f s [%s]" %
                 (indent, self.name, fround(vanilla.oi), fround(vanilla.time),
                  metrics))

        fdlike = summary.globals.get('fdlike')
        if fdlike is not None:
            perf("%s* Achieved %.2f FD-GPts/s" % (indent, fdlike.gpointss))

        perf("Local performance indicators")

    # Local, i.e. "per-rank", performance. Without MPI, this is the only
    # thing that will be emitted
    for key, entry in summary.items():
        rank = "" if key.rank is None else "[rank%d]" % key.rank
        metrics = describe(entry)
        shapes = [",".join(str(i) for i in its) for its in entry.itershapes]

        if not shapes:
            # No iteration shape: only the timing is reported
            perf("%s* %s%s computed in %.2f s" %
                 (indent, key.name, rank, fround(entry.time)))
            continue

        if len(shapes) == 1:
            shape_str = shapes[0]
        else:
            shape_str = ",".join("<%s>" % s for s in shapes)
        name = "%s%s<%s>" % (key.name, rank, shape_str)
        perf("%s* %s with OI=%.2f computed in %.2f s [%s]" %
             (indent, name, fround(entry.oi), fround(entry.time), metrics))

    perf("Configuration: %s" % self._state['optimizations'])

    return summary
def print_state():
    """Log every key/value pair of the current configuration."""
    for name, value in configuration.items():
        entry = '%s: %s' % (name, value)
        info(entry)
def _emit_apply_profiling(self, args):
    """
    Log a performance summary of the profiled sections and return it.

    The total run time is always logged; the global/local indicators, the
    optimization configuration and the performance arguments are only
    emitted when the 'PERF' log level is enabled.
    """
    def fround(value):
        # Round up to 2 decimal places
        return ceil(value * 100) / 100

    def describe(entry):
        # "<GFlops/s>[, <GPts/s>]"; the second part only when available
        parts = ["%.2f GFlops/s" % fround(entry.gflopss)]
        if entry.gpointss:
            parts.append("%.2f GPts/s" % fround(entry.gpointss))
        return ", ".join(parts)

    info("Operator `%s` run in %.2f s" % (self.name,
                                          fround(self._profiler.py_timers['apply'])))

    summary = self._profiler.summary(args, self._dtype, reduce_over='apply')

    if not is_log_enabled_for('PERF'):
        # Nobody would see the details: skip building them
        return summary

    indent = ""
    if summary.globals:
        indent = " " * 2
        perf("Global performance indicators")

        # With MPI enabled, the 'vanilla' entry contains "cross-rank"
        # performance data
        vanilla = summary.globals.get('vanilla')
        if vanilla is not None:
            metrics = describe(vanilla)
            perf("%s* Operator `%s` with OI=%.2f computed in %.2f s [%s]" %
                 (indent, self.name, fround(vanilla.oi), fround(vanilla.time),
                  metrics))

        fdlike = summary.globals.get('fdlike')
        if fdlike is not None:
            perf("%s* Achieved %.2f FD-GPts/s" % (indent, fdlike.gpointss))

        perf("Local performance indicators")

    # Local, i.e. "per-rank", performance. Without MPI, this is the only
    # thing that will be emitted
    for key, entry in summary.items():
        rank = "" if key.rank is None else "[rank%d]" % key.rank
        metrics = describe(entry)
        shapes = [",".join(str(i) for i in its) for its in entry.itershapes]

        if not shapes:
            # No iteration shape: only the timing is reported
            perf("%s* %s%s computed in %.2f s" %
                 (indent, key.name, rank, fround(entry.time)))
            continue

        if len(shapes) == 1:
            shape_str = shapes[0]
        else:
            shape_str = ",".join("<%s>" % s for s in shapes)
        name = "%s%s<%s>" % (key.name, rank, shape_str)
        perf("%s* %s with OI=%.2f computed in %.2f s [%s]" %
             (indent, name, fround(entry.oi), fround(entry.time), metrics))

    # Relevant configuration values
    perf("Configuration: %s" % self._state['optimizations'])

    # Relevant performance arguments
    perf_args = {}
    for knob in self.input + self.dimensions:
        if not knob.is_PerfKnob:
            continue
        try:
            perf_args[knob.name] = args[knob.name]
        except KeyError:
            # Fall back to the first alias that is present in `args`
            alias = next((a for a in knob._arg_names if a in args), None)
            if alias is not None:
                perf_args[alias] = args[alias]
    perf("Performance arguments: %s" % perf_args)

    return summary
def execute(self, warmups=1, repeats=3, **params):
    """
    Run one benchmark configuration several times.

    Each pass calls ``setup``, ``run`` and ``teardown`` with ``params``.
    ``warmups`` passes are executed first, then the state is reset and
    ``repeats`` passes follow. Every value stored in ``self.timings`` is
    then divided by ``repeats`` and ``postprocess`` is invoked.
    """

    def single_pass():
        # One complete setup -> run -> teardown cycle
        self.setup(**params)
        self.run(**params)
        self.teardown(**params)

    described = ', '.join('%s: %s' % item for item in params.items())
    info("Running %d repeats - parameters: %s" % (repeats, described))

    self.reset()
    for w in range(warmups):
        info("--- Warmup %d ---" % w)
        single_pass()
        info("--- Warmup %d finished ---" % w)

    # Drop whatever the warmup passes recorded
    self.reset()
    for r in range(repeats):
        info("--- Run %d ---" % r)
        single_pass()
        info("--- Run %d finished ---" % r)

    info("")

    # Average timings across repeats
    for events in self.timings.values():
        for measures in events.values():
            for measure in measures:
                measures[measure] /= repeats

    # Collect meta-information via post-processing methods
    self.postprocess(**params)
def test_gradientFWI(shape, kernel, space_order):
    r"""
    Check that the FWI gradient computed with devito satisfies the Taylor
    expansion property:

    .. math::
        \Phi(m0 + h dm) = \Phi(m0) + O(h) \\
        \Phi(m0 + h dm) = \Phi(m0) + h <\nabla \Phi(m0), dm> + O(h^2) \\
        \Phi(m) = .5 * || F(m) - D ||_2^2

    where

    .. math::
        <\nabla \Phi(m0), dm> = <J^T \delta d, dm> \\
        \delta d = F(m0) - D \\

    with F the forward modelling operator and D the receiver data obtained
    from a forward run with the solver's own model.

    The test fits a line (in log-log scale) through the first- and
    second-order errors measured for a sequence of step sizes h and checks
    that the slopes are close to 1 and 2 respectively.

    :param shape: size of the domain in all dimensions in number of grid points
    :param kernel: forwarded unchanged to ``setup``
    :param space_order: order of the spatial discretization scheme
    :return: assertion that the Taylor properties are satisfied
    """
    spacing = tuple(15. for _ in shape)
    wave = setup(shape=shape, spacing=spacing, dtype=np.float64,
                 kernel=kernel, space_order=space_order,
                 nbpml=10 + space_order / 2)

    # Smooth background model and the perturbation that leads back to the
    # solver's own model
    m0 = smooth10(wave.model.m.data, wave.model.shape_domain)
    dm = np.float32(wave.model.m.data - m0)

    # Compute receiver data for the true velocity
    rec, u, _ = wave.forward()

    # Compute receiver data and full wavefield for the smooth velocity
    rec0, u0, _ = wave.forward(m=m0, save=True)

    # Objective function value
    F0 = .5 * linalg.norm(rec0.data - rec.data)**2

    # Gradient: <J^T \delta d, dm>
    residual = Receiver(name='rec', grid=wave.model.grid,
                        data=rec0.data - rec.data,
                        coordinates=rec0.coordinates.data)
    gradient, _ = wave.gradient(residual, u0, m=m0)
    G = np.dot(gradient.data.reshape(-1), dm.reshape(-1))

    # FWI Gradient test
    H = [0.5, 0.25, .125, 0.0625, 0.0312, 0.015625, 0.0078125]
    error1 = np.zeros(len(H))
    error2 = np.zeros(len(H))
    for i, h in enumerate(H):
        # Add the perturbation to the model
        mloc = m0 + h * dm
        # Data for the new model
        d = wave.forward(m=mloc)[0]
        # Objective function value at the perturbed model (shared by both errors)
        misfit = .5 * linalg.norm(d.data - rec.data)**2
        # First order error Phi(m0+dm) - Phi(m0)
        error1[i] = np.absolute(misfit - F0)
        # Second order term Phi(m0+dm) - Phi(m0) - <J(m0)^T \delta d, dm>
        error2[i] = np.absolute(misfit - F0 - h * G)

    # Test slope of the tests
    p1 = np.polyfit(np.log10(H), np.log10(error1), 1)
    p2 = np.polyfit(np.log10(H), np.log10(error2), 1)
    info('1st order error, Phi(m0+dm)-Phi(m0): %s' % (p1,))
    # Raw string: '\d' is not a valid escape sequence in a normal literal
    info(r'2nd order error, Phi(m0+dm)-Phi(m0) - <J(m0)^T \delta d, dm>: %s' % (p2,))
    assert np.isclose(p1[0], 1.0, rtol=0.1)
    assert np.isclose(p2[0], 2.0, rtol=0.1)
assert len(nzinds) == len(shape) shape = model.grid.shape x, y, z = model.grid.dimensions time = model.grid.time_dim source_mask = Function(name='source_mask', shape=shape, dimensions=(x, y, z), space_order=0, dtype=np.float32) source_id = Function(name='source_id', shape=shape, dimensions=(x, y, z), space_order=0, dtype=np.int32) info("source_id data indexes start from 1 not 0 !!!") # source_id.data[nzinds[0], nzinds[1], nzinds[2]] = tuple(np.arange(1, len(nzinds[0])+1)) source_id.data[nzinds[0], nzinds[1], nzinds[2]] = tuple(np.arange(len(nzinds[0]))) source_mask.data[nzinds[0], nzinds[1], nzinds[2]] = 1 # plot3d(source_mask.data, model) info("Number of unique affected points is: %d", len(nzinds[0]) + 1) # Assert that first and last index are as expected assert (source_id.data[nzinds[0][0], nzinds[1][0], nzinds[2][0]] == 0) assert (source_id.data[nzinds[0][-1], nzinds[1][-1], nzinds[2][-1]] == len(nzinds[0]) - 1) assert (source_id.data[nzinds[0][len(nzinds[0]) - 1],
def run(dimensions=(50, 50, 50), spacing=(20.0, 20.0, 20.0), tn=1000.0,
        time_order=2, space_order=4, nbpml=40, dse='advanced', dle='advanced',
        full_run=False):
    """
    Set up a constant-velocity 3D acoustic example and run its operators.

    A single source is placed at the centre of the x/y extent, two grid
    spacings below the z-origin. 101 receivers are spread along x at the
    source's y and z coordinates.

    :param dimensions: number of grid points along each of the three axes
    :param spacing: grid spacing along each of the three axes
    :param tn: final simulation time
    :param time_order: passed to the solver; when equal to 4 the time step
                       is enlarged by a factor of 1.73
    :param space_order: passed to the solver
    :param nbpml: passed to ``Model``
    :param dse: forwarded to every operator call
    :param dle: forwarded to every operator call
    :param full_run: if False, only the forward operator is run and its
                     results are returned; if True, the adjoint, Born and
                     gradient operators are run as well
    :return: ``(gflopss, oi, timings, [rec, u.data])`` of the forward run
             when ``full_run`` is False, otherwise None
    """
    origin = (0., 0., 0.)

    # True velocity
    true_vp = 2.
    # Smooth velocity
    initial_vp = 1.8
    # Difference of the inverse squared velocities, used as Born perturbation
    dm_value = 1. / (true_vp * true_vp) - 1. / (initial_vp * initial_vp)

    model = Model(origin, spacing, dimensions, true_vp, nbpml=nbpml)
    dm = np.ones(model.shape_domain, dtype=np.float32) * dm_value

    # Define seismic data.
    f0 = .010
    dt = model.critical_dt
    if time_order == 4:
        dt *= 1.73
    t0 = 0.0
    nt = int(1 + (tn - t0) / dt)

    # Source geometry
    time_series = np.zeros((nt, 1))
    time_series[:, 0] = source(np.linspace(t0, tn, nt), f0)

    location = np.zeros((1, 3))
    location[0, 0] = origin[0] + dimensions[0] * spacing[0] * 0.5
    location[0, 1] = origin[1] + dimensions[1] * spacing[1] * 0.5
    # Depth is measured from the z-origin (index 2), not the y-origin
    location[0, 2] = origin[2] + 2 * spacing[2]
    src = PointSource(name='src', data=time_series, coordinates=location)

    # Receiver geometry
    receiver_coords = np.zeros((101, 3))
    receiver_coords[:, 0] = np.linspace(origin[0],
                                        origin[0] + dimensions[0] * spacing[0],
                                        num=101)
    receiver_coords[:, 1] = origin[1] + dimensions[1] * spacing[1] * 0.5
    # Receivers sit at the source depth (z), not at its y-coordinate
    receiver_coords[:, 2] = location[0, 2]
    rec = Receiver(name='rec', ntime=nt, coordinates=receiver_coords)

    solver = AcousticWaveSolver(model, source=src, receiver=rec,
                                time_order=time_order, space_order=space_order)

    info("Applying Forward")
    rec, u, summary = solver.forward(save=full_run, dse=dse, dle=dle)

    if not full_run:
        return summary.gflopss, summary.oi, summary.timings, [rec, u.data]

    info("Applying Adjoint")
    solver.adjoint(rec, dse=dse, dle=dle)
    info("Applying Born")
    solver.born(dm, dse=dse, dle=dle)
    info("Applying Gradient")
    solver.gradient(rec, u, dse=dse, dle=dle)
def autotune(operator, arguments, tunable, mode='basic'):
    """
    Empirically pick block sizes for an operator.

    The operator is executed once per candidate block size, using a copy
    of ``arguments`` in which output data is duplicated and squeezable
    dimensions are shortened. The candidate with the smallest total
    profiled time wins.

    :param operator: the operator to be tuned
    :param arguments: mapping from argument names to run-time values
    :param tunable: the operator arguments marked as tunable
    :param mode: 'aggressive' extends the list of attempted block sizes
    :return: an OrderedDict like ``arguments`` with the tunable entries
             replaced by the best block sizes found
    """
    # User-provided output data must not be altered, so work on copies
    output_names = [i.name for i in operator.output]
    at_arguments = arguments.copy()
    for name, value in arguments.items():
        if name in output_names:
            at_arguments[name] = value.copy()

    # Squeeze dimensions to minimize auto-tuning time
    squeezable = [i.dim.parent.name
                  for i in FindNodes(Iteration).visit(operator.body)
                  if i.is_Sequential and i.dim.is_Buffered]

    # Attempted block sizes
    mapper = OrderedDict((i.argument.name, i) for i in tunable)
    blocksizes = [OrderedDict((name, size) for name in mapper)
                  for size in options['at_blocksize']]
    if mode == 'aggressive':
        blocksizes = more_heuristic_attempts(blocksizes)

    # Note: there is only a single loop over 'blocksize' because only
    # square blocks are tested
    timings = OrderedDict()
    for blocksize in blocksizes:
        for name in at_arguments:
            if name in blocksize:
                candidate = blocksize[name]
                extent = at_arguments.get(mapper[name].original_dim.name)
                if candidate > mapper[name].iteration.end(extent):
                    # Block size cannot be larger than actual dimension
                    break
                at_arguments[name] = candidate
            elif name in squeezable:
                at_arguments[name] = options['at_squeezer']
        else:
            # Every block size was legal: time this configuration

            # Add profiler structs
            at_arguments.update(operator._extra_arguments())

            operator.cfunction(*at_arguments.values())
            elapsed = sum(operator.profiler.timings.values())
            timings[tuple(blocksize.items())] = elapsed
            info_at("<%s>: %f" %
                    (','.join('%d' % i for i in blocksize.values()), elapsed))

    best = dict(min(timings, key=timings.get))
    info('Auto-tuned block shape: %s' % best)

    # Build the new argument list
    return OrderedDict((name, best[name] if name in mapper else value)
                       for name, value in arguments.items())
def test_adjoint_J(self, shape, space_order):
    """
    Dot-product test for the FWI Jacobian operator and its adjoint.

    The Jacobian J maps a model perturbation dm to a linearized shot
    record, while its adjoint maps an adjoint source (usually a data
    residual) to the FWI gradient. The two must satisfy the conventional
    dot test:
        < Jx, y> = <x ,J^T y>
    """
    start = 0.0  # Start time
    stop = 500.  # Final time
    npoint = shape[0]  # Number of receivers
    nbpml = 10 + space_order / 2
    spacing = [10. for _ in shape]

    # Settings shared by the "true" and the initial model
    layered = dict(ratio=3, vp_top=1.5, spacing=spacing,
                   space_order=space_order, shape=shape, nbpml=nbpml,
                   dtype=np.float64)

    # Create two-layer "true" model from preset with a fault 1/3 way down
    model = demo_model('layers-isotropic', vp_bottom=2.5, **layered)

    # Derive timestepping from model spacing
    time_range = TimeAxis(start=start, stop=stop, step=model.critical_dt)

    # Define source geometry (center of domain, just below surface)
    src = RickerSource(name='src', grid=model.grid, f0=0.01,
                       time_range=time_range)
    src.coordinates.data[0, :] = np.array(model.domain_size) * .5
    src.coordinates.data[0, -1] = 30.

    # Define receiver geometry (same as source, but spread across x)
    rec = Receiver(name='nrec', grid=model.grid, time_range=time_range,
                   npoint=npoint)
    rec.coordinates.data[:, 0] = np.linspace(0., model.domain_size[0],
                                             num=npoint)
    rec.coordinates.data[:, 1:] = src.coordinates.data[0, 1:]

    # Create solver object to provide relevant operators
    solver = AcousticWaveSolver(model, source=src, receiver=rec,
                                kernel='OT2', space_order=space_order)

    # Create initial model (m0) with a constant velocity throughout
    model0 = demo_model('layers-isotropic', vp_bottom=1.5, grid=model.grid,
                        **layered)

    # Compute the full wavefield u0
    _, u0, _ = solver.forward(save=True, m=model0.m)

    # Compute initial born perturbation from m - m0
    dm = model.m.data - model0.m.data
    du, _, _, _ = solver.born(dm, m=model0.m)

    # Compute gradient from initial perturbation
    im, _ = solver.gradient(du, u0, m=model0.m)

    # Adjoint test: Verify <Ax,y> matches <x, A^Ty> closely
    term1 = np.dot(im.data.reshape(-1), dm.reshape(-1))
    term2 = linalg.norm(du.data.reshape(-1))**2
    relative = (term1 - term2) / term1
    info('<Jx,y>: %f, <x, J^Ty>: %f, difference: %4.4e, ratio: %f'
         % (term1, term2, relative, term1 / term2))
    assert np.isclose(relative, 0., rtol=1.e-10)
def test_acousticJ(shape, space_order):
    """
    Dot-product test for the acoustic Born operator and the gradient.

    A Born perturbation ``du`` is computed from the difference between a
    two-layer model and a constant-velocity model, and the gradient is
    computed from ``du``. The test asserts that ``<im, dm>`` and
    ``||du||^2`` have a ratio close to 1.
    """
    start = 0.0  # Start time
    stop = 500.  # Final time
    npoint = shape[0]  # Number of receivers
    nbpml = 10 + space_order / 2
    spacing = [15. for _ in shape]

    # Settings shared by the "true" and the initial model
    layered = dict(ratio=3, vp_top=1.5, spacing=spacing, shape=shape,
                   nbpml=nbpml)

    # Create two-layer "true" model from preset with a fault 1/3 way down
    model = demo_model('layers-isotropic', vp_bottom=2.5, **layered)

    # Derive timestepping from model spacing
    dt = model.critical_dt
    nt = int(1 + (stop - start) / dt)  # Number of timesteps
    time_values = np.linspace(start, stop, nt)  # Discretized time axis

    # Define source geometry (center of domain, just below surface)
    src = RickerSource(name='src', ndim=model.dim, f0=0.01, time=time_values)
    src.coordinates.data[0, :] = np.array(model.domain_size) * .5
    src.coordinates.data[0, -1] = 30.

    # Define receiver geometry (same as source, but spread across x)
    rec = Receiver(name='nrec', ntime=nt, npoint=npoint, ndim=model.dim)
    rec.coordinates.data[:, 0] = np.linspace(0., model.domain_size[0],
                                             num=npoint)
    rec.coordinates.data[:, 1:] = src.coordinates.data[0, 1:]

    # Create solver object to provide relevant operators
    solver = AcousticWaveSolver(model, source=src, receiver=rec,
                                time_order=2, space_order=space_order)

    # Create initial model (m0) with a constant velocity throughout
    model0 = demo_model('layers-isotropic', vp_bottom=1.5, **layered)

    # Compute the full wavefield u0
    _, u0, _ = solver.forward(save=True, m=model0.m)

    # Compute initial born perturbation from m - m0
    dm = model.m.data - model0.m.data
    du, _, _, _ = solver.born(dm, m=model0.m)

    # Compute gradient from initial perturbation
    im, _ = solver.gradient(du, u0, m=model0.m)

    # Adjoint test: Verify <Ax,y> matches <x, A^Ty> closely
    term1 = np.dot(im.data.reshape(-1), dm.reshape(-1))
    term2 = linalg.norm(du.data)**2
    ratio = term1 / term2
    info('<Ax,y>: %f, <x, A^Ty>: %f, difference: %12.12f, ratio: %f'
         % (term1, term2, term1 - term2, ratio))
    assert np.isclose(ratio, 1.0, atol=0.001)
def print_state():
    """Log each entry of ``configuration`` as a ``key: value`` line."""
    from devito.logger import info

    for entry in configuration.items():
        # Each entry is a (key, value) pair feeding both placeholders
        info('%s: %s' % entry)
def print_state():
    """Emit the current configuration, one ``key: value`` line per entry."""
    from devito.logger import info

    for name, value in configuration.items():
        line = '%s: %s' % (name, value)
        info(line)
xsrc = xsrc_full[shot_id] ysrc = ysrc_full[shot_id] zsrc = zsrc_full[shot_id] # Set up coordinates as nrec x 3 numpy array rec_coordinates = np.concatenate((xrec_full.reshape( -1, 1), yrec_full.reshape(-1, 1), zrec_full.reshape(-1, 1)), axis=1) nrec = rec_coordinates.shape[0] "" # Get MPI info comm = model.grid.distributor.comm rank = comm.Get_rank() size = comm.size info("Min value in vp is %s " % (np.min(model.vp.data[:]))) timer(t0, 'Read segy models') t0 = time.time() ######################################################################################### # Model a 3D shot # Time axis tstart = 0. tn = 1000. dt = model.critical_dt nt = int(tn / dt + 1) f0 = 0.020 time_axis = np.linspace(tstart, tn, nt) ""