def copy_massmatrix_into_dat(self):
    """Copy the cell-local blocks of the inverse velocity and stress mass
    matrices into PyOP2 Dats, so they can be applied cell-wise in the
    timestepping loop."""

    # Copy the velocity mass matrix into a Dat
    vmat = self.imass_velocity.handle
    dofs_per_entity = self.U.fiat_element.entity_dofs()
    dofs_per_entity = sum(self.mesh.make_dofs_per_plex_entity(dofs_per_entity))
    arity = dofs_per_entity * self.U.topological.dim
    self.velocity_mass_asdat = Dat(DataSet(self.mesh.cell_set, arity * arity),
                                   dtype='double')
    istart, iend = vmat.getOwnershipRange()
    idxs = [PETSc.IS().createGeneral(np.arange(i, i + arity, dtype=np.int32),
                                     comm=PETSc.COMM_SELF)
            for i in range(istart, iend, arity)]
    submats = vmat.getSubMatrices(idxs, idxs)
    for i, m in enumerate(submats):
        self.velocity_mass_asdat.data[i] = m[:, :].flatten()
    info("Computed velocity mass matrix")

    # Copy the stress mass matrix into a Dat
    smat = self.imass_stress.handle
    dofs_per_entity = self.S.fiat_element.entity_dofs()
    dofs_per_entity = sum(self.mesh.make_dofs_per_plex_entity(dofs_per_entity))
    arity = dofs_per_entity * self.S.topological.dim
    self.stress_mass_asdat = Dat(DataSet(self.mesh.cell_set, arity * arity),
                                 dtype='double')
    istart, iend = smat.getOwnershipRange()
    idxs = [PETSc.IS().createGeneral(np.arange(i, i + arity, dtype=np.int32),
                                     comm=PETSc.COMM_SELF)
            for i in range(istart, iend, arity)]
    submats = smat.getSubMatrices(idxs, idxs)
    for i, m in enumerate(submats):
        self.stress_mass_asdat.data[i] = m[:, :].flatten()
    info("Computed stress mass matrix")
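
# Illustrative sketch (not part of the original code): each entry of the Dats
# built above holds one cell's inverse mass matrix, flattened row-major into a
# vector of length arity * arity. Applying it to a cell-local right-hand-side
# vector is therefore a dense (arity x arity) mat-vec, which is what the
# per-cell solves in the timestepping loop amount to. The names below
# (apply_cell_block, flat_block, cell_rhs) are hypothetical.
import numpy as np

def apply_cell_block(flat_block, cell_rhs):
    """Apply one flattened arity*arity inverse-mass block to a cell-local RHS."""
    arity = cell_rhs.shape[0]
    assert flat_block.shape[0] == arity * arity
    return flat_block.reshape(arity, arity).dot(cell_rhs)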
def explosive_source_lf4(self, T=2.5, TS=0, Lx=300.0, Ly=150.0, h=2.5, cn=0.05,
                         mesh_file=None, output=1, poly_order=2, params=None):
    """Set up and run the 2D explosive source test case.

    :param float T: The finish time of the simulation.
    :param int TS: The maximum number of timesteps performed; ignored if 0.
    :param float Lx, Ly: Extent of the rectangular domain.
    :param float h: Mesh spacing.
    :param float cn: Courant number used to compute the timestep.
    :param str mesh_file: Optional external mesh; a RectangleMesh is built otherwise.
    :param int output: Period, in timesteps, at which solution fields are written.
    :param int poly_order: Polynomial degree of the basis functions.
    :param dict params: Tiling and optimisation parameters.
    :returns: The final velocity and stress fields.
    """
    tile_size = params['tile_size']
    num_unroll = params['num_unroll']
    extra_halo = params['extra_halo']
    part_mode = params['partitioning']
    explicit_mode = params['explicit_mode']

    if explicit_mode:
        fusion_scheme = FusionSchemes.get(explicit_mode, part_mode, tile_size)
        num_solves, params['explicit_mode'] = fusion_scheme
    else:
        num_solves = ElasticLF4.num_solves

    if mesh_file:
        mesh = Mesh(mesh_file)
    else:
        mesh = RectangleMesh(int(Lx / h), int(Ly / h), Lx, Ly)

    set_log_level(INFO)

    kwargs = {}
    if params['mode'] in ['tile', 'only_tile']:
        s_depth = calculate_sdepth(num_solves, num_unroll, extra_halo)
        if part_mode == 'metis':
            kwargs['reorder'] = ('metis-rcm', mesh.num_cells() / tile_size)
    else:
        s_depth = 1
    # FIXME: need s_depth in firedrake to be able to use this
    # kwargs['s_depth'] = s_depth
    params['s_depth'] = s_depth

    mesh.topology.init(**kwargs)
    slope(mesh, debug=True)

    # Instantiate the model
    self.elastic = ElasticLF4(mesh, "DG", poly_order, 2, output, params)

    info("S-depth used: %d" % s_depth)
    info("Polynomial order: %d" % poly_order)

    # Constants
    self.elastic.density = 1.0
    self.elastic.mu = 3600.0
    self.elastic.l = 3599.3664

    self.Vp = Vp(self.elastic.mu, self.elastic.l, self.elastic.density)
    self.Vs = Vs(self.elastic.mu, self.elastic.density)
    info("P-wave velocity: %f" % self.Vp)
    info("S-wave velocity: %f" % self.Vs)

    self.dx = h
    self.courant_number = cn
    self.elastic.dt = cfl_dt(self.dx, self.Vp, self.courant_number)
    info("Using a timestep of %f" % self.elastic.dt)

    # Source
    exp_area = (44.5, 45.5, Ly - 1.5, Ly - 0.5)
    if poly_order == 1:
        # Adjust explosion area
        exp_area = (149.5, 150.5, Ly - 1.5, Ly - 0.5)
    a = 159.42
    self.elastic.source_expression = Expression(
        (("x[0] >= %f && x[0] <= %f && x[1] >= %f && x[1] <= %f ? (-1.0 + 2*a*pow(t - 0.3, 2))*exp(-a*pow(t - 0.3, 2)) : 0.0" % exp_area,
          "0.0"),
         ("0.0",
          "x[0] >= %f && x[0] <= %f && x[1] >= %f && x[1] <= %f ? (-1.0 + 2*a*pow(t - 0.3, 2))*exp(-a*pow(t - 0.3, 2)) : 0.0" % exp_area)),
        a=a, t=0)
    self.elastic.source_function = Function(self.elastic.S)
    self.elastic.source = self.elastic.source_expression

    # Absorption
    F = FunctionSpace(mesh, "DG", poly_order, name='F')
    self.elastic.absorption_function = Function(F)
    self.elastic.absorption = Expression(
        "x[0] <= 20 || x[0] >= %f || x[1] <= 20.0 ? 1000 : 0" % (Lx - 20,))

    # Initial conditions
    uic = Expression(('0.0', '0.0'))
    self.elastic.u0.assign(Function(self.elastic.U).interpolate(uic))
    sic = Expression((('0', '0'), ('0', '0')))
    self.elastic.s0.assign(Function(self.elastic.S).interpolate(sic))

    # Run the simulation
    start, end, ntimesteps, u1, s1 = self.elastic.run(T, TS=TS)

    # Print runtime summary
    output_time(start, end,
                tofile=params['tofile'],
                verbose=params['verbose'],
                meshid=("h%s" % h).replace('.', ''),
                ntimesteps=ntimesteps,
                nloops=ElasticLF4.loop_chain_length * num_unroll,
                partitioning=part_mode,
                tile_size=tile_size,
                extra_halo=extra_halo,
                explicit_mode=explicit_mode,
                glb_maps=params['use_glb_maps'],
                prefetch=params['use_prefetch'],
                coloring=params['coloring'],
                poly_order=poly_order,
                domain=os.path.splitext(os.path.basename(mesh.name))[0],
                function_spaces=[self.elastic.S, self.elastic.U])

    return u1, s1
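
# Illustrative sketch (not part of the original code): the Vp, Vs and cfl_dt
# helpers used above are defined elsewhere in the benchmark. The standard
# definitions they are assumed to implement are the isotropic elastic wave
# speeds and a CFL-limited timestep:
#   Vp = sqrt((l + 2*mu) / rho),  Vs = sqrt(mu / rho),  dt = cn * dx / Vp
# The underscore-prefixed names below are placeholders, not the actual helpers.
from math import sqrt

def _vp(mu, l, density):
    """P-wave velocity of an isotropic elastic medium."""
    return sqrt((l + 2 * mu) / density)

def _vs(mu, density):
    """S-wave velocity of an isotropic elastic medium."""
    return sqrt(mu / density)

def _cfl_dt(dx, vp, courant_number):
    """Timestep limited by the Courant number and the fastest wave speed."""
    return courant_number * dx / vp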
def run(self, T, TS=0):
    """ Run the elastic wave simulation until t = T or ntimesteps = TS.

    :param float T: The finish time of the simulation.
    :param int TS: The maximum number of timesteps performed; ignored if 0.
    :returns: The start and end wall-clock times, the number of timesteps
        performed, and the final velocity and stress fields.
    """
    # Write out the initial condition.
    self.write(self.u1, self.s1, self.tofile)

    info("Generating inverse mass matrix")
    # Pre-assemble the inverse mass matrices, which should stay constant
    # throughout the simulation (assuming no mesh adaptivity).
    start = time()
    self.assemble_inverse_mass()
    end = time()
    info("DONE! (Elapsed: %f s)" % round(end - start, 3))
    op2.MPI.COMM_WORLD.barrier()

    info("Copying inverse mass matrix into a dat...")
    start = time()
    self.copy_massmatrix_into_dat()
    end = time()
    info("DONE! (Elapsed: %f s)" % round(end - start, 3))
    op2.MPI.COMM_WORLD.barrier()

    start = time()
    t = self.dt
    timestep = 0
    ntimesteps = sys.maxint if TS == 0 else TS

    while t <= T + 1e-12 and timestep < ntimesteps:
        if op2.MPI.COMM_WORLD.rank == 0 and timestep % self.output == 0:
            info("t = %f, (timestep = %d)" % (t, timestep))
        with loop_chain("main1",
                        tile_size=self.tiling_size,
                        num_unroll=self.tiling_uf,
                        mode=self.tiling_mode,
                        extra_halo=self.tiling_halo,
                        explicit=self.tiling_explicit,
                        use_glb_maps=self.tiling_glb_maps,
                        use_prefetch=self.tiling_prefetch,
                        coloring=self.tiling_coloring,
                        ignore_war=True,
                        log=self.tiling_log):
            # In case the source is time-dependent, update the time 't' here.
            if self.source:
                with timed_region('source term update'):
                    self.source_expression.t = t
                    self.source = self.source_expression

            # Solve for the velocity vector field.
            self.solve(self.rhs_uh1, self.velocity_mass_asdat, self.uh1)
            self.solve(self.rhs_stemp, self.stress_mass_asdat, self.stemp)
            self.solve(self.rhs_uh2, self.velocity_mass_asdat, self.uh2)
            self.solve(self.rhs_u1, self.velocity_mass_asdat, self.u1)

            # Solve for the stress tensor field.
            self.solve(self.rhs_sh1, self.stress_mass_asdat, self.sh1)
            self.solve(self.rhs_utemp, self.velocity_mass_asdat, self.utemp)
            self.solve(self.rhs_sh2, self.stress_mass_asdat, self.sh2)
            self.solve(self.rhs_s1, self.stress_mass_asdat, self.s1)

        self.u0.assign(self.u1)
        self.s0.assign(self.s1)

        # Write out the new fields
        self.write(self.u1, self.s1, self.tofile and timestep % self.output == 0)

        # Move onto next timestep
        t += self.dt
        timestep += 1

    # Write out the final state of the fields
    self.write(self.u1, self.s1, self.tofile)

    end = time()

    return start, end, timestep, self.u1, self.s1
def __init__(self, mesh, family, degree, dimension, output=1, params=None):
    r""" Initialise a new elastic wave simulation.

    :param mesh: The underlying computational mesh of vertices and edges.
    :param str family: Specify whether CG or DG should be used.
    :param int degree: Use polynomial basis functions of this degree.
    :param int dimension: The spatial dimension of the problem (1, 2 or 3).
    :param int output: Period, in timesteps, at which to write solution fields to file.
    :param dict params: Simulation and optimisation parameters.
    :returns: None
    """
    self.degree = degree
    self.mesh = mesh
    self.dimension = dimension
    self.output = output
    self.tofile = params['tofile']

    self.S = TensorFunctionSpace(mesh, family, degree, name='S')
    self.U = VectorFunctionSpace(mesh, family, degree, name='U')
    # Assumes that the S and U function spaces are the same.
    self.S_tot_dofs = op2.MPI.COMM_WORLD.allreduce(self.S.dof_count, op=mpi4py.MPI.SUM)
    self.U_tot_dofs = op2.MPI.COMM_WORLD.allreduce(self.U.dof_count, op=mpi4py.MPI.SUM)
    info("Number of degrees of freedom (Velocity): %d" % self.U_tot_dofs)
    info("Number of degrees of freedom (Stress): %d" % self.S_tot_dofs)

    self.s = TrialFunction(self.S)
    self.v = TestFunction(self.S)
    self.u = TrialFunction(self.U)
    self.w = TestFunction(self.U)

    self.s0 = Function(self.S, name="StressOld")
    self.sh1 = Function(self.S, name="StressHalf1")
    self.stemp = Function(self.S, name="StressTemp")
    self.sh2 = Function(self.S, name="StressHalf2")
    self.s1 = Function(self.S, name="StressNew")

    self.u0 = Function(self.U, name="VelocityOld")
    self.uh1 = Function(self.U, name="VelocityHalf1")
    self.utemp = Function(self.U, name="VelocityTemp")
    self.uh2 = Function(self.U, name="VelocityHalf2")
    self.u1 = Function(self.U, name="VelocityNew")

    self.absorption_function = None
    self.source_function = None
    self.source_expression = None
    self._dt = None
    self._density = None
    self._mu = None
    self._l = None

    self.n = FacetNormal(self.mesh)
    self.I = Identity(self.dimension)

    # Tiling options
    self.tiling_size = params['tile_size']
    self.tiling_uf = params['num_unroll']
    self.tiling_mode = params['mode']
    self.tiling_halo = params['extra_halo']
    self.tiling_explicit = params['explicit_mode']
    self.tiling_explicit_id = params['explicit_mode_id']
    self.tiling_log = params['log']
    self.tiling_sdepth = params['s_depth']
    self.tiling_part = params['partitioning']
    self.tiling_coloring = params['coloring']
    self.tiling_glb_maps = params['use_glb_maps']
    self.tiling_prefetch = params['use_prefetch']

    # Mat-vec AST cache
    self.asts = {}

    if self.tofile:
        # File output streams
        platform = os.environ.get('NODENAME', 'unknown')
        tmpdir = os.environ['TMPDIR']
        base = os.path.join(tmpdir, 'output', platform,
                            'p%d' % self.degree, 'uf%d' % self.tiling_uf)
        if op2.MPI.COMM_WORLD.rank == 0:
            if not os.path.exists(base):
                os.makedirs(base)
            sub_dirs = [d for d in os.listdir(base)
                        if os.path.isdir(os.path.join(base, d))]
            sub_dir = "%d_em%d_part%s_tile%s" % (
                len(sub_dirs),
                self.tiling_explicit_id,
                self.tiling_size if self.tiling_uf else 0,
                self.tiling_part if self.tiling_uf else 'None')
            base = os.path.join(base, sub_dir)
            os.makedirs(base)
        op2.MPI.COMM_WORLD.barrier()
        base = op2.MPI.COMM_WORLD.bcast(base, root=0)
        self.u_stream = File(os.path.join(base, 'velocity.pvd'))
        self.s_stream = File(os.path.join(base, 'stress.pvd'))
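
# Illustrative sketch (not part of the original code): the constructor above
# expects `params` to provide at least the keys it reads. A minimal,
# hypothetical dictionary for an untiled run might look as follows; the values
# are placeholders, not recommended settings. In the benchmark itself this
# dictionary is assembled from command-line arguments.
example_params = {
    'tofile': False,
    'tile_size': 0,
    'num_unroll': 0,
    'mode': None,
    'extra_halo': 0,
    'explicit_mode': None,
    'explicit_mode_id': 0,
    'log': False,
    's_depth': 1,
    'partitioning': 'chunk',
    'coloring': 'default',
    'use_glb_maps': False,
    'use_prefetch': False,
}
# e.g. elastic = ElasticLF4(mesh, "DG", degree=2, dimension=2,
#                           output=1, params=example_params)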
    'explicit_mode': args.explicit_mode,
    'explicit_mode_id': args.explicit_mode,
    'use_glb_maps': args.glb_maps,
    'use_prefetch': args.prefetch,
    'log': args.log,
    'tofile': args.tofile,
    'verbose': args.verbose
}

# Set the kernel optimisation level (default: O2)
parameters['coffee']['optlevel'] = args.coffee_opt

# Is it just a run to check correctness?
if args.check:
    Lx, Ly, h, time_max, tolerance = 20, 20, 2.5, 0.01, 1e-10
    info("Checking correctness of original and tiled versions, with:")
    info("    (Lx, Ly, T, tolerance)=%s" % str((Lx, Ly, time_max, tolerance)))
    info("    %s" % params)
    # Run the tiled variant
    u1, s1 = ExplosiveSourceLF4().explosive_source_lf4(T=time_max, Lx=Lx, Ly=Ly, h=h,
                                                       output=sys.maxint, params=params)
    # Run the original code
    original = {'num_unroll': 0, 'tile_size': 0, 'mode': None,
                'partitioning': 'chunk', 'extra_halo': 0}
    u1_orig, s1_orig = ExplosiveSourceLF4().explosive_source_lf4(