def assemble_ufl_form(self, ufl_form, vertical_bcs=False):
    '''Assemble the banded matrix from a bilinear UFL form.

    The form is compiled to a local kernel, which is run over all cells to
    fill a cell-wise (DG0) field holding ``nrow*ncol`` local matrix entries
    per cell.  That field is then passed to :meth:`_assemble_lma` together
    with the ``vertical_bcs`` flag.

    :arg ufl_form: UFL form to assemble; it must have exactly two arguments
    :arg vertical_bcs: Apply homogeneous Dirichlet boundary conditions on
        the top and bottom surfaces (forwarded to :meth:`_assemble_lma`)
    '''
    with timed_region('bandedmatrix compile_ufl_form'):
        form_data = compile_form(ufl_form, 'ufl_form')[0]
    # Entries 6, 3 and 4 of the compiled form are used as the kernel,
    # the coordinate field and the list of coefficients respectively.
    local_kernel = form_data[6]
    coordinates = form_data[3]
    form_coefficients = form_data[4]

    form_arguments = ufl_form.arguments()
    assert len(form_arguments) == 2, 'Not a bilinear form'
    n_row_dofs = form_arguments[0].cell_node_map().arity
    n_col_dofs = form_arguments[1].cell_node_map().arity

    # One DG0 dof per cell, carrying the flattened local matrix
    cell_space = FunctionSpace(self._mesh, 'DG', 0)
    local_matrices = Function(
        cell_space,
        val=op2.Dat(cell_space.node_set**(n_row_dofs*n_col_dofs)))

    loop_args = [
        local_matrices.dat(op2.INC,
                           local_matrices.cell_node_map()[op2.i[0]]),
        coordinates.dat(op2.READ, coordinates.cell_node_map(),
                        flatten=True),
    ]
    loop_args.extend(coeff.dat(op2.READ, coeff.cell_node_map(),
                               flatten=True)
                     for coeff in form_coefficients)

    with timed_region('bandedmatrix assemble_ufl_form'):
        op2.par_loop(local_kernel, local_matrices.cell_set, *loop_args)
    self._assemble_lma(local_matrices, vertical_bcs)
def apply(self, phi):
    r'''Apply operator.

    Apply the operator :math:`\hat{H}` to a field :math:`\phi` in a
    matrix free way by applying the individual components in turn and
    return the result :math:`\hat{H}\phi`.

    The horizontal part is either applied through the preassembled
    matrix ``self._mat_Hhat_h`` (when ``self._preassemble_horizontal`` is
    set) or by assembling the two stored forms with a division by
    ``self._Mu_h`` in between.  The vertical part is applied in place to
    a copy of ``phi`` via ``self._Hhat_v.ax``.

    :arg phi: Pressure field :math:`\phi` to apply the operator to
    '''
    level_tag = str(self._level)
    with timed_region('apply_Hhat_level_' + level_tag):
        # Work on a copy, since the vertical operator acts in place
        self._phi_tmp.assign(phi)
        with timed_region('apply_Hhat_h_level_' + level_tag):
            if self._preassemble_horizontal:
                # The PETSc mat-vec is not a ParLoop, so its wall-clock
                # time is recorded manually in the ParLoop perfdata.
                t_start = time.time()
                with self._BT_B_h_phi.dat.vec as v:
                    with phi.dat.vec_ro as x:
                        self._mat_Hhat_h.mult(x, v)
                t_end = time.time()
                ParLoop.perfdata[self._ax_label].add_timing(t_end - t_start)
            else:
                # Calculate action of B_h
                assemble(self._B_h_phi_form, tensor=self._B_h_phi)
                # divide by horizontal velocity mass matrix
                self._Mu_h.divide(self._B_h_phi)
                # Calculate action of B_h^T
                assemble(self._BT_B_h_phi_form, tensor=self._BT_B_h_phi)
        vertical_label = 'apply_Hhat_z_level_' + level_tag
        with timed_region(vertical_label):
            self._Hhat_v._label = vertical_label
            self._Hhat_v.ax(self._phi_tmp)
        return assemble(self._phi_tmp + self._omega_c2*self._BT_B_h_phi)
def callback(self):
    """Finish initialisation.

    Grows the halo when running on more than one process, optionally
    computes an RCM-based reordering, marks the OP2 entity classes and
    derives the Plex renumbering together with the cell and vertex
    numberings.  ``reorder`` and ``dim`` are taken from the enclosing
    scope.
    """
    del self._callback
    if op2.MPI.comm.size > 1:
        self._plex.distributeOverlap(1)
    self._grown_halos = True

    reordering = None  # stays None when no reordering is requested
    if reorder:
        with timed_region("Mesh: reorder"):
            forward = self._plex.getOrdering(PETSc.Mat.OrderingType.RCM).indices
            # Invert the permutation returned by the ordering
            reordering = np.empty_like(forward)
            reordering[forward] = np.arange(forward.size, dtype=forward.dtype)
    self._did_reordering = bool(reorder)

    # Mark OP2 entities and derive the resulting Plex renumbering
    with timed_region("Mesh: renumbering"):
        dmplex.mark_entity_classes(self._plex)
        self._entity_classes = dmplex.get_entity_classes(self._plex)
        self._plex_renumbering = dmplex.plex_renumbering(self._plex,
                                                         self._entity_classes,
                                                         reordering)

    with timed_region("Mesh: cell numbering"):
        def numbering_section(position):
            # Section with a single dof on the entities selected by
            # ``position`` in the per-entity dof array, zero elsewhere.
            dofs_per_entity = np.zeros(dim + 1, dtype=np.int32)
            dofs_per_entity[position] = 1
            return self._plex.createSection([1], dofs_per_entity,
                                            perm=self._plex_renumbering)

        # Derive a cell numbering from the Plex renumbering
        self._cell_numbering = numbering_section(-1)
        self._vertex_numbering = numbering_section(0)
def forward_elimination(self, pc, x):
    """Move the incoming residual into the broken spaces and assemble
    the right-hand side of the multiplier system.

    :arg pc: the preconditioner object (not used in this method).
    :arg x: a PETSc vector which is copied into ``unbroken_residual``.
    """
    with timed_region("HybridBreak"):
        with self.unbroken_residual.dat.vec_wo as residual_vec:
            x.copy(residual_vec)

        unbroken_parts = self.unbroken_residual.split()
        broken_parts = self.broken_residual.split()

        # Transfer unbroken_rhs into broken_rhs
        # NOTE: Scalar space is already "broken" so no need for
        # any projections
        unbroken_parts[self.pidx].dat.copy(broken_parts[self.pidx].dat)

        # Assemble the new "broken" hdiv residual
        # We need a residual R' in the broken space that
        # gives R'[w] = R[w] when w is in the unbroken space.
        # We do this by splitting the residual equally between
        # basis functions that add together to give unbroken
        # basis functions.
        hdiv_source = unbroken_parts[self.vidx]
        hdiv_target = broken_parts[self.vidx]
        hdiv_target.assign(0)
        kernel_args = {"w": (self.weight, READ),
                       "vec_in": (hdiv_source, READ),
                       "vec_out": (hdiv_target, INC)}
        par_loop(self.average_kernel, ufl.dx, kernel_args)

    with timed_region("HybridRHS"):
        # Compute the rhs for the multiplier system
        self._assemble_Srhs()
def solve(self):
    """
    Apply the solver with rhs state.xrhs and result state.dy.

    Runs the hybridized solver, averages the broken velocity into
    ``self.u_hdiv`` with the averaging kernel, re-applies the boundary
    conditions, copies velocity and density into ``state.dy`` and
    finally reconstructs theta.
    """
    # Solve the hybridized system
    self.hybridized_solver.solve()

    broken_velocity, density, _ = self.urhol0.split()
    hdiv_velocity = self.u_hdiv

    # Project broken_u into the HDiv space
    hdiv_velocity.assign(0.0)
    with timed_region("Gusto:HybridProjectHDiv"):
        averaging_args = {"w": (self._weight, READ),
                          "vec_in": (broken_velocity, READ),
                          "vec_out": (hdiv_velocity, INC)}
        par_loop(self._average_kernel, dx, averaging_args)

    # Reapply bcs to ensure they are satisfied
    for boundary_condition in self.bcs:
        boundary_condition.apply(hdiv_velocity)

    # Copy back into u and rho cpts of dy
    u, rho, theta = self.state.dy.split()
    u.assign(hdiv_velocity)
    rho.assign(density)

    # Reconstruct theta
    with timed_region("Gusto:ThetaRecon"):
        self.theta_solver.solve()

    # Copy into theta cpt of dy
    theta.assign(self.theta)
def apply(self, pc, x, y):
    """Solve the reduced system for the Lagrange multipliers and
    recover the remaining fields.

    The incoming vector ``x`` is copied into ``self.residual``, the
    reduced right-hand side is assembled, the trace system is solved
    with ``self.trace_ksp`` and the two remaining fields are assembled
    by ``_assemble_u`` and ``_assemble_q``.  The full solution is then
    copied into ``y``.

    :arg pc: the preconditioner object (not used in this method).
    :arg x: a PETSc vector holding the incoming residual.
    :arg y: a PETSc vector receiving the solution.
    """
    with self.residual.dat.vec_wo as residual_vec:
        x.copy(residual_vec)

    with timed_region("HybridSCRHS"):
        # Now assemble residual for the reduced problem
        self._assemble_Srhs()

    with timed_region("HybridSCSolve"):
        # Solve the system for the Lagrange multipliers
        with self.r_lambda.dat.vec_ro as rhs_vec:
            # Keep the current trace values only when the KSP is set
            # up to use a nonzero initial guess.
            use_guess = self.trace_ksp.getInitialGuessNonzero()
            trace_dat = self.solution.split()[2].dat
            trace_access = trace_dat.vec if use_guess else trace_dat.vec_wo
            with trace_access as trace_vec:
                self.trace_ksp.solve(rhs_vec, trace_vec)

    with timed_region("HybridSCReconstruct"):
        # Recover u_h and q_h
        self._assemble_u()
        self._assemble_q()

    with self.solution.dat.vec_ro as solution_vec:
        solution_vec.copy(y)
def __init__(self, mesh, family, degree, dimension, output=True):
    r"""Initialise a new elastic wave simulation.

    Builds the stress (tensor) and velocity (vector) function spaces,
    the trial/test functions and the work fields of the scheme, and
    resets all problem parameters to ``None``.  Output streams are only
    opened when ``output`` is true.

    :param mesh: The underlying computational mesh of vertices and edges.
    :param str family: Specify whether CG or DG should be used.
    :param int degree: Use polynomial basis functions of this degree.
    :param int dimension: The spatial dimension of the problem (1, 2 or 3).
    :param bool output: If True, output the solution fields to a file.
    :returns: None
    """
    with timed_region('function setup'):
        self.mesh = mesh
        self.dimension = dimension
        self.output = output

        self.S = TensorFunctionSpace(mesh, family, degree, name='S')
        self.U = VectorFunctionSpace(mesh, family, degree, name='U')
        # Assumes that the S and U function spaces are the same.
        total_dofs = self.mesh.comm.allreduce(self.S.dof_count,
                                              op=mpi4py.MPI.SUM)
        log("Number of degrees of freedom: %d" % total_dofs)

        self.s = TrialFunction(self.S)
        self.v = TestFunction(self.S)
        self.u = TrialFunction(self.U)
        self.w = TestFunction(self.U)

        # Stress fields, created in the same order as listed
        stress_fields = (("s0", "StressOld"),
                         ("sh1", "StressHalf1"),
                         ("stemp", "StressTemp"),
                         ("sh2", "StressHalf2"),
                         ("s1", "StressNew"))
        for attribute, field_name in stress_fields:
            setattr(self, attribute, Function(self.S, name=field_name))

        # Velocity fields, created in the same order as listed
        velocity_fields = (("u0", "VelocityOld"),
                           ("uh1", "VelocityHalf1"),
                           ("utemp", "VelocityTemp"),
                           ("uh2", "VelocityHalf2"),
                           ("u1", "VelocityNew"))
        for attribute, field_name in velocity_fields:
            setattr(self, attribute, Function(self.U, name=field_name))

        # Problem data, to be provided later
        self.absorption_function = None
        self.source_function = None
        self.source_expression = None
        self._dt = None
        self._density = None
        self._mu = None
        self._l = None

        self.n = FacetNormal(self.mesh)
        self.I = Identity(self.dimension)

        # Inverse mass matrices for explicit methods
        self.invmass_velocity = None
        self.invmass_stress = None

    if self.output:
        with timed_region('i/o'):
            # File output streams
            self.u_stream = File("velocity.pvd")
            self.s_stream = File("stress.pvd")
def run(self, T):
    """Run the elastic wave simulation until t = T.

    The initial fields are written first, then :meth:`setup` is called
    and the time loop advances in steps of ``self.dt`` starting from
    ``t = self.dt``.  Each step performs four velocity-stage solves and
    four stress-stage solves, evaluates the pending trace and writes the
    new fields.

    :param float T: The finish time of the simulation.
    :returns: The final solution fields for velocity and stress.
    """
    # Write out the initial condition.
    self.write(self.u1, self.s1)

    # Call solver-specific setup
    self.setup()

    # Small tolerance so the final step is not lost to round-off
    end_time = T + 1e-12

    with timed_region('timestepping'):
        t = self.dt
        while t <= end_time:
            log("t = %f" % t)
            with self.loop_context():
                # In case the source is time-dependent, update the
                # time 't' here.
                if self.source:
                    with timed_region('source term update'):
                        self.source_expression.t = t
                        self.source = self.source_expression

                # Solve for the velocity vector field.
                with timed_region('velocity solve'):
                    self.solve(self.ctx_uh1, self.invmass_velocity, self.uh1)
                    self.solve(self.ctx_stemp, self.invmass_stress, self.stemp)
                    self.solve(self.ctx_uh2, self.invmass_velocity, self.uh2)
                    self.solve(self.ctx_u1, self.invmass_velocity, self.u1)
                    self.u0.assign(self.u1)

                # Solve for the stress tensor field.
                with timed_region('stress solve'):
                    self.solve(self.ctx_sh1, self.invmass_stress, self.sh1)
                    self.solve(self.ctx_utemp, self.invmass_velocity, self.utemp)
                    self.solve(self.ctx_sh2, self.invmass_stress, self.sh2)
                    self.solve(self.ctx_s1, self.invmass_stress, self.s1)
                    self.s0.assign(self.s1)

            # Execute the above scheduled Parloops
            _trace.evaluate_all()

            # Write out the new fields
            self.write(self.u1, self.s1)

            # Move onto next timestep
            t += self.dt

    return self.u1, self.s1
def backward_substitution(self, pc, y):
    """Perform the backwards recovery of eliminated fields.

    After both eliminated unknowns are reassembled, the broken solution
    is transferred to the unbroken spaces (a plain copy for the pressure
    part, the averaging kernel for the hdiv part) and the result is
    copied into ``y``.

    :arg pc: a Preconditioner instance.
    :arg y: a PETSc vector for placing the resulting fields.
    """
    # We assemble the unknown which is an expression
    # of the first eliminated variable.
    self._sub_unknown()
    # Recover the eliminated unknown
    self._elim_unknown()

    with timed_region("HybridProject"):
        broken_parts = self.broken_solution.split()
        unbroken_parts = self.unbroken_solution.split()

        # Project the broken solution into non-broken spaces
        broken_parts[self.pidx].dat.copy(unbroken_parts[self.pidx].dat)

        # Compute the hdiv projection of the broken hdiv solution
        hdiv_source = broken_parts[self.vidx]
        hdiv_target = unbroken_parts[self.vidx]
        hdiv_target.assign(0)
        kernel_args = {"w": (self.weight, READ),
                       "vec_in": (hdiv_source, READ),
                       "vec_out": (hdiv_target, INC)}
        par_loop(self.average_kernel, ufl.dx, kernel_args,
                 is_loopy_kernel=True)

    with self.unbroken_solution.dat.vec_ro as solution_vec:
        solution_vec.copy(y)
def vcycle(self, level=None):
    '''Recursive implementation of multigrid V-cycle.

    On the coarsest level the coarse grid solver is called.  On every
    other level the cycle presmooths (starting from a zero initial
    guess), restricts the residual to the next coarser level, recurses,
    prolongates the coarse solution and adds it as a correction, and
    finally postsmooths.

    :arg level: multigrid level, if None, start on finest level.
    '''
    # Identity test: ``None`` is a singleton, so ``is`` is the correct
    # comparison and cannot be affected by a custom ``__eq__``.
    if level is None:
        level = self._fine_level
    with timed_region("vcycle_level_"+str(level)):
        if level == self._coarsest_level:
            # Solve on the coarsest level with the coarse grid solver
            self._coarsegrid_solver.solve(self._rhs[level],
                                          self._phi[level])
        else:
            # Recursion on all other levels
            # Presmoother (is told that the initial solution is zero)
            self._presmoother_hierarchy[level].smooth(
                self._rhs[level],
                self._phi[level],
                initial_phi_is_zero=True)
            self._residual[level].assign(
                self._operator_hierarchy[level].residual(
                    self._rhs[level],
                    self._phi[level]))
            # Restrict residual to RHS on coarser level
            self._residual.restrict(level)
            self._rhs[level-1].assign(self._residual[level-1])
            # Recursive call
            self.vcycle(level-1)
            # Prolongate and add coarse grid correction; the residual
            # hierarchy is reused as storage for the prolongation
            self._residual[level-1].assign(self._phi[level-1])
            self._residual.prolong(level-1)
            self._phi[level].assign(self._residual[level]+self._phi[level])
            # Postsmooth
            self._postsmoother_hierarchy[level].smooth(
                self._rhs[level],
                self._phi[level],
                initial_phi_is_zero=False)
def backward_substitution(self, pc, y):
    """Recover the eliminated fields and copy the unbroken solution
    into ``y``.

    :arg pc: the preconditioner object (not used in this method).
    :arg y: a PETSc vector receiving the unbroken solution.
    """
    # We assemble the unknown which is an expression
    # of the first eliminated variable.
    self._sub_unknown()
    # Recover the eliminated unknown
    self._elim_unknown()

    with timed_region("HybridProject"):
        broken_parts = self.broken_solution.split()
        unbroken_parts = self.unbroken_solution.split()

        # Project the broken solution into non-broken spaces
        broken_parts[self.pidx].dat.copy(unbroken_parts[self.pidx].dat)

        # Compute the hdiv projection of the broken hdiv solution
        hdiv_source = broken_parts[self.vidx]
        hdiv_target = unbroken_parts[self.vidx]
        hdiv_target.assign(0)
        kernel_args = {"w": (self.weight, READ),
                       "vec_in": (hdiv_source, READ),
                       "vec_out": (hdiv_target, INC)}
        par_loop(self.average_kernel, ufl.dx, kernel_args)

    with self.unbroken_solution.dat.vec_ro as solution_vec:
        solution_vec.copy(y)
def inspect(self):
    """Inspect the loop chain and produce a :class:`Schedule`.

    Returns the cached schedule if the inspector has already run.  If
    the skip heuristic fires, the inspection inputs are discarded and
    the current schedule is returned without running the inspection.
    Otherwise the fusion/tiling passes selected by ``self.mode`` are run.

    :returns: ``self._schedule``
    :raises RuntimeError: if ``self.mode`` is not one of
        ``Inspector._modes``.
    """
    # Attributes that are only needed as input to the inspection
    inspection_inputs = ('_name', '_loop_chain', '_mode', '_options')

    if self._initialized:
        # An inspection plan is in cache.
        return self._schedule
    elif self._heuristic_skip_inspection():
        # Not in cache, and too premature for running a potentially
        # costly inspection
        for attribute in inspection_inputs:
            delattr(self, attribute)
        return self._schedule

    # Is `mode` legal ?
    if self.mode not in Inspector._modes:
        # Interpolate the message; passing the modes as a second
        # argument would leave the raw '%s' in the exception text.
        raise RuntimeError("Inspection accepts only %s fusion modes"
                           % (Inspector._modes,))

    with timed_region("ParLoopChain `%s`: inspector" % self._name):
        if self.mode in ['soft', 'hard', 'tile']:
            self._soft_fuse()
        if self.mode in ['hard', 'tile']:
            self._hard_fuse()
        if self.mode in ['tile', 'only_tile', 'only_omp']:
            self._tile()

    # A schedule has been computed. The Inspector is initialized and
    # therefore retrievable from cache. We then blow away everything we
    # don't need any more.
    self._initialized = True
    for attribute in inspection_inputs:
        delattr(self, attribute)
    return self._schedule
def inv_diagonal(self):
    '''Extract inverse diagonal entries.

    For a banded matrix with alpha=beta, create a new banded matrix
    (alpha=beta=1, gamma_m=gamma_p=0) which contains the inverse
    diagonal entries only.

    :returns: new :class:`BandedMatrix` holding the inverse diagonal
    '''
    assert(self._alpha == self._beta)
    assert(self._gamma_m >= 0)
    assert(self._gamma_p >= 0)
    result = BandedMatrix(self._fs_row, self._fs_col,
                          alpha=1, beta=1, gamma_m=0, gamma_p=0)
    # In row i the diagonal entry sits at offset (i-j_m) within the band
    kernel_code = '''void diagonal(double **A, double **Adiag) {
  for (int i=0;i<%(A_n_row)d;++i) {
    int j_m = (int) ceil((%(A_alpha)d*i-%(A_gamma_p)d)/%(A_beta)f);
    Adiag[0][i] = 1.0/A[0][%(A_bandwidth)d*i+(i-j_m)];
  }
}'''
    # items() (rather than the Python-2-only iteritems()) works on
    # both Python 2 and Python 3 dictionaries.
    param_dict = {'A_'+x: y for (x, y) in self._param_dict.items()}
    kernel = op2.Kernel(kernel_code % param_dict, 'diagonal')
    with timed_region('bandedmatrix inv_diagonal'):
        op2.par_loop(kernel,
                     self._hostmesh.cell_set,
                     self._data(op2.READ, self._Vcell.cell_node_map()),
                     result._data(op2.WRITE, self._Vcell.cell_node_map()))
    return result
def apply(self, pc, x, y):
    """Applies the static condensation preconditioner.

    Runs forward elimination, the condensed solve and backward
    substitution in that order, each inside its own timed region.

    :arg pc: a Preconditioner instance.
    :arg x: A PETSc vector containing the incoming right-hand side.
    :arg y: A PETSc vector for the result.
    """
    stages = (
        ("SCForwardElim", lambda: self.forward_elimination(pc, x)),
        ("SCSolve", lambda: self.sc_solve(pc)),
        ("SCBackSub", lambda: self.backward_substitution(pc, y)),
    )
    for region_name, run_stage in stages:
        with timed_region(region_name):
            run_stage()
def _lu_solve(self, u):
    '''In-place LU solve for a field u.

    Calls LAPACK's banded triangular solve (``dgbtrs``) in every cell
    of the host mesh, using the factors in ``self._lu`` and the pivots
    in ``self._ipiv``.

    :arg u: Function to be solved for.
    :returns: the function ``u`` that was passed in
    '''
    kernel_code = '''void lu_solve(double **LU, int **ipiv, double **u) {
  LAPACKE_dgbtrs_work(LAPACK_COL_MAJOR,'N',
                      %(A_n_row)d,%(A_gamma_p)d,%(A_gamma_m)d,1,
                      LU[0],%(A_bandwidth)d+%(A_gamma_p)d,ipiv[0],
                      u[0],%(A_n_row)d);
}'''
    # items() (rather than the Python-2-only iteritems()) works on
    # both Python 2 and Python 3 dictionaries.
    param_dict = {'A_'+x: y for (x, y) in self._param_dict.items()}
    kernel = op2.Kernel(kernel_code % param_dict, 'lu_solve',
                        cpp=True,
                        headers=['#include "lapacke.h"'],
                        include_dirs=self._include_dirs,
                        lib_dirs=self._lib_dirs,
                        libs=self._libs)
    with timed_region('bandedmatrix lu_solve'):
        op2.par_loop(kernel,
                     self._hostmesh.cell_set,
                     self._lu(op2.READ, self._Vcell.cell_node_map()),
                     self._ipiv(op2.READ, self._Vcell.cell_node_map()),
                     u.dat(op2.RW, u.cell_node_map()),
                     name='bandedmatrix lu_solve['+self._label+']')
    return u
def backward_substitution(self, pc, y):
    """Perform the backwards recovery of eliminated fields.

    Reassembles both eliminated unknowns, maps the broken solution to
    the unbroken spaces and copies the result into ``y``.

    :arg pc: a Preconditioner instance.
    :arg y: a PETSc vector for placing the resulting fields.
    """
    # We assemble the unknown which is an expression
    # of the first eliminated variable.
    self._sub_unknown()
    # Recover the eliminated unknown
    self._elim_unknown()

    with timed_region("HybridProject"):
        pressure_index = self.pidx
        velocity_index = self.vidx

        # Project the broken solution into non-broken spaces
        pressure_from = self.broken_solution.split()[pressure_index]
        pressure_to = self.unbroken_solution.split()[pressure_index]
        pressure_from.dat.copy(pressure_to.dat)

        # Compute the hdiv projection of the broken hdiv solution
        velocity_from = self.broken_solution.split()[velocity_index]
        velocity_to = self.unbroken_solution.split()[velocity_index]
        velocity_to.assign(0)

        averaging_args = {
            "w": (self.weight, READ),
            "vec_in": (velocity_from, READ),
            "vec_out": (velocity_to, INC),
        }
        par_loop(self.average_kernel, ufl.dx, averaging_args,
                 is_loopy_kernel=True)

    with self.unbroken_solution.dat.vec_ro as result_vec:
        result_vec.copy(y)
def dense(self):
    '''Convert to a dense matrix format.

    Return the matrix in a dense format, i.e. one dense block of
    ``n_row*n_col`` entries in every vertical column, stored row by
    row.  This should mainly be used for debugging since the matrix is
    sparse and the routine will return huge dense matrices for larger
    meshes.

    :returns: Function on the cell space holding the dense blocks
    '''
    A_dense = Function(
        self._Vcell,
        val=op2.Dat(self._Vcell.node_set**(self._n_row, self._n_col)))
    # Only entries inside the band [j_m, j_p] of each row are written
    kernel_code = '''void convert_to_dense(double **A, double **B) {
  for (int i=0;i<%(A_n_row)d;++i) {
    int j_m = (int) ceil((%(A_alpha)d*i-%(A_gamma_p)d)/(1.0*%(A_beta)f));
    int j_p = (int) floor((%(A_alpha)d*i+%(A_gamma_m)d)/(1.0*%(A_beta)f));
    for (int j=std::max(0,j_m);j<std::min(%(A_n_col)d,j_p+1);++j) {
      B[0][%(A_n_col)d*i+j] = A[0][%(A_bandwidth)d*i+(j-j_m)];
    }
  }
}'''
    # items() (rather than the Python-2-only iteritems()) works on
    # both Python 2 and Python 3 dictionaries.
    param_dict = {'A_'+x: y for (x, y) in self._param_dict.items()}
    kernel = op2.Kernel(kernel_code % param_dict, 'convert_to_dense',
                        cpp=True)
    with timed_region('bandedmatrix dense'):
        op2.par_loop(kernel,
                     self._hostmesh.cell_set,
                     self._data(op2.READ, self._Vcell.cell_node_map()),
                     A_dense.dat(op2.WRITE, self._Vcell.cell_node_map()))
    return A_dense
def solve(self, b, phi):
    r'''Solve approximately.

    Solve the pressure correction equation approximately for a given
    right hand side :math:`b` with a V-cycle.  Note that the state
    vector is updated in place: it is first set to zero and then
    improved by presmoothing, a lower order coarse grid correction and
    postsmoothing.

    :arg b: right hand side in pressure space
    :arg phi: State :math:`\phi` in pressure space.
    '''
    with timed_region("vcycle_level_"+str(self._hmultigrid._fine_level+1)):
        phi.assign(0.0)
        # Presmooth
        self._presmoother.smooth(b, phi, initial_phi_is_zero=True)
        # Calculate residual...
        self._residual = self._operator.residual(b, phi)
        # ... and restrict to RHS in lowest order space
        self.restrict(self._residual, self._rhs_low)
        # h-multigrid in lower order space
        self._hmultigrid.solve(self._rhs_low, self._dphi_low)
        # Prolongate correction back to higher order space...
        # ... and add to solution in higher order space
        self.prolongadd(self._dphi_low, phi)
        # Postsmooth
        self._postsmoother.smooth(b, phi, initial_phi_is_zero=False)
def _init_block(self):
    """Create and preallocate the PETSc matrix backing this object.

    If either data set of the sparsity is a :class:`GlobalDataSet` the
    work is delegated to ``_init_global_block``.  Otherwise a BAIJ
    (block sparse) or AIJ matrix is created, its options are set, the
    whole allocated pattern is filled with zeros and the assembled
    matrix is stored in ``self.handle``.
    """
    self._blocks = [[self]]

    rset, cset = self.sparsity.dsets
    if any(isinstance(dset, GlobalDataSet) for dset in (rset, cset)):
        self._init_global_block()
        return

    mat = PETSc.Mat()
    row_lg = rset.lgmap
    col_lg = cset.lgmap
    rdim, cdim = self.dims[0][0]
    options = mat.Option

    # Block sparse: size is total number of rows and columns, but the
    # /sparsity/ is the block sparsity.  Otherwise the sparsity is the
    # /dof/ sparsity.
    block_sparse = bool(rdim == cdim and rdim > 1 and
                        self.sparsity._block_sparse)
    create = mat.createBAIJ if block_sparse else mat.createAIJ
    create(size=((self.nrows, None), (self.ncols, None)),
           nnz=(self.sparsity.nnz, self.sparsity.onnz),
           bsize=(rdim, cdim),
           comm=self.comm)
    mat.setLGMap(rmap=row_lg, cmap=col_lg)

    # Stash entries destined for other processors
    mat.setOption(options.IGNORE_OFF_PROC_ENTRIES, False)
    # Any add or insertion that would generate a new entry that has not
    # been preallocated will raise an error
    mat.setOption(options.NEW_NONZERO_ALLOCATION_ERR, True)
    # Do not ignore zeros while we fill the initial matrix so that
    # petsc doesn't compress things out.
    if not block_sparse:
        mat.setOption(options.IGNORE_ZERO_ENTRIES, False)
    # When zeroing rows (e.g. for enforcing Dirichlet bcs), keep those in
    # the nonzero structure of the matrix. Otherwise PETSc would compact
    # the sparsity and render our sparsity caching useless.
    mat.setOption(options.KEEP_NONZERO_PATTERN, True)
    # We completely fill the allocated matrix when zeroing the
    # entries, so raise an error if we "missed" one.
    mat.setOption(options.UNUSED_NONZERO_LOCATION_ERR, True)

    # Put zeros in all the places we might eventually put a value.
    with timed_region("MatZeroInitial"):
        sparsity.fill_with_zeros(mat,
                                 self.sparsity.dims[0][0],
                                 self.sparsity.maps,
                                 self.sparsity.iteration_regions,
                                 set_diag=self.sparsity._has_diagonal)
    mat.assemble()
    mat.setOption(options.NEW_NONZERO_LOCATION_ERR, True)
    # Now we've filled up our matrix, so the sparsity is
    # "complete", we can ignore subsequent zero entries.
    if not block_sparse:
        mat.setOption(options.IGNORE_ZERO_ENTRIES, True)
    self.handle = mat
def solve(self):
    """
    Apply the solver with rhs state.xrhs and result state.dy.

    The velocity/density system is solved first and its two components
    are copied into ``state.dy``; theta is then reconstructed with
    ``self.theta_solver`` and copied into the third component.
    """
    with timed_region("Gusto:VelocityDensitySolve"):
        self.urho_solver.solve()

    velocity, density = self.urho.split()
    dy_u, dy_rho, dy_theta = self.state.dy.split()
    dy_u.assign(velocity)
    dy_rho.assign(density)

    with timed_region("Gusto:ThetaRecon"):
        self.theta_solver.solve()

    dy_theta.assign(self.theta)
def solve(self):
    """
    Apply the solver with rhs state.xrhs and result state.dy.

    The velocity/pressure system is solved first and its two components
    are copied into ``state.dy``; the buoyancy is then reconstructed
    with ``self.b_solver`` and copied into the third component.
    """
    with timed_region("Gusto:VelocityPressureSolve"):
        self.up_solver.solve()

    velocity, pressure = self.up.split()
    dy_u, dy_p, dy_b = self.state.dy.split()
    dy_u.assign(velocity)
    dy_p.assign(pressure)

    with timed_region("Gusto:BuoyancyRecon"):
        self.b_solver.solve()

    dy_b.assign(self.b)
def matmul(self, other, result=None):
    '''Calculate matrix product :math:`C=AB`.

    Multiply this matrix by the banded matrix :math:`B` from the right
    and store the result in the matrix :math:`C`, which is returned on
    exit.  If result is None, allocate a new matrix whose band
    parameters are derived from those of the two factors, otherwise
    write data to already allocated matrix ``result``.

    :arg other: matrix :math:`B` to multiply with
    :arg result: resulting matrix :math:`C`
    :returns: the matrix :math:`C`
    '''
    # Check that matrices can be multiplied
    assert (self._n_col == other._n_row)
    # Identity test against None: does not depend on how the matrix
    # class implements comparison operators.
    if result is not None:
        assert(result._n_row == self._n_row)
        assert(result._n_col == other._n_col)
    else:
        alpha = self.alpha * other.alpha
        beta = self.beta * other.beta
        gamma_m = other.alpha * self.gamma_m + self.beta*other.gamma_m
        gamma_p = other.alpha * self.gamma_p + self.beta*other.gamma_p
        result = BandedMatrix(self._fs_row, other._fs_col,
                              alpha=alpha, beta=beta,
                              gamma_m=gamma_m, gamma_p=gamma_p)

    # Kernel parameters of all three matrices, prefixed by A_, B_, C_.
    # items() (rather than the Python-2-only iteritems()) works on
    # both Python 2 and Python 3 dictionaries.
    param_dict = {}
    for label, matrix in zip(('A', 'B', 'C'), (self, other, result)):
        param_dict.update({label+'_'+x: y
                           for (x, y) in matrix._param_dict.items()})
    kernel_code = '''void matmul(double **A, double **B, double **C) {
  for (int i=0;i<%(C_n_row)d;++i) {
    int j_m = (int) ceil((%(C_alpha)d*i-%(C_gamma_p)d)/(1.0*%(C_beta)f));
    int j_p = (int) floor((%(C_alpha)d*i+%(C_gamma_m)d)/(1.0*%(C_beta)f));
    int k_m = (int) ceil((%(A_alpha)d*i-%(A_gamma_p)d)/(1.0*%(A_beta)f));
    int k_p = (int) floor((%(A_alpha)d*i+%(A_gamma_m)d)/(1.0*%(A_beta)f));
    for (int j=std::max(0,j_m);j<std::min(%(C_n_col)d,j_p+1);++j) {
      double s = 0.0;
      for (int k=std::max(0,k_m);k<std::min(%(A_n_col)d,k_p+1);++k) {
        if ( (ceil((%(B_alpha)d*k-%(B_gamma_p)d)/%(B_beta)f) <= j) &&
             (j <= floor((%(B_alpha)d*k+%(B_gamma_m)d)/(1.0*%(B_beta)f))) ) {
          int j_m_B = (int) ceil((%(B_alpha)d*k-%(B_gamma_p)d)/(1.0*%(B_beta)f));
          s += A[0][%(A_bandwidth)d*i+(k-k_m)]
             * B[0][%(B_bandwidth)d*k+(j-j_m_B)];
        }
      }
      C[0][%(C_bandwidth)d*i+(j-j_m)] = s;
    }
  }
}'''
    kernel = op2.Kernel(kernel_code % param_dict, 'matmul', cpp=True)
    with timed_region('bandedmatrix matmul'):
        op2.par_loop(kernel,
                     self._hostmesh.cell_set,
                     self._data(op2.READ, self._Vcell.cell_node_map()),
                     other._data(op2.READ, self._Vcell.cell_node_map()),
                     result._data(op2.WRITE, self._Vcell.cell_node_map()))
    return result
def _lu_decompose(self):
    '''Construct LU decomposition :math:`A=LU` on the fly.

    If the matrix is tridiagonal, do not LU-decompose.  Otherwise copy
    the matrix into the column-major band storage ``self._lu`` and
    factorise it there with LAPACK's DGBTRF, storing the pivots in
    ``self._ipiv``.  The lower (L) and upper (U) factors are stored
    together, where L is assumed to have ones on the diagonal.  The
    version of the matrix data that was factorised is recorded in
    ``self._lu_version``.
    '''
    # Number of super-diagonals (ku): gamma_m
    # Number of sub-diagonals (kl): gamma_p
    # Storage for LU decomposition is n_{row} * (1+ku+kl)+kl
    # (see http://www.netlib.org/lapack/lug/node124.html and
    # documentation of DGBTRF http://phase.hpcc.jp/mirrors/netlib/lapack/double/dgbtrf.f)
    # The LAPACKe C-interface to LAPACK is used, see
    # http://www.netlib.org/lapack/lapacke.html
    if (self.is_tridiagonal):
        return
    assert (self._n_row == self._n_col)
    lda = self.bandwidth+self.gamma_p
    if (not self._lu):
        self._lu = op2.Dat(self._Vcell.node_set**(lda * self._n_row))
    # Clear the storage every time, also when it is being reused
    self._lu.zero()
    if (not self._ipiv):
        self._ipiv = op2.Dat(self._Vcell.node_set**(self._n_row),
                             dtype=np.int32)
    # Copy data into array which will be LU decomposed.
    kernel_code = '''void lu_decompose(double **A, double **LU, int **ipiv) {
  // Step 1: write to column-major LU matrix
  for (int i=0;i<%(A_n_row)d;++i) {
    int j_m = (int) ceil((%(A_alpha)d*i-%(A_gamma_p)d)/%(A_beta)f);
    int j_p = (int) floor((%(A_alpha)d*i+%(A_gamma_m)d)/%(A_beta)f);
    for (int j=std::max(0,j_m);j<std::min(%(A_n_col)d,j_p+1);++j) {
      LU[0][%(A_bandwidth)d-1+(i-j)+(%(A_bandwidth)d+%(A_gamma_p)d)*j]
        = A[0][%(A_bandwidth)d*i+(j-j_m)];
    }
  }
  // Step 2: Call LAPACK's DGBTRF routine to LU decompose the matrix
  LAPACKE_dgbtrf_work(LAPACK_COL_MAJOR,
                      %(A_n_row)d,%(A_n_row)d,
                      %(A_gamma_p)d,%(A_gamma_m)d,
                      LU[0],%(A_bandwidth)d+%(A_gamma_p)d,ipiv[0]);
}'''
    # items() (rather than the Python-2-only iteritems()) works on
    # both Python 2 and Python 3 dictionaries.
    param_dict = {'A_'+x: y for (x, y) in self._param_dict.items()}
    kernel = op2.Kernel(kernel_code % param_dict, 'lu_decompose',
                        cpp=True,
                        headers=['#include "lapacke.h"'],
                        include_dirs=self._include_dirs,
                        lib_dirs=self._lib_dirs,
                        libs=self._libs)
    with timed_region('bandedmatrix lu_decompose'):
        op2.par_loop(kernel,
                     self._hostmesh.cell_set,
                     self._data(op2.READ, self._Vcell.cell_node_map()),
                     self._lu(op2.WRITE, self._Vcell.cell_node_map()),
                     self._ipiv(op2.WRITE, self._Vcell.cell_node_map()))
    self._lu_version = self._data._version
def solve(self, xrhs, dy):
    """
    Apply the solver with rhs xrhs and result dy.

    ``xrhs`` is first copied into ``self.xrhs``.  The velocity/pressure
    system is then solved and its two components are copied into ``dy``;
    the buoyancy is reconstructed with ``self.b_solver`` and copied into
    the third component of ``dy``.
    """
    self.xrhs.assign(xrhs)

    with timed_region("Gusto:VelocityPressureSolve"):
        self.up_solver.solve()

    velocity, pressure = self.up.split()
    dy_u, dy_p, dy_b = dy.split()
    dy_u.assign(velocity)
    dy_p.assign(pressure)

    with timed_region("Gusto:BuoyancyRecon"):
        self.b_solver.solve()

    dy_b.assign(self.b)
def _reconstruct(self):
    """Locally solve for the interior degrees of freedom using the
    computed unknowns for the facets.

    After the interior solution is assembled, the ``join`` kernel of
    the transfer kernel writes ``self.h1_solution`` from the interior
    and facet (trace) solutions.
    """
    with timed_region("SCAssembleInterior"):
        self._assemble_interior_u()

    join_args = {
        "x": (self.h1_solution, WRITE),
        "x_int": (self.interior_solution, READ),
        "x_facet": (self.trace_solution, READ),
    }
    with timed_region("SCReconSolution"):
        par_loop(self._transfer_kernel.join, ufl.dx, join_args)
def apply(self, pc, x, y):
    """Solve the reduced system for the facet degrees of freedom after
    static condensation is applied.

    The computed facet solution is then used by :meth:`_reconstruct` to
    recover the interior degrees of freedom, and the full solution is
    copied into ``y``.

    :arg pc: the preconditioner object (not used in this method).
    :arg x: a PETSc vector holding the incoming residual.
    :arg y: a PETSc vector receiving the solution.
    """
    with timed_region("SCTransfer"):
        with self.h1_residual.dat.vec_wo as residual_vec:
            x.copy(residual_vec)

        # Partition residual data into interior and facet sections
        self._partition_residual()

    # Now that the residual data is transfered, we assemble
    # the RHS for the reduced system
    with timed_region("SCRHS"):
        # Assemble the RHS of the reduced system:
        # If r = [F, G] is the incoming residual separated
        # into "facet" and "interior" restrictions, then
        # the Schur complement RHS is:
        # G - A10 * A00.inv * F.
        # This is assembled point-wise, with -A10 * A00.inv * F
        # precomputed element-wise using Slate.
        self._assemble_sc_rhs_thunk()
        self.sc_rhs.assign(self.trace_residual + self.sc_rhs_thunk)

    with timed_region("SCSolve"):
        # Solve the reduced problem
        with self.sc_rhs.dat.vec_ro as rhs_vec:
            # Keep the current trace values only when the KSP is set
            # up to use a nonzero initial guess.
            use_guess = self.sc_ksp.getInitialGuessNonzero()
            trace_dat = self.trace_solution.dat
            trace_access = trace_dat.vec if use_guess else trace_dat.vec_wo
            with trace_access as trace_vec:
                self.sc_ksp.solve(rhs_vec, trace_vec)

    with timed_region("SCRecover"):
        self._reconstruct()

    with self.h1_solution.dat.vec_ro as solution_vec:
        solution_vec.copy(y)
def callback(self):
    """Finish initialisation.

    Grows the halo when running on more than one process, optionally
    computes an RCM-based reordering, marks the OP2 entity classes and
    derives the Plex renumbering together with the cell and vertex
    numberings and the facet ordering.  ``reorder`` and ``dim`` are
    taken from the enclosing scope.
    """
    del self._callback
    if op2.MPI.comm.size > 1:
        self._plex.distributeOverlap(1)
    self._grown_halos = True

    reordering = None  # stays None when no reordering is requested
    if reorder:
        with timed_region("Mesh: reorder"):
            forward = self._plex.getOrdering(PETSc.Mat.OrderingType.RCM).indices
            # Invert the permutation returned by the ordering
            reordering = np.empty_like(forward)
            reordering[forward] = np.arange(forward.size, dtype=forward.dtype)
    self._did_reordering = bool(reorder)

    # Mark OP2 entities and derive the resulting Plex renumbering
    with timed_region("Mesh: renumbering"):
        dmplex.mark_entity_classes(self._plex)
        self._entity_classes = dmplex.get_entity_classes(self._plex)
        self._plex_renumbering = dmplex.plex_renumbering(self._plex,
                                                         self._entity_classes,
                                                         reordering)

    with timed_region("Mesh: cell numbering"):
        def numbering_section(position):
            # Section with a single dof on the entities selected by
            # ``position`` in the per-entity dof array, zero elsewhere.
            dofs_per_entity = np.zeros(dim+1, dtype=np.int32)
            dofs_per_entity[position] = 1
            return self._plex.createSection([1], dofs_per_entity,
                                            perm=self._plex_renumbering)

        # Derive a cell numbering from the Plex renumbering
        self._cell_numbering = numbering_section(-1)
        self._vertex_numbering = numbering_section(0)
        facet_numbering = numbering_section(-2)
        self._facet_ordering = dmplex.get_facet_ordering(self._plex,
                                                         facet_numbering)
def solve(self, b, phi):
    r'''Solve linear system :math:`H\phi = b`.

    The right hand side is copied into the work vector ``self._rhs``,
    the KSP solve writes into ``self._u`` and the result is copied back
    into ``phi``.

    :arg b: Right hand side :math:`b` in pressure space
    :arg phi: State vector :math:`\phi` in pressure space
    '''
    with self._ksp_monitor, timed_region('matrixfree pc_schur'):
        with b.dat.vec_ro as v:
            self._rhs.array[:] = v.array[:]
        self._ksp.solve(self._rhs, self._u)
        with phi.dat.vec as v:
            v.array[:] = self._u.array[:]
def setup(self, *args):
    r""" Generate method-specific solver contexts for all forms.

    For every field name ``X`` listed below, ``self.ctx_X`` is set to
    ``self.create_solver(self.form_X, self.X)``.  Positional arguments
    are accepted but not used.
    """
    log("Creating solver contexts")
    # Same creation order as the individual assignments it replaces
    field_names = ('uh1', 'stemp', 'uh2', 'u1',
                   'sh1', 'utemp', 'sh2', 's1')
    with timed_region('solver setup'):
        for field in field_names:
            context = self.create_solver(getattr(self, 'form_' + field),
                                         getattr(self, field))
            setattr(self, 'ctx_' + field, context)
def write(self, u=None, s=None):
    r""" Write the velocity and/or stress fields to file.

    Nothing is written unless ``self.output`` is set; each field is
    only written when the corresponding argument is truthy.

    :param firedrake.Function u: The velocity field.
    :param firedrake.Function s: The stress field.
    :returns: None
    """
    if not self.output:
        return
    with timed_region('i/o'):
        for field, stream_name in ((u, 'u_stream'), (s, 's_stream')):
            if field:
                # Stream is looked up only when it is actually needed
                getattr(self, stream_name).write(field)
def forward_elimination(self, pc, x):
    """Perform the forward elimination of fields and provide the
    reduced right-hand side for the condensed system.

    :arg pc: a Preconditioner instance.
    :arg x: a PETSc vector containing the incoming right-hand side.
    """
    with timed_region("HybridBreak"):
        with self.unbroken_residual.dat.vec_wo as residual_vec:
            x.copy(residual_vec)

        pressure_index = self.pidx
        velocity_index = self.vidx

        # Transfer unbroken_rhs into broken_rhs
        # NOTE: Scalar space is already "broken" so no need for
        # any projections
        scalar_from = self.unbroken_residual.split()[pressure_index]
        scalar_to = self.broken_residual.split()[pressure_index]
        scalar_from.dat.copy(scalar_to.dat)

        # Assemble the new "broken" hdiv residual
        # We need a residual R' in the broken space that
        # gives R'[w] = R[w] when w is in the unbroken space.
        # We do this by splitting the residual equally between
        # basis functions that add together to give unbroken
        # basis functions.
        hdiv_from = self.unbroken_residual.split()[velocity_index]
        hdiv_to = self.broken_residual.split()[velocity_index]
        hdiv_to.assign(0)

        averaging_args = {
            "w": (self.weight, READ),
            "vec_in": (hdiv_from, READ),
            "vec_out": (hdiv_to, INC),
        }
        par_loop(self.average_kernel, ufl.dx, averaging_args,
                 is_loopy_kernel=True)

    with timed_region("HybridRHS"):
        # Compute the rhs for the multiplier system
        self._assemble_Srhs()
def ax(self,u):
    r'''In-place matrix-vector multiplication :math:`u\mapsto Au`.

    A C kernel is generated from the matrix parameters and executed once
    per cell of the host mesh.  The input column is first copied into a
    temporary array so that the result can be written back into ``u``.
    If ``alpha == beta`` and BLAS use is enabled, the banded product is
    delegated to ``cblas_dgbmv``; otherwise an explicit loop over rows and
    the in-band columns is generated.

    :arg u: Function to multiply, on exit this contains result :math:`Au`
    '''
    assert(u.function_space() == self._fs_col)
    assert(u.function_space() == self._fs_row)
    # ``items()`` works on both Python 2 and Python 3 dictionaries.
    param_dict = {'A_'+x:y for (x,y) in self._param_dict.items()}
    # NOTE: the kernel source must keep one statement per line, because the
    # C++ ``//`` comments extend to the end of the line.
    kernel_code = '''void ax(double **A,
                             double **u) {
      // Copy vector into temporary array
      double u_tmp[%(A_n_row)d];
      for (int i=0;i<%(A_n_row)d;++i) {
        u_tmp[i] = u[0][i];
      }
    '''
    if (self._alpha == self._beta) and self._use_blas_for_axpy:
        kernel_code += '''
      cblas_dgbmv(CblasColMajor,CblasTrans,
                  %(A_n_col)d,%(A_n_row)d,
                  %(A_gamma_m)d,%(A_gamma_p)d,
                  1.0,A[0],%(A_bandwidth)d,
                  u_tmp,1,0.0,u[0],1);
    '''
    else:
        kernel_code += '''
      // Loop over matrix rows
      for (int i=0;i<%(A_n_row)d;++i) {
        double s = 0;
        // Work out column loop bounds
        int j_m = (int) ceil((%(A_alpha)d*i-%(A_gamma_p)d)/%(A_beta)f);
        int j_p = (int) floor((%(A_alpha)d*i+%(A_gamma_m)d)/%(A_beta)f);
        // Loop over columns
        for (int j=std::max(0,j_m);j<std::min(%(A_n_col)d,j_p+1);++j) {
          s += A[0][%(A_bandwidth)d*i+(j-j_m)] * u_tmp[j];
        }
        u[0][i] = s;
      }
    '''
    kernel_code += '''}'''
    kernel = op2.Kernel(kernel_code % param_dict,'ax',cpp=True,
                        headers=['#include "cblas.h"'],
                        include_dirs=self._include_dirs,
                        lib_dirs=self._lib_dirs,
                        libs=self._libs)
    with timed_region('bandedmatrix ax'):
        op2.par_loop(kernel,
                     self._hostmesh.cell_set,
                     self._data(op2.READ,self._Vcell.cell_node_map()),
                     u.dat(op2.RW,u.cell_node_map()),
                     name='bandedmatrix ax['+self._label+']',
                     measure_flops=(not self._use_blas_for_axpy))
def __init__(self,ufl_form,label=None):
    '''Build a lumped (diagonal) version of the matrix of a bilinear form.

    The local stencil of the form is assembled cell by cell, the diagonal
    entries of every local matrix are accumulated into ``self._data`` and
    the pointwise inverse is stored in ``self._data_inv``.

    :arg ufl_form: bilinear UFL form; both arguments have to live on the
        same function space
    :arg label: label used in the name of the timed region; ``'___'`` is
        used if none is given
    '''
    self._label = '___' if label is None else label
    self._ufl_form = ufl_form
    fs = [x.function_space() for x in self._ufl_form.arguments()]
    assert (fs[0] == fs[1])
    self._W2 = fs[0]
    self._mesh = self._W2.mesh()
    self.project_solver_param = {'ksp_type':'cg',
                                 'pc_type':'jacobi'}
    nlocaldof = self._W2.cell_node_map().arity

    V_cells = FunctionSpace(self._mesh,'DG',0)

    # Build local stencil of full mass matrix
    mass = self._ufl_form
    compiled_form = compile_form(mass, 'mass')[0]
    mass_kernel = compiled_form[6]
    coords = compiled_form[3]
    coefficients = compiled_form[4]
    # One value per cell holding the flattened nlocaldof x nlocaldof matrix
    mass_matrix = Function(V_cells,
                           val=op2.Dat(V_cells.node_set**(nlocaldof**2)))
    args = [mass_matrix.dat(op2.INC,
                            mass_matrix.cell_node_map()[op2.i[0]]),
            coords.dat(op2.READ,coords.cell_node_map(),flatten=True)]
    for c in coefficients:
        args.append(c.dat(op2.READ, c.cell_node_map(), flatten=True))
    with timed_region('assemble lumpedmass['+self._label+']'):
        op2.par_loop(mass_kernel,mass_matrix.cell_set,*args)

    # Accumulate the diagonal entries of the local matrices
    self._data = Function(self._W2)
    assemble_diag_kernel = '''void assemble_diag(double **mass_matrix,
                                                 double **lumped_mass_matrix) {
      for (int i=0; i<%(nlocaldof)d; ++i) {
        lumped_mass_matrix[i][0] += mass_matrix[0][(%(nlocaldof)d+1)*i];
      }
    }'''
    assemble_diag_kernel = op2.Kernel(
        assemble_diag_kernel % {'nlocaldof':nlocaldof},
        'assemble_diag')
    op2.par_loop(assemble_diag_kernel,
                 mass_matrix.cell_set,
                 mass_matrix.dat(op2.READ, mass_matrix.cell_node_map()),
                 self._data.dat(op2.INC, self._data.cell_node_map()))

    # Construct pointwise inverse
    self._data_inv = Function(self._W2)
    kernel_inv = '*data_inv = 1./(*data);'
    par_loop(kernel_inv,direct,
             {'data_inv':(self._data_inv,WRITE),
              'data':(self._data,READ)})
def apply_blockinverse(self,r):
    r'''In-place multiplication with the inverse of the block-diagonal.

    Apply :math:`r\mapsto \hat{H}_z^{-1} r`.  The tridiagonal solver of the
    vertical-diagonal matrix is used if that matrix reports itself as
    tridiagonal, its LU solver otherwise.

    :arg r: Vector to be multiplied
    '''
    level = str(self._level)
    with timed_region('apply_Hhat_z_inv_level_'+level):
        diagonal = self._vertical_diagonal
        diagonal._label = 'Hhat_z_level_'+level
        if diagonal.is_tridiagonal:
            solver = diagonal._tridiagonal_solve
        else:
            solver = diagonal._lu_solve
        solver(r)
def forward_elimination(self, pc, x):
    """Perform the forward elimination of fields and provide the reduced
    right-hand side for the condensed system.

    :arg pc: a Preconditioner instance.
    :arg x: a PETSc vector containing the incoming right-hand side.
    """
    vidx, pidx = self.vidx, self.pidx

    with timed_region("HybridBreak"):
        # Copy the incoming vector into the unbroken residual function
        with self.unbroken_residual.dat.vec_wo as rhs_vec:
            x.copy(rhs_vec)

        # Scalar part: the space is already discontinuous, so the data is
        # transferred unchanged.
        source_scalar = self.unbroken_residual.split()[pidx]
        target_scalar = self.broken_residual.split()[pidx]
        source_scalar.dat.copy(target_scalar.dat)

        # HDiv part: we need a broken residual R' such that R'[w] = R[w]
        # whenever w is in the unbroken space.  The residual is split
        # equally between the broken basis functions which add up to an
        # unbroken basis function.
        source_hdiv = self.unbroken_residual.split()[vidx]
        target_hdiv = self.broken_residual.split()[vidx]
        target_hdiv.assign(0)
        par_loop(
            self.average_kernel,
            ufl.dx,
            {
                "w": (self.weight, READ),
                "vec_in": (source_hdiv, READ),
                "vec_out": (target_hdiv, INC),
            },
            is_loopy_kernel=True,
        )

    with timed_region("HybridRHS"):
        # Assemble the right-hand side of the multiplier system
        self._assemble_Srhs()
def callback(self):
    """Finish initialisation.

    Removes the pending ``_callback``, initialises the topology, builds
    the coordinate field from the plex and re-runs ``__init__`` with it.
    """
    del self._callback
    # Finish the initialisation of mesh topology
    self.topology.init()

    with timed_region("Mesh: coordinate field"):
        coord_space = functionspace.VectorFunctionSpace(
            self.topology, "Lagrange", 1, dim=geometric_dim)
        coord_shape = (self.num_vertices(), geometric_dim)
        coord_values = dmplex.reordered_coords(
            plex, coord_space._global_numbering, coord_shape)
        coordinates = function.CoordinatelessFunction(
            coord_space, val=coord_values, name="Coordinates")

    self.__init__(coordinates)
def write(self, u=None, s=None, output=True):
    r"""Write the velocity and/or stress fields to file.

    All pending lazily-evaluated computations are forced first
    (``_trace.evaluate_all()``), whether or not anything is written.

    :param firedrake.Function u: The velocity field.
    :param firedrake.Function s: The stress field.  Accepted for interface
        compatibility but currently not written, see the FIXME below.
    :param bool output: If false, no file output is performed.
    :returns: None
    """
    _trace.evaluate_all()
    if output:
        with timed_region('i/o'):
            # Test against the ``None`` default explicitly instead of
            # relying on the truth value of a field object.
            if u is not None:
                self.u_stream.write(u)
            # FIXME: Cannot currently write tensor valued fields to a VTU file.
            # See https://github.com/firedrakeproject/firedrake/issues/538
            # Until that is resolved the stress field ``s`` is ignored.
def _init_monolithic(self):
    """Create the monolithic AIJ matrix, its block views, and fill the
    allocated pattern with zeros."""
    mat = PETSc.Mat()
    rset, cset = self.sparsity.dsets
    # Use the unblocked local-to-global map for datasets with cdim != 1
    rlgmap = rset.unblocked_lgmap if rset.cdim != 1 else rset.lgmap
    clgmap = cset.unblocked_lgmap if cset.cdim != 1 else cset.lgmap
    mat.createAIJ(size=((self.nrows, None), (self.ncols, None)),
                  nnz=(self.sparsity.nnz, self.sparsity.onnz),
                  bsize=1,
                  comm=self.comm)
    mat.setLGMap(rmap=rlgmap, cmap=clgmap)
    self.handle = mat

    self._blocks = []
    rows, cols = self.sparsity.shape
    for i in range(rows):
        self._blocks.append([MatBlock(self, i, j) for j in range(cols)])

    initial_options = (
        (mat.Option.IGNORE_ZERO_ENTRIES, False),
        (mat.Option.KEEP_NONZERO_PATTERN, True),
        # We completely fill the allocated matrix when zeroing the
        # entries, so raise an error if we "missed" one.
        (mat.Option.UNUSED_NONZERO_LOCATION_ERR, True),
        (mat.Option.IGNORE_OFF_PROC_ENTRIES, False),
        (mat.Option.NEW_NONZERO_ALLOCATION_ERR, True),
        # The first assembly (filling with zeros) sets all possible entries.
        (mat.Option.SUBSET_OFF_PROC_ENTRIES, True),
    )
    for option, flag in initial_options:
        mat.setOption(option, flag)

    # Put zeros in all the places we might eventually put a value.
    with timed_region("MatZeroInitial"):
        for i in range(rows):
            for j in range(cols):
                block = self[i, j]
                sparsity.fill_with_zeros(
                    block.handle,
                    block.sparsity.dims[0][0],
                    block.sparsity.maps,
                    set_diag=block.sparsity._has_diagonal)
                block.handle.assemble()

    mat.assemble()
    mat.setOption(mat.Option.NEW_NONZERO_LOCATION_ERR, True)
    mat.setOption(mat.Option.IGNORE_ZERO_ENTRIES, True)
def _assemble_lma(self,lma,vertical_bcs=False):
    '''Assemble the matrix from the LMA storage format.

    The banded storage is zeroed, then for every cell layer the local
    matrix stored in ``lma`` is added into the banded matrix of the
    column.  If the flag ``vertical_bcs`` is set, then homogeneous
    boundary conditions on the top and bottom surfaces are assumed (on
    the column space).

    :arg lma: Matrix in LMA storage format.
    :arg vertical_bcs: Apply homogeneous Dirichlet boundary conditions on
        the top and bottom surfaces.
    '''
    # ``items()`` works on both Python 2 and Python 3 dictionaries.
    param_dict = {'A_'+x:y for (x,y) in self._param_dict.items()}
    # NOTE: the kernel source must keep one statement per line, because the
    # C++ ``//`` comments extend to the end of the line.
    kernel_code = '''
    void assemble_lma(double **lma,
                      double **A) {
      const int ndof_cell_row = %(A_ndof_cell_row)d;
      const int ndof_facet_row = %(A_ndof_facet_row)d;
      const int ndof_cell_col = %(A_ndof_cell_col)d;
      const int ndof_facet_col = %(A_ndof_facet_col)d;
      const int ndof_row = ndof_cell_row + 2*ndof_facet_row;
      const int ndof_col = ndof_cell_col + 2*ndof_facet_col;
      const int nodemap_row[%(A_n_nodemap_row)d] = %(A_nodemap_row)s;
      const int nodemap_col[%(A_n_nodemap_col)d] = %(A_nodemap_col)s;
      double *layer_lma = lma[0];
      for (int celllayer=0;celllayer<%(A_ncelllayers)d;++celllayer) {
        // Loop over local vertical dofs in row space
        for (int i_local=0;i_local<ndof_row;++i_local) {
          // Loop over local vertical dofs in column space
          for (int j_local=0;j_local<ndof_col;++j_local) {
            // Work out global vertical indices (for accessing A)
            int i = celllayer*(ndof_cell_row+ndof_facet_row)+nodemap_row[i_local];
            int j = celllayer*(ndof_cell_col+ndof_facet_col)+nodemap_col[j_local];
            int j_m = (int) ceil((%(A_alpha)d*i-%(A_gamma_p)d)/%(A_beta)f);
            A[0][%(A_bandwidth)d*i+(j-j_m)] += layer_lma[i_local * ndof_col + j_local];
          }
        }
        // point to next vertical layer
        layer_lma += ndof_row * ndof_col;
      }
    }'''
    self._data.zero()
    kernel = op2.Kernel(kernel_code % param_dict,'assemble_lma',cpp=True)
    with timed_region('bandedmatrix assemble_lma'):
        op2.par_loop(kernel,
                     self._hostmesh.cell_set,
                     lma.dat(op2.READ,lma.cell_node_map()),
                     self._data(op2.INC,self._Vcell.cell_node_map()))
    if vertical_bcs:
        self.apply_vertical_bcs()
def axpy(self,u,v):
    r'''axpy matrix-vector multiplication :math:`v\mapsto v+Au`.

    A C kernel is generated from the matrix parameters and executed once
    per cell of the host mesh.  If ``alpha == beta`` and BLAS use is
    enabled, the banded product is delegated to ``cblas_dgbmv``;
    otherwise an explicit loop over rows and in-band columns is generated.

    :arg u: Vector to multiply
    :arg v: Resulting vector
    '''
    assert(u.function_space() == self._fs_col)
    assert(v.function_space() == self._fs_row)
    # ``items()`` works on both Python 2 and Python 3 dictionaries.
    param_dict = {'A_'+x:y for (x,y) in self._param_dict.items()}
    # NOTE: the kernel source must keep one statement per line, because the
    # C++ ``//`` comments extend to the end of the line.
    if (self._alpha == self._beta) and self._use_blas_for_axpy:
        kernel_code = '''void axpy(double **A,
                                   double **u,
                                   double **v) {
          cblas_dgbmv(CblasColMajor,CblasTrans,
                      %(A_n_col)d,%(A_n_row)d,
                      %(A_gamma_m)d,%(A_gamma_p)d,
                      1.0,A[0],%(A_bandwidth)d,
                      u[0],1,1.0,v[0],1);
        }'''
    else:
        kernel_code = '''void axpy(double **A,
                                   double **u,
                                   double **v) {
          // Loop over matrix rows
          for (int i=0;i<%(A_n_row)d;++i) {
            double s = 0;
            // Work out column loop bounds
            int j_m = (int) ceil((%(A_alpha)d*i-%(A_gamma_p)d)/%(A_beta)f);
            int j_p = (int) floor((%(A_alpha)d*i+%(A_gamma_m)d)/%(A_beta)f);
            // Loop over columns
            for (int j=std::max(0,j_m);j<std::min(%(A_n_col)d,j_p+1);++j) {
              s += A[0][%(A_bandwidth)d*i+(j-j_m)] * u[0][j];
            }
            v[0][i] += s;
          }
        }'''
    kernel = op2.Kernel(kernel_code % param_dict,'axpy',cpp=True,
                        headers=['#include "cblas.h"'],
                        include_dirs=self._include_dirs,
                        lib_dirs=self._lib_dirs,
                        libs=self._libs)
    with timed_region('bandedmatrix axpy'):
        op2.par_loop(kernel,
                     self._hostmesh.cell_set,
                     self._data(op2.READ,self._Vcell.cell_node_map()),
                     u.dat(op2.READ,u.cell_node_map()),
                     v.dat(op2.INC,v.cell_node_map()))
def callback(self):
    """Finish initialisation.

    Drops the pending ``_callback``, initialises the mesh topology,
    constructs the coordinate field and calls ``__init__`` with it.
    """
    del self._callback
    # Finish the initialisation of mesh topology
    self.topology.init()

    with timed_region("Mesh: coordinate field"):
        fs = functionspace.VectorFunctionSpace(self.topology,
                                               "Lagrange", 1,
                                               dim=geometric_dim)
        data = dmplex.reordered_coords(plex,
                                       fs._global_numbering,
                                       (self.num_vertices(), geometric_dim))
        coordinates = function.CoordinatelessFunction(fs,
                                                      val=data,
                                                      name="Coordinates")

    self.__init__(coordinates)
def __init__(self,W2,W3,dt,c,N,preassemble=True):
    '''Set up the forms (and optionally matrices) of the mixed operator.

    :arg W2: velocity function space
    :arg W3: pressure function space
    :arg dt: time step size, enters the forms as ``0.5*dt``
    :arg c: parameter entering the pressure-velocity coupling as ``c**2``
    :arg N: parameter entering the velocity block as ``(0.5*dt*N)**2``
    :arg preassemble: assemble all four blocks into matrices right away
    '''
    self._W2 = W2
    self._W3 = W3
    self._c = c
    self._N = N
    half_dt = 0.5*dt
    self._omega_N2 = Constant((half_dt*N)**2)
    self._dt_half = Constant(half_dt)
    self._dt_half_N2 = Constant(half_dt*N**2)
    self._dt_half_c2 = Constant(half_dt*c**2)
    self._preassemble = preassemble

    # Test and trial functions
    self._utest = TestFunction(self._W2)
    self._ptest = TestFunction(self._W3)
    self._utrial = TrialFunction(self._W2)
    self._ptrial = TrialFunction(self._W3)

    # Temporary storage
    self._mixedarray = MixedArray(self._W2,self._W3)
    self._u_tmp = Function(self._W2)
    self._p_tmp = Function(self._W3)
    self._r_u_tmp = Function(self._W2)
    self._r_p_tmp = Function(self._W3)

    self._mesh = self._W3._mesh
    self._zhat = VerticalNormal(self._mesh)
    self._dx = self._mesh._dx
    self._bcs = [DirichletBC(self._W2, 0.0, "bottom"),
                 DirichletBC(self._W2, 0.0, "top")]

    # Bilinear forms of the four blocks
    zhat = self._zhat.zhat
    self.form_uu = (dot(self._utest,self._utrial)
                    + self._omega_N2
                    * dot(self._utest,zhat)
                    * dot(zhat,self._utrial)) * self._dx
    self.form_up = -self._dt_half*div(self._utest) * self._ptrial*self._dx
    self.form_pp = self._ptest * self._ptrial * self._dx
    self.form_pu = self._ptest*self._dt_half_c2 * div(self._utrial)*self._dx

    with timed_region('assemble mixed'):
        if self._preassemble:
            # Only the velocity-velocity block is assembled with bcs
            self._op_uu = assemble(self.form_uu,bcs=self._bcs)
            self._op_up = assemble(self.form_up)
            self._op_pu = assemble(self.form_pu)
            self._op_pp = assemble(self.form_pp)
            self._mat_uu = self._op_uu.M.handle
            self._mat_up = self._op_up.M.handle
            self._mat_pu = self._op_pu.M.handle
            self._mat_pp = self._op_pp.M.handle
def _init_monolithic(self):
    """Create the monolithic AIJ matrix, its block views, and fill the
    allocated pattern with zeros."""
    mat = PETSc.Mat()
    rset, cset = self.sparsity.dsets
    # Use the unblocked local-to-global map for datasets with cdim != 1
    rlgmap = rset.lgmap if rset.cdim == 1 else rset.unblocked_lgmap
    clgmap = cset.lgmap if cset.cdim == 1 else cset.unblocked_lgmap
    mat.createAIJ(size=((self.nrows, None), (self.ncols, None)),
                  nnz=(self.sparsity.nnz, self.sparsity.onnz),
                  bsize=1,
                  comm=self.comm)
    mat.setLGMap(rmap=rlgmap, cmap=clgmap)
    self.handle = mat

    self._blocks = []
    rows, cols = self.sparsity.shape
    for i in range(rows):
        block_row = [MatBlock(self, i, j) for j in range(cols)]
        self._blocks.append(block_row)

    option = mat.Option
    mat.setOption(option.IGNORE_ZERO_ENTRIES, False)
    mat.setOption(option.KEEP_NONZERO_PATTERN, True)
    # We completely fill the allocated matrix when zeroing the
    # entries, so raise an error if we "missed" one.
    mat.setOption(option.UNUSED_NONZERO_LOCATION_ERR, True)
    mat.setOption(option.IGNORE_OFF_PROC_ENTRIES, True)
    mat.setOption(option.NEW_NONZERO_ALLOCATION_ERR, True)

    # Put zeros in all the places we might eventually put a value.
    with timed_region("MatZeroInitial"):
        for i in range(rows):
            for j in range(cols):
                block = self[i, j]
                sparsity.fill_with_zeros(
                    block.handle,
                    block.sparsity.dims[0][0],
                    block.sparsity.maps,
                    set_diag=block.sparsity._has_diagonal)

    mat.assemble()
    mat.setOption(option.IGNORE_ZERO_ENTRIES, True)
def solve(self):
    """
    Apply the solver with rhs state.xrhs and result state.dy.
    """
    # Velocity-density system
    with timed_region("Gusto:VelocityDensitySolve"):

        # Right-hand side for lambda, assembled into self.R
        with timed_region("Gusto:HybridRHS"):
            self._assemble_Rexp()

        # Trace (lambda) solve
        with timed_region("Gusto:HybridTraceSolve"):
            self.lSolver.solve(self.lambdar, self.R)

        # Recover the broken u and rho
        with timed_region("Gusto:HybridRecon"):
            self._assemble_rho()
            self._assemble_u()

        broken_u, rho1 = self.urho.split()
        u1 = self.u_hdiv

        # Average broken_u into the HDiv space, starting from zero
        u1.assign(0.0)
        averaging_args = {"w": (self._weight, READ),
                          "vec_in": (broken_u, READ),
                          "vec_out": (u1, INC)}
        with timed_region("Gusto:HybridProjectHDiv"):
            par_loop(self._average_kernel, dx, averaging_args)

        # Reapply bcs to ensure they are satisfied
        for bc in self.bcs:
            bc.apply(u1)

        # Store the results in the u and rho components of dy
        u, rho, theta = self.state.dy.split()
        u.assign(u1)
        rho.assign(rho1)

    # Theta reconstruction
    with timed_region("Gusto:ThetaRecon"):
        self.theta_solver.solve()

    # Store the result in the theta component of dy
    theta.assign(self.theta)
def transpose(self,result=None):
    '''Calculate transpose of matrix :math:`B=A^T`.

    Transpose the matrix and return the result. If the parameter result
    is passed, this matrix is used (after checking that its shape and
    band parameters are those of the transpose), otherwise new storage
    space is allocated.

    :arg result: resulting matrix :math:`B`
    '''
    # Identity test: ``!= None`` would go through any user-defined
    # comparison operator of the matrix class.
    if result is not None:
        assert(result._n_row == self._n_col)
        assert(result._n_col == self._n_row)
        assert(result.alpha == self.beta)
        assert(result.beta == self.alpha)
        assert(result.gamma_p == self.gamma_m)
        assert(result.gamma_m == self.gamma_p)
    else:
        result = BandedMatrix(self._fs_col,self._fs_row,
                              alpha=self.beta,beta=self.alpha,
                              gamma_m=self.gamma_p,gamma_p=self.gamma_m)
    # Kernel parameters of A (this matrix) and B (the result).
    # ``items()`` works on both Python 2 and Python 3 dictionaries.
    param_dict = {}
    for label, matrix in zip(('A','B'),(self,result)):
        param_dict.update({label+'_'+x:y
                           for (x,y) in matrix._param_dict.items()})
    kernel_code = '''void transpose(double **A,
                                    double **B) {
      for (int i=0;i<%(A_n_row)d;++i) {
        int j_m = (int) ceil((%(A_alpha)d*i-%(A_gamma_p)d)/(1.0*%(A_beta)f));
        int j_p = (int) floor((%(A_alpha)d*i+%(A_gamma_m)d)/(1.0*%(A_beta)f));
        for (int j=std::max(0,j_m);j<std::min(%(A_n_col)d,j_p+1);++j) {
          int i_m = (int) ceil((%(B_alpha)d*j-%(B_gamma_p)d)/(1.0*%(B_beta)f));
          B[0][%(B_bandwidth)d*j+(i-i_m)] = A[0][%(A_bandwidth)d*i+(j-j_m)];
        }
      }
    }'''
    kernel = op2.Kernel(kernel_code % param_dict, 'transpose',cpp=True)
    with timed_region('bandedmatrix transpose'):
        op2.par_loop(kernel,
                     self._hostmesh.cell_set,
                     self._data(op2.READ,self._Vcell.cell_node_map()),
                     result._data(op2.WRITE,self._Vcell.cell_node_map()))
    return result
def compute(self):
    """Execute the kernel over all members of the iteration space."""
    with timed_region("ParLoopChain: executor (%s)" % self._insp_name):
        self.halo_exchange_begin()

        jit_options = dict(
            all_kernels=self._all_kernels,
            all_itspaces=self._all_itspaces,
            all_args=self._all_args,
            executor=self._executor,
            insp_name=self._insp_name,
            use_glb_maps=self._use_glb_maps,
            use_prefetch=self._use_prefetch,
        )
        fun = TilingJITModule(self.kernel, self.it_space, *self.args,
                              **jit_options)
        arglist = self.prepare_arglist(None, *self.args)

        # Part 0 runs while the halo exchange is in flight, part 1 once it
        # has completed.
        self._compute(0, fun, *arglist)
        self.halo_exchange_end()
        self._compute(1, fun, *arglist)

        # Only meaningful if the user is enforcing tiling in presence of
        # global reductions
        self.reduction_begin()
        self.reduction_end()
        self.update_arg_data_state()
def _tridiagonal_solve(self,u):
    '''In-place tridiagonal solver for u with the Thomas algorithm.

    The generated kernel reads three matrix entries per row (sub-diagonal,
    diagonal, super-diagonal at offsets ``3*i``, ``3*i+1``, ``3*i+2``),
    performs a forward elimination sweep followed by a backward
    substitution sweep, and overwrites ``u`` with the solution.

    :arg u: Function to be solved for.
    :returns: the function ``u``, now containing the solution
    '''
    # NOTE: the kernel source must keep one statement per line, because the
    # ``//`` comments extend to the end of the line.
    kernel_code = '''void tridiagonal_solve(double **A,
                                            double **u) {
      double c_prime[%(A_n_row)d];
      // Forward sweep
      const double inv_tmp = 1./A[0][1];
      c_prime[0] = A[0][2]*inv_tmp;
      u[0][0] *= inv_tmp;
      for (int i=1;i<%(A_n_row)d;++i) {
        const double a = A[0][3*i];
        const double b = A[0][3*i+1];
        const double c = A[0][3*i+2];
        const double inv_tmp = 1.0/(b-a*c_prime[i-1]);
        c_prime[i] = c*inv_tmp;
        u[0][i] = (u[0][i]-a*u[0][i-1])*inv_tmp;
      }
      // Backward sweep
      for (int i=%(A_n_row)d-2;i>=0;--i) {
        u[0][i] -= c_prime[i]*u[0][i+1];
      }
    }'''
    # ``items()`` works on both Python 2 and Python 3 dictionaries.
    param_dict = {'A_'+x:y for (x,y) in self._param_dict.items()}
    kernel = op2.Kernel(kernel_code % param_dict, 'tridiagonal_solve')
    with timed_region('bandedmatrix tridiagonal_solve'):
        op2.par_loop(kernel,
                     self._hostmesh.cell_set,
                     self._data(op2.READ,self._Vcell.cell_node_map()),
                     u.dat(op2.RW,u.cell_node_map()),
                     name='bandedmatrix tridiagonal_solve['+self._label+']',
                     measure_flops=True)
    return u
def _init_dense(self):
    """Create the dense matrix, its block views, and zero all entries."""
    mat = PETSc.Mat()
    row_set, col_set = self.sparsity.dsets
    row_lgmap = row_set.unblocked_lgmap
    col_lgmap = col_set.unblocked_lgmap
    mat.createDense(size=((self.nrows, None), (self.ncols, None)),
                    bsize=1,
                    comm=self.comm)
    mat.setLGMap(rmap=row_lgmap, cmap=col_lgmap)
    self.handle = mat

    self._blocks = []
    rows, cols = self.sparsity.shape
    for i in range(rows):
        self._blocks.append([MatBlock(self, i, j) for j in range(cols)])

    mat.setOption(mat.Option.IGNORE_OFF_PROC_ENTRIES, False)
    mat.setOption(mat.Option.SUBSET_OFF_PROC_ENTRIES, True)
    mat.setUp()

    # Put zeros in all the places we might eventually put a value.
    with timed_region("MatZeroInitial"):
        mat.zeroEntries()
    mat.assemble()
def apply(self, pc, x, y):
    """We solve the forward eliminated problem for the
    approximate traces of the scalar solution (the multipliers)
    and reconstruct the "broken flux and scalar variable."

    Lastly, we project the broken solutions into the mimetic
    non-broken finite element space.

    :arg pc: a Preconditioner instance.
    :arg x: PETSc vector holding the incoming residual.
    :arg y: PETSc vector receiving the unbroken solution.
    """
    with timed_region("HybridBreak"):
        with self.unbroken_residual.dat.vec_wo as residual_vec:
            x.copy(residual_vec)

        # The scalar space is already "broken", so the scalar residual is
        # transferred by a plain copy without any projection.
        scalar_src = self.unbroken_residual.split()[self.pidx]
        scalar_dst = self.broken_residual.split()[self.pidx]
        scalar_src.dat.copy(scalar_dst.dat)

    with timed_region("HybridProject"):
        # Incoming HDiv residual: solve (approximately) `g = A.inv * r`
        # with `A` the HDiv mass matrix and `r` the HDiv residual.  The
        # inner product of `g` against broken HDiv test functions then
        # gives the broken residual.
        hdiv_residual = self.unbroken_residual.split()[self.vidx]
        with hdiv_residual.dat.vec_ro as r, self._primal_r.dat.vec_wo as g:
            self.hdiv_mass_ksp.solve(r, g)

    with timed_region("HybridRHS"):
        # New "broken" hdiv residual
        self._assemble_broken_r()

        # Right-hand side of the multiplier system
        self._assemble_Srhs()

    with timed_region("HybridSolve"):
        # Lagrange multiplier solve.  The current trace solution is only
        # read when the KSP uses a nonzero initial guess.
        with self.schur_rhs.dat.vec_ro as b:
            trace_dat = self.trace_solution.dat
            if self.trace_ksp.getInitialGuessNonzero():
                trace_access = trace_dat.vec
            else:
                trace_access = trace_dat.vec_wo
            with trace_access as x_trace:
                self.trace_ksp.solve(b, x_trace)

    # Reconstruct the unknowns
    self._reconstruct()

    with timed_region("HybridRecover"):
        # Pressure: copy the broken solution into the non-broken space
        pressure_src = self.broken_solution.split()[self.pidx]
        pressure_dst = self.unbroken_solution.split()[self.pidx]
        pressure_src.dat.copy(pressure_dst.dat)

        # Velocity: hdiv projection of the broken hdiv solution
        self._assemble_projection_rhs()
        hdiv_solution = self.unbroken_solution.split()[self.vidx]
        with self._projection_rhs.dat.vec_ro as b_proj:
            with hdiv_solution.dat.vec_wo as sol:
                self.hdiv_projection_ksp.solve(b_proj, sol)

        with self.unbroken_solution.dat.vec_ro as solution_vec:
            solution_vec.copy(y)
def initialize(self, pc):
    """Set up the problem context. Take the original
    mixed problem and reformulate the problem as a
    hybridized mixed system.

    A KSP is created for the Lagrange multiplier system.

    :arg pc: the preconditioner whose operator carries the
        ``ImplicitMatrixContext`` of the mixed problem.
    :raises ValueError: if the python context is not an
        ``ImplicitMatrixContext`` or the space is not a pair of an
        H(div) and an L2 space.
    :raises NotImplementedError: if a strong boundary condition is set on
        the scalar space or on an unsupported space.
    """
    from firedrake import (FunctionSpace, Function, Constant,
                           TrialFunction, TrialFunctions, TestFunction,
                           DirichletBC)
    from firedrake.assemble import (allocate_matrix,
                                    create_assembly_callable)
    from firedrake.formmanipulation import split_form
    from ufl.algorithms.replace import replace

    # Extract the problem context.  The options prefix may be unset
    # (None); treat that as an empty prefix instead of failing on the
    # string concatenation.
    prefix = (pc.getOptionsPrefix() or "") + "hybridization_"
    _, P = pc.getOperators()
    self.ctx = P.getPythonContext()

    if not isinstance(self.ctx, ImplicitMatrixContext):
        raise ValueError(
            "The python context must be an ImplicitMatrixContext")

    test, trial = self.ctx.a.arguments()

    V = test.function_space()
    mesh = V.mesh()

    if len(V) != 2:
        raise ValueError("Expecting two function spaces.")

    if all(Vi.ufl_element().value_shape() for Vi in V):
        raise ValueError("Expecting an H(div) x L2 pair of spaces.")

    # Automagically determine which spaces are vector and scalar
    for i, Vi in enumerate(V):
        if Vi.ufl_element().sobolev_space().name == "HDiv":
            self.vidx = i
        else:
            assert Vi.ufl_element().sobolev_space().name == "L2"
            self.pidx = i

    # Create the space of approximate traces.
    W = V[self.vidx]
    if W.ufl_element().family() == "Brezzi-Douglas-Marini":
        tdegree = W.ufl_element().degree()
    else:
        try:
            # If we have a tensor product element
            h_deg, v_deg = W.ufl_element().degree()
            tdegree = (h_deg - 1, v_deg - 1)
        except TypeError:
            tdegree = W.ufl_element().degree() - 1

    TraceSpace = FunctionSpace(mesh, "HDiv Trace", tdegree)

    # Break the function spaces and define fully discontinuous spaces
    broken_elements = ufl.MixedElement(
        [ufl.BrokenElement(Vi.ufl_element()) for Vi in V])
    V_d = FunctionSpace(mesh, broken_elements)

    # Set up the functions for the original, hybridized
    # and schur complement systems
    self.broken_solution = Function(V_d)
    self.broken_residual = Function(V_d)
    self.trace_solution = Function(TraceSpace)
    self.unbroken_solution = Function(V)
    self.unbroken_residual = Function(V)

    # Count, per degree of freedom of the HDiv space, how many cells
    # contribute to it.  The loopy instructions must stay one statement
    # per line.
    shapes = (V[self.vidx].finat_element.space_dimension(),
              np.prod(V[self.vidx].shape))
    domain = "{[i,j]: 0 <= i < %d and 0 <= j < %d}" % shapes
    instructions = """
    for i, j
        w[i,j] = w[i,j] + 1
    end
    """
    self.weight = Function(V[self.vidx])
    par_loop((domain, instructions), ufl.dx, {"w": (self.weight, INC)},
             is_loopy_kernel=True)

    # Kernel that accumulates ``vec_in`` divided by the weights
    instructions = """
    for i, j
        vec_out[i,j] = vec_out[i,j] + vec_in[i,j]/w[i,j]
    end
    """
    self.average_kernel = (domain, instructions)

    # Create the symbolic Schur-reduction:
    # Original mixed operator replaced with "broken"
    # arguments
    arg_map = {test: TestFunction(V_d),
               trial: TrialFunction(V_d)}
    Atilde = Tensor(replace(self.ctx.a, arg_map))
    gammar = TestFunction(TraceSpace)
    n = ufl.FacetNormal(mesh)
    sigma = TrialFunctions(V_d)[self.vidx]

    if mesh.cell_set._extruded:
        Kform = (gammar('+') * ufl.jump(sigma, n=n) * ufl.dS_h
                 + gammar('+') * ufl.jump(sigma, n=n) * ufl.dS_v)
    else:
        Kform = (gammar('+') * ufl.jump(sigma, n=n) * ufl.dS)

    # Here we deal with boundaries. If there are Neumann
    # conditions (which should be enforced strongly for
    # H(div)xL^2) then we need to add jump terms on the exterior
    # facets. If there are Dirichlet conditions (which should be
    # enforced weakly) then we need to zero out the trace
    # variables there as they are not active (otherwise the hybrid
    # problem is not well-posed).

    # If boundary conditions are contained in the ImplicitMatrixContext:
    if self.ctx.row_bcs:
        # Find all the subdomains with neumann BCS
        # These are Dirichlet BCs on the vidx space
        neumann_subdomains = set()
        for bc in self.ctx.row_bcs:
            if bc.function_space().index == self.pidx:
                raise NotImplementedError(
                    "Dirichlet conditions for scalar variable not supported. Use a weak bc"
                )
            if bc.function_space().index != self.vidx:
                raise NotImplementedError(
                    "Dirichlet bc set on unsupported space.")
            # append the set of sub domains
            subdom = bc.sub_domain
            if isinstance(subdom, str):
                neumann_subdomains |= set([subdom])
            else:
                neumann_subdomains |= set(
                    as_tuple(subdom, numbers.Integral))

        # separate out the top and bottom bcs
        extruded_neumann_subdomains = neumann_subdomains & {"top", "bottom"}
        neumann_subdomains = neumann_subdomains - extruded_neumann_subdomains

        integrand = gammar * ufl.dot(sigma, n)
        measures = []
        trace_subdomains = []
        if mesh.cell_set._extruded:
            ds = ufl.ds_v
            for subdomain in sorted(extruded_neumann_subdomains):
                measures.append({"top": ufl.ds_t,
                                 "bottom": ufl.ds_b}[subdomain])
            trace_subdomains.extend(
                sorted({"top", "bottom"} - extruded_neumann_subdomains))
        else:
            ds = ufl.ds
        if "on_boundary" in neumann_subdomains:
            measures.append(ds)
        else:
            measures.extend((ds(sd) for sd in sorted(neumann_subdomains)))
            markers = [int(x) for x in mesh.exterior_facets.unique_markers]
            dirichlet_subdomains = set(markers) - neumann_subdomains
            trace_subdomains.extend(sorted(dirichlet_subdomains))

        for measure in measures:
            Kform += integrand * measure
    else:
        # No bcs were provided, we assume weak Dirichlet conditions.
        # We zero out the contribution of the trace variables on
        # the exterior boundary. Extruded cells will have both
        # horizontal and vertical facets
        trace_subdomains = ["on_boundary"]
        if mesh.cell_set._extruded:
            trace_subdomains.extend(["bottom", "top"])

    # Homogeneous conditions for the inactive trace variables
    trace_bcs = [DirichletBC(TraceSpace, Constant(0.0), subdomain)
                 for subdomain in trace_subdomains]

    # Make a SLATE tensor from Kform
    K = Tensor(Kform)

    # Assemble the Schur complement operator and right-hand side
    self.schur_rhs = Function(TraceSpace)
    self._assemble_Srhs = create_assembly_callable(
        K * Atilde.inv * AssembledVector(self.broken_residual),
        tensor=self.schur_rhs,
        form_compiler_parameters=self.ctx.fc_params)

    mat_type = PETSc.Options().getString(prefix + "mat_type", "aij")

    schur_comp = K * Atilde.inv * K.T
    self.S = allocate_matrix(schur_comp, bcs=trace_bcs,
                             form_compiler_parameters=self.ctx.fc_params,
                             mat_type=mat_type,
                             options_prefix=prefix)
    self._assemble_S = create_assembly_callable(
        schur_comp,
        tensor=self.S,
        bcs=trace_bcs,
        form_compiler_parameters=self.ctx.fc_params,
        mat_type=mat_type)

    with timed_region("HybridOperatorAssembly"):
        self._assemble_S()

    Smat = self.S.petscmat

    # Optional user-supplied null space of the trace system
    nullspace = self.ctx.appctx.get("trace_nullspace", None)
    if nullspace is not None:
        nsp = nullspace(TraceSpace)
        Smat.setNullSpace(nsp.nullspace(comm=pc.comm))

    # Set up the KSP for the system of Lagrange multipliers
    trace_ksp = PETSc.KSP().create(comm=pc.comm)
    trace_ksp.setOptionsPrefix(prefix)
    trace_ksp.setOperators(Smat)
    trace_ksp.setUp()
    trace_ksp.setFromOptions()
    self.trace_ksp = trace_ksp

    split_mixed_op = dict(split_form(Atilde.form))
    split_trace_op = dict(split_form(K.form))

    # Generate reconstruction calls
    self._reconstruction_calls(split_mixed_op, split_trace_op)
def __init__(self, plex, name, reorder, distribute):
    """Half-initialise a mesh topology.

    The remaining work (growing halos, reordering, numbering) is deferred
    to a callback stored as ``self._callback``.

    :arg plex: :class:`DMPlex` representing the mesh topology
    :arg name: name of the mesh
    :arg reorder: whether to reorder the mesh (bool)
    :arg distribute: whether to distribute the mesh to parallel processes
    """
    # Do some validation of the input mesh
    dmplex.validate_mesh(plex)

    utils._init()

    self._plex = plex
    self.name = name

    # A cache of function spaces that have been built on this mesh
    self._cache = {}

    # Mark exterior and interior facets
    # Note.  This must come before distribution, because otherwise
    # DMPlex will consider facets on the domain boundary to be
    # exterior, which is wrong.
    with timed_region("Mesh: label facets"):
        label_boundary = (op2.MPI.comm.size == 1) or distribute
        dmplex.label_facets(plex, label_boundary=label_boundary)

    # Distribute the dm to all ranks
    if op2.MPI.comm.size > 1 and distribute:
        # We distribute with overlap zero, in case we're going to
        # refine this mesh in parallel.  Later, when we actually use
        # it, we grow the halo.
        plex.distribute(overlap=0)

    dim = plex.getDimension()

    # The facet count of the first cell selects the cell type
    first_cell, _last_cell = plex.getHeightStratum(0)
    cell_nfacets = plex.getConeSize(first_cell)

    self._grown_halos = False
    self._ufl_cell = ufl.Cell(_cells[dim][cell_nfacets])

    def callback(self):
        """Finish initialisation."""
        del self._callback
        if op2.MPI.comm.size > 1:
            self._plex.distributeOverlap(1)
        self._grown_halos = True

        # No reordering unless requested
        reordering = None
        if reorder:
            with timed_region("Mesh: reorder"):
                ordering_kind = PETSc.Mat.OrderingType.RCM
                old_to_new = self._plex.getOrdering(ordering_kind).indices
                # Invert the permutation returned by the plex
                reordering = np.empty_like(old_to_new)
                reordering[old_to_new] = np.arange(old_to_new.size,
                                                   dtype=old_to_new.dtype)
        self._did_reordering = bool(reorder)

        # Mark OP2 entities and derive the resulting Plex renumbering
        with timed_region("Mesh: renumbering"):
            dmplex.mark_entity_classes(self._plex)
            self._entity_classes = dmplex.get_entity_classes(self._plex)
            self._plex_renumbering = dmplex.plex_renumbering(
                self._plex, self._entity_classes, reordering)

        with timed_region("Mesh: cell numbering"):
            # Derive a cell numbering from the Plex renumbering:
            # one dof on the last entity dimension only
            entity_dofs = np.zeros(dim+1, dtype=np.int32)
            entity_dofs[-1] = 1
            self._cell_numbering = self._plex.createSection(
                [1], entity_dofs, perm=self._plex_renumbering)

            # Same for vertices: one dof on the first entity dimension only
            entity_dofs[:] = 0
            entity_dofs[0] = 1
            self._vertex_numbering = self._plex.createSection(
                [1], entity_dofs, perm=self._plex_renumbering)

    self._callback = callback
def thunk(bcs):
    """Assemble into the target tensor and return the result.

    The tensor is zeroed, then one ``op2.par_loop`` is executed for
    every entry of ``kernels`` (one per iteration set for top/bottom
    facet integrals).  Afterwards the boundary conditions are applied
    and ``result()`` is returned.

    ``kernels``, ``tensor``, ``is_mat``, ``is_vec``, ``mat``, ``vec``,
    ``zero_tensor``, ``result_function`` and ``result`` are not defined
    here; they are free variables taken from the enclosing scope.

    :arg bcs: boundary conditions to apply, or ``None``.
        NOTE(review): when ``is_mat`` is true the kernel loop iterates
        over (or forwards) ``bcs`` before the ``None`` checks further
        down -- confirm callers never pass ``None`` in that case.
    :raises RuntimeError: if a parallel loop raises ``MapValueError``,
        if an integral type is not recognised, or if a boundary
        condition lives on a full mixed function space.
    """
    zero_tensor()
    # Each entry of ``kernels`` unpacks into a pair of block indices,
    # the integral type and subdomain id, the coordinate field, the
    # coefficients, an orientation flag and the kernel to execute.
    for (i, j), integral_type, subdomain_id, coords, coefficients, needs_orientations, kernel in kernels:
        m = coords.function_space().mesh()
        if needs_orientations:
            cell_orientations = m.cell_orientations()
        # Extract block from tensor and test/trial spaces
        # FIXME Ugly variable renaming required because functions are not
        # lexical closures in Python and we're writing to these variables
        if is_mat and tensor.sparsity.shape > (1, 1):
            # More than one block: keep only the bcs whose function
            # space index matches the first / second block index.
            tsbc = [bc for bc in bcs if bc.function_space().index == i]
            trbc = [bc for bc in bcs if bc.function_space().index == j]
        elif is_mat:
            # Single block: every bc is used for both maps.
            tsbc, trbc = bcs, bcs
        if integral_type == 'cell':
            with timed_region("Assemble cells"):
                # Output argument: matrix block, vector block, or the
                # tensor itself accessed with op2.INC.
                if is_mat:
                    tensor_arg = mat(lambda s: s.cell_node_map(tsbc),
                                     lambda s: s.cell_node_map(trbc),
                                     i, j)
                elif is_vec:
                    tensor_arg = vec(lambda s: s.cell_node_map(), i)
                else:
                    tensor_arg = tensor(op2.INC)

                itspace = m.cell_set
                # Argument order: kernel, iteration set, output,
                # coordinates, [orientations], coefficients.
                args = [kernel, itspace, tensor_arg,
                        coords.dat(op2.READ, coords.cell_node_map(), flatten=True)]

                if needs_orientations:
                    args.append(cell_orientations.dat(op2.READ,
                                                      cell_orientations.cell_node_map(),
                                                      flatten=True))
                for c in coefficients:
                    args.append(c.dat(op2.READ, c.cell_node_map(), flatten=True))

                try:
                    op2.par_loop(*args)
                except MapValueError:
                    raise RuntimeError("Integral measure does not match measure of all coefficients/arguments")

        elif integral_type in ['exterior_facet', 'exterior_facet_vert']:
            with timed_region("Assemble exterior facets"):
                if is_mat:
                    tensor_arg = mat(lambda s: s.exterior_facet_node_map(tsbc),
                                     lambda s: s.exterior_facet_node_map(trbc),
                                     i, j)
                elif is_vec:
                    tensor_arg = vec(lambda s: s.exterior_facet_node_map(), i)
                else:
                    tensor_arg = tensor(op2.INC)
                args = [kernel, m.exterior_facets.measure_set(integral_type, subdomain_id), tensor_arg,
                        coords.dat(op2.READ, coords.exterior_facet_node_map(),
                                   flatten=True)]
                if needs_orientations:
                    args.append(cell_orientations.dat(op2.READ,
                                                      cell_orientations.exterior_facet_node_map(),
                                                      flatten=True))
                for c in coefficients:
                    args.append(c.dat(op2.READ, c.exterior_facet_node_map(),
                                      flatten=True))
                # Facet integrals additionally pass the local facet
                # data as the final argument.
                args.append(m.exterior_facets.local_facet_dat(op2.READ))
                try:
                    op2.par_loop(*args)
                except MapValueError:
                    raise RuntimeError("Integral measure does not match measure of all coefficients/arguments")

        elif integral_type in ['exterior_facet_top', 'exterior_facet_bottom']:
            with timed_region("Assemble exterior facets"):
                # In the case of extruded meshes with horizontal facet integrals, two
                # parallel loops will (potentially) get created and called based on the
                # domain id: interior horizontal, bottom or top.

                # Get the list of sets and globals required for parallel loop construction.
                set_global_list = m.exterior_facets.measure_set(integral_type, subdomain_id)

                # Iterate over the list and assemble all the args of the parallel loop
                # NOTE: the loop variable ``set`` shadows the builtin of
                # the same name for the remainder of this function.
                for (index, set) in set_global_list:
                    if is_mat:
                        tensor_arg = mat(lambda s: op2.DecoratedMap(s.cell_node_map(tsbc), index),
                                         lambda s: op2.DecoratedMap(s.cell_node_map(trbc), index),
                                         i, j)
                    elif is_vec:
                        tensor_arg = vec(lambda s: s.cell_node_map(), i)
                    else:
                        tensor_arg = tensor(op2.INC)

                    # Add the kernel, iteration set and coordinate fields to the loop args
                    args = [kernel, set, tensor_arg,
                            coords.dat(op2.READ, coords.cell_node_map(),
                                       flatten=True)]
                    if needs_orientations:
                        args.append(cell_orientations.dat(op2.READ,
                                                          cell_orientations.cell_node_map(),
                                                          flatten=True))
                    for c in coefficients:
                        args.append(c.dat(op2.READ, c.cell_node_map(),
                                          flatten=True))
                    try:
                        # ``index`` is also handed to the loop itself
                        # through the ``iterate`` keyword.
                        op2.par_loop(*args, iterate=index)
                    except MapValueError:
                        raise RuntimeError("Integral measure does not match measure of all coefficients/arguments")

        elif integral_type in ['interior_facet', 'interior_facet_vert']:
            with timed_region("Assemble interior facets"):
                if is_mat:
                    tensor_arg = mat(lambda s: s.interior_facet_node_map(tsbc),
                                     lambda s: s.interior_facet_node_map(trbc),
                                     i, j)
                elif is_vec:
                    tensor_arg = vec(lambda s: s.interior_facet_node_map(), i)
                else:
                    tensor_arg = tensor(op2.INC)
                # NOTE(review): unlike the other facet branches this one
                # iterates over ``interior_facets.set`` and does not use
                # ``subdomain_id`` -- confirm this is intended.
                args = [kernel, m.interior_facets.set, tensor_arg,
                        coords.dat(op2.READ, coords.interior_facet_node_map(),
                                   flatten=True)]
                if needs_orientations:
                    args.append(cell_orientations.dat(op2.READ,
                                                      cell_orientations.interior_facet_node_map(),
                                                      flatten=True))
                for c in coefficients:
                    args.append(c.dat(op2.READ, c.interior_facet_node_map(),
                                      flatten=True))
                args.append(m.interior_facets.local_facet_dat(op2.READ))
                try:
                    op2.par_loop(*args)
                except MapValueError:
                    raise RuntimeError("Integral measure does not match measure of all coefficients/arguments")

        elif integral_type == 'interior_facet_horiz':
            with timed_region("Assemble interior facets"):
                # Cell node maps are used here, decorated with
                # op2.ON_INTERIOR_FACETS for the matrix case.
                if is_mat:
                    tensor_arg = mat(lambda s: op2.DecoratedMap(s.cell_node_map(tsbc),
                                                                op2.ON_INTERIOR_FACETS),
                                     lambda s: op2.DecoratedMap(s.cell_node_map(trbc),
                                                                op2.ON_INTERIOR_FACETS),
                                     i, j)
                elif is_vec:
                    tensor_arg = vec(lambda s: s.cell_node_map(), i)
                else:
                    tensor_arg = tensor(op2.INC)

                args = [kernel, m.interior_facets.measure_set(integral_type, subdomain_id), tensor_arg,
                        coords.dat(op2.READ, coords.cell_node_map(),
                                   flatten=True)]
                if needs_orientations:
                    args.append(cell_orientations.dat(op2.READ,
                                                      cell_orientations.cell_node_map(),
                                                      flatten=True))
                for c in coefficients:
                    args.append(c.dat(op2.READ, c.cell_node_map(),
                                      flatten=True))
                try:
                    op2.par_loop(*args, iterate=op2.ON_INTERIOR_FACETS)
                except MapValueError:
                    raise RuntimeError("Integral measure does not match measure of all coefficients/arguments")

        else:
            raise RuntimeError('Unknown integral type "%s"' % integral_type)

    # Must apply bcs outside loop over kernels because we may wish
    # to apply bcs to a block which is otherwise zero, and
    # therefore does not have an associated kernel.
    if bcs is not None and is_mat:
        with timed_region('DirichletBC apply'):
            for bc in bcs:
                fs = bc.function_space()
                if isinstance(fs, functionspace.MixedFunctionSpace):
                    raise RuntimeError("""Cannot apply boundary conditions to full mixed space.
Did you forget to index it?""")
                shape = tensor.sparsity.shape
                # ``i`` and ``j`` are reused as plain block indices
                # here; the kernel loop above has finished.
                for i in range(shape[0]):
                    for j in range(shape[1]):
                        # Set diagonal entries on bc nodes to 1 if the current
                        # block is on the matrix diagonal and its index matches the
                        # index of the function space the bc is defined on.
                        if i == j and (fs.index is None or fs.index == i):
                            tensor[i, j].inc_local_diagonal_entries(bc.nodes)
    if bcs is not None and is_vec:
        for bc in bcs:
            bc.apply(result_function)
    if is_mat:
        # Queue up matrix assembly (after we've done all the other operations)
        tensor.assemble()
    return result()
def apply(self, pc, x, y):
    """Apply the preconditioner to ``x``, storing the result in ``y``.

    We solve the forward eliminated problem for the approximate
    traces of the scalar solution (the multipliers) and reconstruct
    the "broken" flux and scalar variable.  Lastly, we project the
    broken solutions into the mimetic non-broken finite element
    space.

    :arg pc: the preconditioner object (not used in this method)
    :arg x: vector that is copied into the unbroken residual
    :arg y: vector that receives a copy of the unbroken solution
    """

    def averaged_transfer(source, target):
        # Zero ``target`` and accumulate ``source`` into it through
        # the averaging kernel, using ``self.weight``.
        target.assign(0)
        par_loop(self.average_kernel, ufl.dx,
                 {"w": (self.weight, READ),
                  "vec_in": (source, READ),
                  "vec_out": (target, INC)})

    with timed_region("HybridBreak"):
        with self.unbroken_residual.dat.vec_wo as residual_vec:
            x.copy(residual_vec)

        # Move the scalar part of the unbroken rhs into the broken
        # rhs.  The scalar space is already "broken", so a plain copy
        # suffices and no projection is needed.
        scalar_src = self.unbroken_residual.split()[self.pidx]
        scalar_dst = self.broken_residual.split()[self.pidx]
        scalar_src.dat.copy(scalar_dst.dat)

    with timed_region("HybridRHS"):
        # Build the "broken" hdiv residual R' such that R'[w] = R[w]
        # whenever w lies in the unbroken space.  The residual is
        # split equally between the broken basis functions that add
        # together to give an unbroken basis function.
        hdiv_src = self.unbroken_residual.split()[self.vidx]
        hdiv_dst = self.broken_residual.split()[self.vidx]
        averaged_transfer(hdiv_src, hdiv_dst)

        # Right-hand side of the multiplier system
        self._assemble_Srhs()

    with timed_region("HybridSolve"):
        # Solve for the Lagrange multipliers
        with self.schur_rhs.dat.vec_ro as b:
            # Use read-write access when the solver starts from a
            # non-zero initial guess, write-only access otherwise.
            trace_dat = self.trace_solution.dat
            if self.trace_ksp.getInitialGuessNonzero():
                acc = trace_dat.vec
            else:
                acc = trace_dat.vec_wo
            with acc as x_trace:
                self.trace_ksp.solve(b, x_trace)

    # Recover the remaining unknowns from the multipliers
    self._reconstruct()

    with timed_region("HybridRecover"):
        # Map the broken solution back into the non-broken spaces:
        # the scalar part is copied directly ...
        pressure_src = self.broken_solution.split()[self.pidx]
        pressure_dst = self.unbroken_solution.split()[self.pidx]
        pressure_src.dat.copy(pressure_dst.dat)

        # ... while the hdiv part goes through the averaging kernel.
        flux_src = self.broken_solution.split()[self.vidx]
        flux_dst = self.unbroken_solution.split()[self.vidx]
        averaged_transfer(flux_src, flux_dst)

    with self.unbroken_solution.dat.vec_ro as solution_vec:
        solution_vec.copy(y)
def callback(self):
    """Finish the deferred part of the mesh initialisation.

    Grows the halo when running in parallel, optionally reorders the
    mesh, numbers cells and vertices, and builds the coordinate field
    together with the integration measures attached to it.

    ``reorder``, ``topological_dim``, ``geometric_dim`` and
    ``periodic_coords`` are not defined here; they are taken from the
    enclosing scope.
    """
    del self._callback
    if op2.MPI.comm.size > 1:
        self._plex.distributeOverlap(1)
    self._grown_halos = True

    # ``None`` means "no reordering"
    reordering = None
    if reorder:
        with timed_region("Mesh: reorder"):
            # Invert the permutation returned by the RCM ordering
            perm = self._plex.getOrdering(PETSc.Mat.OrderingType.RCM).indices
            reordering = np.empty_like(perm)
            reordering[perm] = np.arange(perm.size, dtype=perm.dtype)

    # Mark OP2 entities and derive the resulting Plex renumbering
    with timed_region("Mesh: renumbering"):
        dmplex.mark_entity_classes(self._plex)
        self._entity_classes = dmplex.get_entity_classes(self._plex)
        self._plex_renumbering = dmplex.plex_renumbering(self._plex,
                                                         self._entity_classes,
                                                         reordering)

    with timed_region("Mesh: cell numbering"):
        # One dof on the last entry of the dof layout gives the cell
        # numbering ...
        cell_dofs = np.zeros(topological_dim+1, dtype=np.int32)
        cell_dofs[-1] = 1
        self._cell_numbering = self._plex.createSection([1], cell_dofs,
                                                        perm=self._plex_renumbering)

        # ... and one dof on the first entry gives the vertex numbering.
        vertex_dofs = np.zeros(topological_dim+1, dtype=np.int32)
        vertex_dofs[0] = 1
        self._vertex_numbering = self._plex.createSection([1], vertex_dofs,
                                                          perm=self._plex_renumbering)

    # Note that for bendy elements, this needs to change.
    with timed_region("Mesh: coordinate field"):
        if periodic_coords is None:
            # Ordinary mesh: read the vertex coordinates from the plex
            self._coordinate_fs = functionspace.VectorFunctionSpace(self, "Lagrange", 1)
            vertex_coords = dmplex.reordered_coords(self._plex,
                                                    self._coordinate_fs._global_numbering,
                                                    (self.num_vertices(), geometric_dim))
            self.coordinates = function.Function(self._coordinate_fs,
                                                 val=vertex_coords,
                                                 name="Coordinates")
        else:
            if self.ufl_cell().geometric_dimension() != 1:
                raise NotImplementedError("Periodic coordinates in more than 1D are unsupported")
            # A periodic coordinate field was supplied; use it as a
            # DG1 field.
            self._coordinate_fs = functionspace.VectorFunctionSpace(self, "DG", 1)
            self.coordinates = function.Function(self._coordinate_fs,
                                                 val=periodic_coords,
                                                 name="Coordinates")

    self._ufl_domain = ufl.Domain(self.coordinates)

    # The coordinate function space sits in the cache and will be
    # picked up by later VectorFunctionSpace construction, so give it
    # a ufl element carrying the correct domain.
    coordinate_element = self._coordinate_fs.ufl_element()
    self._coordinate_fs._ufl_element = coordinate_element.reconstruct(domain=self.ufl_domain())

    # HACK alert!
    # Swap the coordinate Function for one that has a real domain on
    # it, sharing (not copying) the existing values.
    self.coordinates = function.Function(self._coordinate_fs, val=self.coordinates.dat)

    # Measures stored on the mesh carry the coordinates as
    # subdomain_data.  Weak references are used for consistency with
    # the "global" measure objects.
    for attribute, integral in (("_dx", 'cell'),
                                ("_ds", 'exterior_facet'),
                                ("_dS", 'interior_facet')):
        setattr(self, attribute,
                ufl.Measure(integral, subdomain_data=weakref.ref(self.coordinates)))

    # Point the default measures at this coordinate field as well.
    # The domain is deliberately not set on them: dx is global, so
    # that would create an uncollectable reference, and it is never
    # used anyway.
    for measure in [ufl.dx, ufl.ds, ufl.dS]:
        measure._subdomain_data = weakref.ref(self.coordinates)
def __init__(self, meshfile, **kwargs):
    """Construct a mesh object.

    Meshes may either be created by reading from a mesh file, or by
    providing a PETSc DMPlex object defining the mesh topology.

    Construction is lazy: the expensive part (halo growth, reordering,
    numbering and building the coordinate field) is stored as
    ``self._callback``.

    :param meshfile: Mesh file name (or DMPlex object) defining
           mesh topology.  See below for details on supported mesh
           formats.
    :param dim: optional specification of the geometric dimension
           of the mesh.  If not supplied the geometric dimension is
           taken to be the topological dimension of the mesh.
    :param reorder: optional flag indicating whether to reorder
           meshes for better cache locality.  If not supplied the
           default value in :data:`parameters["reorder_meshes"]`
           is used.
    :param periodic_coords: optional numpy array of coordinates
           used to replace those in the mesh object.  These are
           only supported in 1D and must have enough entries to be
           used as a DG1 field on the mesh.  Not supported when
           reading from file.
    :param distribute: optional flag (default ``True``) indicating
           whether the mesh should be distributed over the parallel
           processes when running on more than one process.

    :raises RuntimeError: if ``periodic_coords`` is given together
           with a mesh file, or if the file extension is not one of
           the supported formats.

    When the mesh is read from a file the following mesh formats
    are supported (determined, case insensitively, from the
    filename extension):

    * GMSH: with extension `.msh`
    * Exodus: with extension `.e`, `.exo`
    * CGNS: with extension `.cgns`
    * Triangle: with extension `.node`

    .. note::

        Of the file readers, only the Triangle one is handed
        :data:`dim`.  Independently of how the topology was obtained
        (file or DMPlex), a supplied :data:`dim` is used as the
        geometric dimension of the UFL cell and of the coordinate
        array.
    """
    utils._init()

    geometric_dim = kwargs.get("dim", None)
    reorder = kwargs.get("reorder", parameters["reorder_meshes"])
    periodic_coords = kwargs.get("periodic_coords", None)
    distribute = kwargs.get("distribute", True)

    if isinstance(meshfile, PETSc.DMPlex):
        name = "plexmesh"
        plex = meshfile
    else:
        name = meshfile
        ext = os.path.splitext(meshfile)[1]

        if periodic_coords is not None:
            raise RuntimeError("Periodic coordinates are unsupported when reading from file")

        # Dispatch on the lower-cased extension; the original
        # spelling is kept for the error message.
        ext_lower = ext.lower()
        if ext_lower in ['.e', '.exo']:
            plex = _from_exodus(meshfile)
        elif ext_lower == '.cgns':
            plex = _from_cgns(meshfile)
        elif ext_lower == '.msh':
            plex = _from_gmsh(meshfile)
        elif ext_lower == '.node':
            plex = _from_triangle(meshfile, geometric_dim)
        else:
            raise RuntimeError("Mesh file %s has unknown format '%s'."
                               % (meshfile, ext[1:]))

    # Mark exterior and interior facets
    # Note.  This must come before distribution, because otherwise
    # DMPlex will consider facets on the domain boundary to be
    # exterior, which is wrong.
    with timed_region("Mesh: label facets"):
        label_boundary = op2.MPI.comm.size == 1 or distribute
        dmplex.label_facets(plex, label_boundary=label_boundary)

    # Distribute the dm to all ranks
    if op2.MPI.comm.size > 1 and distribute:
        # We distribute with overlap zero, in case we're going to
        # refine this mesh in parallel.  Later, when we actually use
        # it, we grow the halo.
        plex.distribute(overlap=0)

    # A cache of function spaces that have been built on this mesh
    self._cache = {}
    self.parent = None

    self.name = name
    self._plex = plex
    self.uid = utils._new_uid()

    topological_dim = self._plex.getDimension()
    if geometric_dim is None:
        geometric_dim = topological_dim

    # The cone size of the first cell selects the UFL cell type
    cell_start, _ = self._plex.getHeightStratum(0)
    cell_facets = self._plex.getConeSize(cell_start)

    self._ufl_cell = ufl.Cell(fiat_utils._cells[topological_dim][cell_facets],
                              geometric_dimension=geometric_dim)
    self._ufl_domain = ufl.Domain(self.ufl_cell(), data=self)
    self._grown_halos = False

    def callback(self):
        """Finish the deferred part of the initialisation."""
        del self._callback
        if op2.MPI.comm.size > 1:
            self._plex.distributeOverlap(1)
        self._grown_halos = True

        if reorder:
            with timed_region("Mesh: reorder"):
                # Invert the permutation returned by the RCM ordering
                old_to_new = self._plex.getOrdering(PETSc.Mat.OrderingType.RCM).indices
                reordering = np.empty_like(old_to_new)
                reordering[old_to_new] = np.arange(old_to_new.size, dtype=old_to_new.dtype)
        else:
            # No reordering
            reordering = None

        # Mark OP2 entities and derive the resulting Plex renumbering
        with timed_region("Mesh: renumbering"):
            dmplex.mark_entity_classes(self._plex)
            self._entity_classes = dmplex.get_entity_classes(self._plex)
            self._plex_renumbering = dmplex.plex_renumbering(self._plex,
                                                             self._entity_classes,
                                                             reordering)

        with timed_region("Mesh: cell numbering"):
            # Derive a cell numbering from the Plex renumbering
            entity_dofs = np.zeros(topological_dim+1, dtype=np.int32)
            entity_dofs[-1] = 1

            self._cell_numbering = self._plex.createSection([1], entity_dofs,
                                                            perm=self._plex_renumbering)
            # Same again with the single dof on the first entry, to
            # obtain the vertex numbering
            entity_dofs[:] = 0
            entity_dofs[0] = 1
            self._vertex_numbering = self._plex.createSection([1], entity_dofs,
                                                              perm=self._plex_renumbering)

        # Note that for bendy elements, this needs to change.
        with timed_region("Mesh: coordinate field"):
            if periodic_coords is not None:
                if self.ufl_cell().geometric_dimension() != 1:
                    raise NotImplementedError("Periodic coordinates in more than 1D are unsupported")
                # We've been passed a periodic coordinate field, so use that.
                self._coordinate_fs = functionspace.VectorFunctionSpace(self, "DG", 1)
                self.coordinates = function.Function(self._coordinate_fs,
                                                     val=periodic_coords,
                                                     name="Coordinates")
            else:
                self._coordinate_fs = functionspace.VectorFunctionSpace(self, "Lagrange", 1)

                coordinates = dmplex.reordered_coords(self._plex,
                                                      self._coordinate_fs._global_numbering,
                                                      (self.num_vertices(), geometric_dim))
                self.coordinates = function.Function(self._coordinate_fs,
                                                     val=coordinates,
                                                     name="Coordinates")
        self._ufl_domain = ufl.Domain(self.coordinates)
        # Build a new ufl element for this function space with the
        # correct domain.  This is necessary since this function space
        # is in the cache and will be picked up by later
        # VectorFunctionSpace construction.
        self._coordinate_fs._ufl_element = self._coordinate_fs.ufl_element().reconstruct(domain=self.ufl_domain())
        # HACK alert!
        # Replace coordinate Function by one that has a real domain on it (but don't copy values)
        self.coordinates = function.Function(self._coordinate_fs, val=self.coordinates.dat)
        # Add subdomain_data to the measure objects we store with
        # the mesh.  These are weakrefs for consistency with the
        # "global" measure objects
        self._dx = ufl.Measure('cell', subdomain_data=weakref.ref(self.coordinates))
        self._ds = ufl.Measure('exterior_facet', subdomain_data=weakref.ref(self.coordinates))
        self._dS = ufl.Measure('interior_facet', subdomain_data=weakref.ref(self.coordinates))
        # Set the subdomain_data on all the default measures to this
        # coordinate field.
        # We don't set the domain on the measure since this causes
        # an uncollectable reference in the global space (dx is
        # global).  Furthermore, it's never used anyway.
        for measure in [ufl.dx, ufl.ds, ufl.dS]:
            measure._subdomain_data = weakref.ref(self.coordinates)

    self._callback = callback
def _compute(self, part, fun, *arglist):
    """Call ``fun`` inside the "ParLoopCKernel" timed region.

    :arg part: value passed to ``fun`` as its final positional argument
    :arg fun: the callable to invoke
    :arg arglist: positional arguments passed to ``fun`` ahead of
        ``part``
    """
    # ``part`` goes last, after every other argument
    call_args = arglist + (part,)
    with timed_region("ParLoopCKernel"):
        fun(*call_args)