def _write_serial(self, path, data, metadata):
    from mpi4py import MPI

    comm, rank, root = get_comm_rank_root()

    if rank != root:
        for tag, buf in enumerate(data):
            comm.Send(buf.copy(), root, tag)
    else:
        # Recv all of the non-local data
        MPI.Prequest.Startall(self._mpi_rreqs)
        MPI.Prequest.Waitall(self._mpi_rreqs)

        # Combine local and MPI data
        names = it.chain(self._loc_names, self._mpi_names)
        dats = it.chain(data, self._mpi_rbufs)

        # Convert any metadata to ASCII
        metadata = {k: np.array(v, dtype='S')
                    for k, v in metadata.items()}

        # Create the output dictionary
        outdict = dict(zip(names, dats), **metadata)

        with h5py.File(path, 'w') as h5file:
            for k, v in outdict.items():
                h5file[k] = v
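# --- Illustrative sketch (not part of the module) ---
# A minimal, standalone example of the persistent-receive pattern used by
# _write_serial above, assuming mpi4py and NumPy; the buffer size and tag
# are made up. Run with, e.g., `mpirun -n 2 python sketch.py`.
from mpi4py import MPI
import numpy as np

comm, root = MPI.COMM_WORLD, 0

if comm.rank == root:
    # One persistent receive request per non-root rank
    rbufs = [np.empty(3) for _ in range(1, comm.size)]
    rreqs = [comm.Recv_init(rbuf, src, tag=0)
             for src, rbuf in enumerate(rbufs, start=1)]

    # Start and complete all of the receives in one go
    MPI.Prequest.Startall(rreqs)
    MPI.Prequest.Waitall(rreqs)
    print([rbuf.tolist() for rbuf in rbufs])
else:
    # Non-root ranks send their data to the root rank
    comm.Send(np.full(3, float(comm.rank)), root, 0)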
def __init__(self, mesh, cfg):
    self.cfg = cfg

    comm, rank, root = get_comm_rank_root()

    # Have the root rank determine the connectivity of the mesh
    if rank == root:
        prankconn = self._get_mesh_connectivity(mesh)
        nparts = len(prankconn)

        if nparts != comm.size:
            raise RuntimeError('Mesh has {0} partitions but running with '
                               '{1} MPI ranks'.format(nparts, comm.size))
    else:
        prankconn = None

    # Get subclass-dependent info about each rank (e.g., hostname)
    rinfo = comm.gather(self._get_rank_info(), root=root)

    # If we are the root rank then perform the rank allocation
    if rank == root:
        mprankmap = self._get_mprankmap(prankconn, rinfo)
    else:
        mprankmap = None

    # Broadcast the connectivity and rank mappings to all other ranks
    self.prankconn = prankconn = comm.bcast(prankconn, root=root)
    self.mprankmap = mprankmap = comm.bcast(mprankmap, root=root)

    # Invert the mapping to obtain the physical-to-MPI rank mapping
    self.pmrankmap = sorted(range(comm.size), key=mprankmap.__getitem__)

    # Compute our physical rank
    self.prank = mprankmap[rank]
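# --- Illustrative sketch (not part of the module) ---
# How the mapping inversion above works: mprankmap takes an MPI rank to its
# physical rank, and sorting the MPI rank indices by their image inverts it.
# The mapping values here are made up.
mprankmap = [2, 0, 3, 1]
pmrankmap = sorted(range(len(mprankmap)), key=mprankmap.__getitem__)

print(pmrankmap)  # [1, 3, 0, 2], i.e. physical rank 0 lives on MPI rank 1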
def _errest(self, x, y, z):
    comm, rank, root = get_comm_rank_root()

    errest = self._get_errest_kerns()

    # Obtain an estimate for the squared error
    self._prepare_reg_banks(x, y, z)
    self._queue % errest(self._atol, self._rtol)

    # L2 norm
    if self._norm == 'l2':
        # Reduce locally (element types + field variables)
        err = np.array([sum(v for e in errest.retval for v in e)])

        # Reduce globally (MPI ranks)
        comm.Allreduce(get_mpi('in_place'), err, op=get_mpi('sum'))

        # Normalise
        err = math.sqrt(float(err) / self._gndofs)
    # L^∞ norm
    else:
        # Reduce locally (element types + field variables)
        err = np.array([max(v for e in errest.retval for v in e)])

        # Reduce globally (MPI ranks)
        comm.Allreduce(get_mpi('in_place'), err, op=get_mpi('max'))

        # Normalise
        err = math.sqrt(float(err))

    return err if not math.isnan(err) else 100
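# --- Illustrative sketch (not part of the module) ---
# The in-place reduction pattern used in _errest above (and in the plugins
# below), assuming get_mpi('in_place') and get_mpi('sum') resolve to
# MPI.IN_PLACE and MPI.SUM. Run with, e.g., `mpirun -n 4 python sketch.py`.
from mpi4py import MPI
import numpy as np

comm = MPI.COMM_WORLD

# Each rank contributes its local (squared) error sum
err = np.array([float(comm.rank + 1)])

# In-place sum across ranks: every rank ends up holding the global value
comm.Allreduce(MPI.IN_PLACE, err, op=MPI.SUM)

print(comm.rank, err[0])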
def _write_parallel(self, path, data, metadata):
    comm, rank, root = get_comm_rank_root()

    with h5py.File(path, 'w', driver='mpio', comm=comm) as h5file:
        dmap = {}
        for name, shape in self._global_shape_list:
            dmap[name] = h5file.create_dataset(name, shape,
                                               dtype=self.fpdtype)

        # Write out our data sets using 2 GiB chunks
        for name, dat in zip(self._loc_names, data):
            nrows = len(dat)
            rowsz = dat.nbytes // nrows
            rstep = 2 * 1024**3 // rowsz

            if rstep == 0:
                raise RuntimeError('Array is too large for parallel I/O')

            for ix in range(0, nrows, rstep):
                dmap[name][ix:ix + rstep] = dat[ix:ix + rstep]

        # Metadata information has to be transferred to all the ranks
        if rank == root:
            mmap = [(k, len(v.encode())) for k, v in metadata.items()]
        else:
            mmap = None

        for name, size in comm.bcast(mmap, root=root):
            d = h5file.create_dataset(name, (), dtype='S{}'.format(size))

            if rank == root:
                d.write_direct(np.array(metadata[name], dtype='S'))
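# --- Illustrative sketch (not part of the module) ---
# The row-chunking arithmetic used in _write_parallel above; the array shape
# is made up. Slabs are sized so that each collective write stays at or below
# 2 GiB, which (presumably) keeps each request within the size limits of the
# underlying MPI-IO layer.
import numpy as np

dat = np.zeros((100000, 8, 64))      # leading dimension indexes the rows
nrows = len(dat)
rowsz = dat.nbytes // nrows          # bytes per row
rstep = 2 * 1024**3 // rowsz         # rows per <= 2 GiB slab

print(rowsz, rstep, list(range(0, nrows, rstep)))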
def _get_gndofs(self):
    comm, rank, root = get_comm_rank_root()

    # Get the number of degrees of freedom in this partition
    ndofs = sum(self.system.ele_ndofs)

    # Sum to get the global number over all partitions
    return comm.allreduce(ndofs, op=get_mpi('sum'))
def __call__(self, intg):
    # If an output is due this step
    if intg.nacptsteps % self.nsteps == 0 and intg.nacptsteps:
        # MPI info
        comm, rank, root = get_comm_rank_root()

        # Previous and current solution
        prev = self._prev
        curr = [s[intg._idxcurr].get()
                for s in intg.system.eles_scal_upts_inb_full]

        # Square of the residual vector [pad 0 for communication]
        resid_num = np.array(
            [sum(np.linalg.norm(c - p)**2 for p, c in zip(prev, curr)), 0.])
        resid_den = np.array([sum(np.linalg.norm(p)**2 for p in prev), 0.])

        # Reduce and, if we are the root rank, output
        if rank != root:
            comm.Reduce(resid_num, None, op=get_mpi('sum'), root=root)
            comm.Reduce(resid_den, None, op=get_mpi('sum'), root=root)
        else:
            comm.Reduce(get_mpi('in_place'), resid_num, op=get_mpi('sum'),
                        root=root)
            comm.Reduce(get_mpi('in_place'), resid_den, op=get_mpi('sum'),
                        root=root)

            # Normalise [remove the padded 0]
            resid = np.sqrt(resid_num[:-1] / resid_den[:-1])

            # Build the row
            row = [intg.tcurr] + resid.tolist()

            # Write
            print(' ', self.name, ': ',
                  ', '.join("{0:.3e}".format(r) for r in row))

            # Flush to disk
            if self.isoutf:
                print(','.join(str(r) for r in row), file=self.outf)
                self.outf.flush()

        del self._prev

    # If an output is due next step
    if (intg.nacptsteps + 1) % self.nsteps == 0:
        self._prev = [s[intg._idxcurr].get()
                      for s in intg.system.eles_scal_upts_inb_full]
def _invoke_postaction(self, **kwargs):
    comm, rank, root = get_comm_rank_root()

    # If we have a post-action and are the root rank then fire it
    if rank == root and self.postact:
        # If a post-action is currently running then wait for it
        if self.postactaid is not None:
            prefork.wait(self.postactaid)

        # Prepare the command line
        cmdline = shlex.split(self.postact.format(**kwargs))

        # Invoke
        if self.postactmode == 'blocking':
            prefork.call(cmdline)
        else:
            self.postactaid = prefork.call_async(cmdline)
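# --- Illustrative sketch (not part of the module) ---
# How the post-action command line above is assembled; the command template
# and the keyword arguments are made-up examples.
import shlex

postact = 'python postproc.py --soln {soln} --time {t}'
print(shlex.split(postact.format(soln='run-1.50.frfss', t=1.5)))
# ['python', 'postproc.py', '--soln', 'run-1.50.frfss', '--time', '1.5']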
def __init__(self, intg, cfgsect, suffix):
    super().__init__(intg, cfgsect, suffix)

    comm, rank, root = get_comm_rank_root()

    # Output frequency
    self.nsteps = self.cfg.getint(cfgsect, 'nsteps')
    self.isoutf = self.cfg.getint(cfgsect, 'output-file', 0)

    # The root rank needs to open the output file
    if rank == root and self.isoutf:
        header = ['t', 'f']

        # Open
        self.outf = init_csv(self.cfg, cfgsect, ','.join(header))

    # Call ourself in case output is needed after the first step
    self(intg)
def __init__(self, backend, rallocs, mesh, initsoln, nreg, cfg):
    if backend.name != 'cuda':
        raise ValueError('Only the CUDA backend is supported!')

    # Load the velocity mesh
    self.vm = self.velocitymeshcls(backend, cfg, self._nspcs)
    cv = self.vm.cv()
    vsize = self.vm.vsize()

    # Define the initial-condition expressions; the prefix "f_" should match
    # the one used in elementcls distvar, and the size of distvar should
    # equal NvBatchSize
    for ivar in range(self.vm.NvBatchSize()):
        cfg.set('soln-ics', 'f_' + str(ivar), '0.')

    # Now we can initialise things
    super().__init__(backend, rallocs, mesh, initsoln, nreg, cfg,
                     vm=self.vm)
    print('Finished initializing the BaseSystem')

    # Determine the time-step for a unit CFL number
    minjac = 100.0
    for t, ele in self.ele_map.items():
        djac = ele.djac_at_np('upts')
        minjac = np.min([minjac, np.min(djac)])

    advmax = self.vm.L()
    unitCFLdt = np.array([np.sqrt(minjac)/advmax/self.ndims])
    gunitCFLdt = np.zeros(1)

    # MPI info
    comm, rank, root = get_comm_rank_root()

    # Reduce and, if we are the root rank, output
    comm.Reduce(unitCFLdt, gunitCFLdt, op=get_mpi('min'), root=root)
    if rank == root:
        print("Time-step for unit CFL:", gunitCFLdt)
        print("The actual time-step will depend on the DG order CFL")

    # Load the scattering model
    smn = cfg.get('scattering-model', 'type')
    scatteringcls = subclass_where(DGFSBiScatteringModel,
                                   scattering_model=smn)
    self.sm = scatteringcls(backend, self.cfg, self.vm)

    # Allocate and bank the storage required by the time integrator
    eles_scal_upts_inb_full = proxylist(self.ele_banks)
    spcs_eles_scal_upts_full = [list(self.ele_banks)
                                for spcs in range(self._nspcs)]

    if initsoln:
        # Load the config and stats files from the solution
        solncfg = Inifile(initsoln['config'])
        solnsts = Inifile(initsoln['stats'])

        # Get the names of the conserved variables (fields)
        solnfields = solnsts.get('data', 'fields', '')

        # Build the expected field names (see the dgfsdistwriterbi.py plugin)
        currfields = []
        fields = ['f_' + str(i) for i in range(vsize)]
        lf = len(fields)
        for p in range(self._nspcs):
            currfields.extend(fields)
            for ivar in range(-1, -lf - 1, -1):
                currfields[ivar] += ':' + str(p + 1)
        currfields = ','.join(currfields)

        # Ensure they match up
        if solnfields and solnfields != currfields:
            raise RuntimeError('Invalid solution for system')

        # Ensure the solnfields are not empty
        if not solnfields:
            raise RuntimeError('Invalid solution for system')

        # Process the solution
        for t, (k, ele) in enumerate(self.ele_map.items()):
            soln = initsoln['soln_%s_p%d' % (k, rallocs.prank)]

            # Recreate the existing solution basis
            solnb = ele.basis.__class__(None, solncfg)

            # Form the interpolation operator
            interp = solnb.ubasis.nodal_basis_at(ele.basis.upts)

            # Apply and reshape
            data = np.dot(interp, soln.reshape(solnb.nupts, -1))
            data = data.reshape(ele.nupts, self._nspcs*vsize, ele.neles)

            for p in range(self._nspcs):
                spcs_eles_scal_upts_full[p][t] = data[
                    :, p*vsize:(p + 1)*vsize, :]
    else:
        # Load the initial condition model
        icn = cfg.get('soln-ics', 'type')
        initcondcls = subclass_where(DGFSBiInitCondition, model=icn)
        ic = initcondcls(backend, cfg, self.vm, 'soln-ics')

        # Loop over the sub-domains in the full mixed domain
        for p in range(self._nspcs):
            for t, ele in enumerate(self.ele_map.values()):
                spcs_eles_scal_upts_full[p][t] = np.empty(
                    (ele.nupts, vsize, ele.neles))
                ic.apply_init_vals(p, spcs_eles_scal_upts_full[p][t], ele)

    # Registers per species
    nreg0 = nreg//self._nspcs
    assert nreg == nreg0*self._nspcs, 'nreg should be a multiple of nspcs'

    # Allocate the backend matrices and group them into banks
    for t in range(len(eles_scal_upts_inb_full)):
        scal_upts_full = []
        for p in range(self._nspcs):
            scal_upts_full.extend(
                backend.matrix(spcs_eles_scal_upts_full[p][t].shape,
                               spcs_eles_scal_upts_full[p][t],
                               tags={'align'})
                for i in range(nreg0))

        eles_scal_upts_inb_full[t] = backend.matrix_bank(scal_upts_full)

    self.eles_scal_upts_inb_full = eles_scal_upts_inb_full
    del spcs_eles_scal_upts_full
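# --- Illustrative sketch (not part of the module) ---
# The register layout built above: each matrix bank holds nreg0 registers per
# species, concatenated species by species, so the current solution of
# species p sits at bank index p*nreg0 (presumably what the residual plugin
# below reads via s[intg._stepper_nregs_orig * p]). The values are made up.
nspcs, nreg0 = 2, 3

for p in range(nspcs):
    for i in range(nreg0):
        print('species', p, 'register', i, '-> bank index', p*nreg0 + i)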
def __init__(self, intg, nvars, basedir, basename, *, prefix,
             extn='.frfss'):
    # Base output directory and file name
    self.basedir = basedir
    self.basename = basename

    # Append the relevant extension
    if not self.basename.endswith(extn):
        self.basename += extn

    # Prefix given to each data array in the output file
    self.prefix = prefix

    # Output counter (incremented each time write() is called)
    self.nout = self._restore_nout() if intg.isrestart else 0

    # Copy the float type
    self.fpdtype = intg.backend.fpdtype

    # MPI info
    comm, rank, root = get_comm_rank_root()

    # Get the type and shape of each element in the partition
    etypes = intg.system.ele_types
    shapes = [(nupts, nvars, neles)
              for nupts, _, neles in intg.system.ele_shapes]

    # Gather this information onto every rank
    eleinfo = comm.allgather(list(zip(etypes, shapes)))

    # Parallel I/O
    if (h5py.get_config().mpi and
            'FRFS_FORCE_SERIAL_HDF5' not in os.environ):
        self._write = self._write_parallel

        self._loc_names = loc_names = []
        self._global_shape_list = []

        for mrank, meleinfo in enumerate(eleinfo):
            prank = intg.rallocs.mprankmap[mrank]

            # Loop over all element types across all ranks
            for etype, shape in meleinfo:
                name = self._get_name_for_data(etype, prank)

                self._global_shape_list.append((name, shape))

                if rank == mrank:
                    loc_names.append(name)
    # Serial I/O
    else:
        self._write = self._write_serial

        if rank == root:
            self._mpi_rbufs = mpi_rbufs = []
            self._mpi_rreqs = mpi_rreqs = []
            self._mpi_names = mpi_names = []
            self._loc_names = loc_names = []

            for mrank, meleinfo in enumerate(eleinfo):
                prank = intg.rallocs.mprankmap[mrank]

                for tag, (etype, shape) in enumerate(meleinfo):
                    name = self._get_name_for_data(etype, prank)

                    if mrank == root:
                        loc_names.append(name)
                    else:
                        rbuf = np.empty(shape, dtype=self.fpdtype)
                        rreq = comm.Recv_init(rbuf, mrank, tag)

                        mpi_rbufs.append(rbuf)
                        mpi_rreqs.append(rreq)
                        mpi_names.append(name)
def __call__(self, intg):
    # If an output is due this step
    if intg.nacptsteps % self.nsteps == 0 and intg.nacptsteps:
        # MPI info
        comm, rank, root = get_comm_rank_root()

        # Previous and current solution; the current solution of species p
        # is assumed to be in its 0th register
        prev = self._prev
        curr = [0]*intg.system._nspcs
        for p in range(intg.system._nspcs):
            curr[p] = [s[intg._stepper_nregs_orig*p].get()
                       for s in intg.system.eles_scal_upts_inb_full]

        # Square of the residual vector for each species
        resid_num = np.zeros(intg.system._nspcs)
        resid_den = np.zeros(intg.system._nspcs)
        for spcs in range(intg.system._nspcs):
            resid_num[spcs] = sum(np.linalg.norm(c - p)**2
                                  for p, c in zip(prev[spcs], curr[spcs]))
            resid_den[spcs] = sum(np.linalg.norm(p)**2
                                  for p in prev[spcs])

        # Reduce and, if we are the root rank, output
        if rank != root:
            comm.Reduce(resid_num, None, op=get_mpi('sum'), root=root)
            comm.Reduce(resid_den, None, op=get_mpi('sum'), root=root)
        else:
            comm.Reduce(get_mpi('in_place'), resid_num, op=get_mpi('sum'),
                        root=root)
            comm.Reduce(get_mpi('in_place'), resid_den, op=get_mpi('sum'),
                        root=root)

            # Normalise
            resid = np.sqrt(resid_num / resid_den)

            # Build the row
            row = [intg.tcurr] + resid.tolist()

            # Write
            print(' ', self.name, ': ',
                  ', '.join("{0:.3e}".format(r) for r in row))

            # Flush to disk
            if self.isoutf:
                print(','.join(str(r) for r in row), file=self.outf)
                self.outf.flush()

        del self._prev

    # If an output is due next step
    if (intg.nacptsteps + 1) % self.nsteps == 0:
        # The current solution of species p is assumed to be in its
        # 0th register
        self._prev = [0]*intg.system._nspcs
        for p in range(intg.system._nspcs):
            self._prev[p] = [s[intg._stepper_nregs_orig*p].get()
                             for s in intg.system.eles_scal_upts_inb_full]
def __init__(self, intg, cfgsect, suffix):
    super().__init__(intg, cfgsect, suffix)

    comm, rank, root = get_comm_rank_root()

    # Output frequency
    self.nsteps = self.cfg.getint(cfgsect, 'nsteps')

    # Constant variables
    self._constants = self.cfg.items_as('constants', float)

    # Underlying elements class
    self.elementscls = intg.system.elementscls

    # Boundary to integrate over
    bc = 'bcon_{0}_p{1}'.format(suffix, intg.rallocs.prank)
    self.suffix = suffix

    # Get the mesh and elements
    mesh, elemap = intg.system.mesh, intg.system.ele_map

    # See which ranks have the boundary
    bcranks = comm.gather(bc in mesh, root=root)

    # Output field names, i.e. the properties to be computed:
    #   fx, fy, fz: x, y, z components of the force due to normal pressure
    #   q: total normal heat flux, $\int Q_j n_j dA / \int dA$
    self.fields = ['fx', 'fy', 'fz'][:self.ndims] + ['q']

    # Create an instance of the DGFSMomWriterStd plugin
    # (caveat: one instance for every boundary)
    self._moms = DGFSMomWriterStdPlugin(intg, cfgsect, suffix=None,
                                        write=False)

    # The root rank needs to open the output file
    if rank == root:
        if not any(bcranks):
            raise RuntimeError(
                'Boundary {0} does not exist'.format(suffix))

        # CSV header
        header = ['t'] + self.fields

        # Open
        self.outf = init_csv(self.cfg, cfgsect, ','.join(header))

    # TODO: dump the surface properties to a file via NativeWriter
    # (fields: x, y, z, nx, ny, nz plus the loads above)

    # Interpolation matrices and quadrature weights
    self._m0 = m0 = {}
    self._qwts = qwts = defaultdict(list)

    # If we have the boundary then process the interface
    if bc in mesh:
        # Element indices and associated face normals
        eidxs = defaultdict(list)
        norms = defaultdict(list)
        mnorms = defaultdict(list)
        plocs = defaultdict(list)
        fidcount = dict()

        for etype, eidx, fidx, flags in mesh[bc].astype('U4,i4,i1,i1'):
            eles = elemap[etype]

            if (etype, fidx) not in m0:
                facefpts = eles.basis.facefpts[fidx]

                m0[etype, fidx] = eles.basis.m0[facefpts]
                qwts[etype, fidx] = eles.basis.fpts_wts[facefpts]

            # Unit physical normals and their magnitudes (including |J|)
            npn = eles.get_norm_pnorms(eidx, fidx)
            mpn = eles.get_mag_pnorms(eidx, fidx)
            ploc = eles.get_ploc(eidx, fidx)

            eidxs[etype, fidx].append(eidx)
            norms[etype, fidx].append(mpn[:, None]*npn)
            mnorms[etype, fidx].append(mpn[:, None])
            plocs[etype, fidx].append(ploc[:, None])

            if etype not in fidcount:
                fidcount[etype] = m0[etype, fidx].shape[0]
            else:
                fidcount[etype] += m0[etype, fidx].shape[0]

        self._eidxs = {k: np.array(v) for k, v in eidxs.items()}
        self._norms = {k: np.array(v) for k, v in norms.items()}
        self._mnorms = {k: np.array(v) for k, v in mnorms.items()}
        self._plocs = {k: np.array(v) for k, v in plocs.items()}

    # Call ourself in case output is needed after the first step
    self(intg)
def __call__(self, intg):
    # Return if no output is due
    if intg.nacptsteps % self.nsteps:
        return

    # MPI info
    comm, rank, root = get_comm_rank_root()

    # Compute the moments of the distribution function
    self._moms.compute_moments(intg)
    soln = self._moms.bulksoln

    # Solution matrices indexed by element type
    solns = dict(zip(intg.system.ele_types, soln))
    ndims, nvars = self.ndims, soln[0].shape[1]

    # Force components, total normal heat flux and total area
    f = np.zeros(ndims + 2)

    for etype, fidx in self._m0:
        # Get the interpolation operator
        m0 = self._m0[etype, fidx]
        nfpts, nupts = m0.shape

        # Extract the relevant elements from the solution
        uupts = solns[etype][..., self._eidxs[etype, fidx]]

        # Interpolate to the face
        ufpts = np.dot(m0, uupts.reshape(nupts, -1))
        ufpts = ufpts.reshape(nfpts, nvars, -1)
        ufpts = ufpts.swapaxes(0, 1)

        # Compute the pressure; it is the last variable (see privarmap)
        pidx = -1
        p = self.elementscls.con_to_pri(ufpts, self.cfg)[pidx]

        # Compute the heat flux
        qidx = 5 if self.ndims == 2 else 6
        q = [self.elementscls.con_to_pri(ufpts, self.cfg)[qidx + idx]
             for idx in range(self.ndims)]

        # Get the quadrature weights, normal vectors and face locations
        qwts = self._qwts[etype, fidx]
        norms = self._norms[etype, fidx]
        mnorms = self._mnorms[etype, fidx]
        plocs = np.squeeze(self._plocs[etype, fidx])

        # Compute the force due to pressure
        f[:ndims] += np.einsum('i...,ij,jik', qwts, p, norms)

        # Compute the total heat transfer
        f[ndims] += np.einsum('i...,kij,jik', qwts, q, norms)

        # Compute the total area
        f[ndims + 1] += np.sum(mnorms)

        # TODO: accumulate the face coordinates, normals and loads into a
        # surface data array for the (currently disabled) surface writer

    # Reduce and output if we're the root rank
    if rank != root:
        comm.Reduce(f, None, op=get_mpi('sum'), root=root)
    else:
        comm.Reduce(get_mpi('in_place'), f, op=get_mpi('sum'), root=root)

        # Convert the total heat transfer into heat transfer per unit area
        f[ndims] /= f[ndims + 1]

        # Build the row (dropping the total area)
        row = [intg.tcurr] + f[:-1].tolist()

        # Write to the console
        print(self.name, self.suffix, ':', ', '.join(str(r) for r in row))

        # Write to the CSV file and flush to disk
        print(','.join(str(r) for r in row), file=self.outf)
        self.outf.flush()
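# --- Illustrative sketch (not part of the module) ---
# The surface-quadrature einsum used above: the pressure force is the sum
# over faces j and face points i of qwts[i]*p[i, j]*norms[j, i, :], where
# norms already carries the |J| scaling. Shapes and values here are made up
# (2 face points, 3 faces, 2 dimensions).
import numpy as np

qwts = np.array([0.5, 0.5])      # (nfpts,)
p = np.ones((2, 3))              # (nfpts, nfaces)
norms = np.ones((3, 2, 2))       # (nfaces, nfpts, ndims)

force = np.einsum('i...,ij,jik', qwts, p, norms)
print(force)                     # one component per spatial dimension: [3. 3.]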