def _errest(self, x, y, z):
    comm, rank, root = get_comm_rank_root()

    errest = self._get_errest_kerns()

    # Obtain an estimate for the squared error
    self._prepare_reg_banks(x, y, z)
    self._queue % errest(self._atol, self._rtol)

    # L2 norm
    if self._norm == 'l2':
        # Reduce locally (element types + field variables)
        err = np.array([sum(v for e in errest.retval for v in e)])

        # Reduce globally (MPI ranks)
        comm.Allreduce(get_mpi('in_place'), err, op=get_mpi('sum'))

        # Normalise
        err = math.sqrt(float(err) / self._gndofs)
    # L^∞ norm
    else:
        # Reduce locally (element types + field variables)
        err = np.array([max(v for e in errest.retval for v in e)])

        # Reduce globally (MPI ranks)
        comm.Allreduce(get_mpi('in_place'), err, op=get_mpi('max'))

        # Normalise
        err = math.sqrt(float(err))

    return err if not math.isnan(err) else 100
def __init__(self, intg, cfgsect, suffix):
    super().__init__(intg, cfgsect, suffix)

    comm, rank, root = get_comm_rank_root()

    # Output frequency
    self.nsteps = self.cfg.getint(cfgsect, 'nsteps')

    # The root rank needs to open the output file
    if rank == root:
        # Determine the file path
        fname = self.cfg.get(cfgsect, 'file')

        # Append the '.csv' extension
        if not fname.endswith('.csv'):
            fname += '.csv'

        # Open for appending
        self.outf = open(fname, 'a')

        # Output a header if required
        if (os.path.getsize(fname) == 0 and
                self.cfg.getbool(cfgsect, 'header', True)):
            # Conservative variable list
            convars = intg.system.elementscls.convarmap[self.ndims]

            print(','.join(['t'] + convars), file=self.outf)
def __init__(self, intg, cfgsect, suffix):
    super().__init__(intg, cfgsect, suffix)

    comm, rank, root = get_comm_rank_root()

    # Output frequency
    self.nsteps = self.cfg.getint(cfgsect, "nsteps")

    # The root rank needs to open the output file
    if rank == root:
        # Determine the file path
        fname = self.cfg.get(cfgsect, "file")

        # Append the '.csv' extension
        if not fname.endswith(".csv"):
            fname += ".csv"

        # Open for appending
        self.outf = open(fname, "a")

        # Output a header if required
        if os.path.getsize(fname) == 0 and self.cfg.getbool(cfgsect, "header", True):
            # Conservative variable list
            convars = intg.system.elementscls.convarmap[self.ndims]

            print(",".join(["t"] + convars), file=self.outf)
def _write_parallel(self, path, data, metadata):
    comm, rank, root = get_comm_rank_root()

    with h5py.File(path, 'w', driver='mpio', comm=comm) as h5file:
        dmap = {}
        for name, shape in self._global_shape_list:
            dmap[name] = h5file.create_dataset(
                name, shape, dtype=self.fpdtype
            )

        # Write out our data sets using 2 GiB chunks
        for name, dat in zip(self._loc_names, data):
            nrows = len(dat)
            rowsz = dat.nbytes // nrows
            rstep = 2*1024**3 // rowsz

            if rstep == 0:
                raise RuntimeError('Array is too large for parallel I/O')

            for ix in range(0, nrows, rstep):
                dmap[name][ix:ix + rstep] = dat[ix:ix + rstep]

        # Metadata information has to be transferred to all the ranks
        if rank == root:
            mmap = [(k, len(v.encode())) for k, v in metadata.items()]
        else:
            mmap = None

        for name, size in comm.bcast(mmap, root=root):
            d = h5file.create_dataset(name, (), dtype='S{}'.format(size))

            if rank == root:
                d.write_direct(np.array(metadata[name], dtype='S'))
def _errest(self, x, y, z):
    comm, rank, root = get_comm_rank_root()

    errest = self._get_errest_kerns()

    # Obtain an estimate for the squared error
    self._prepare_reg_banks(x, y, z)
    self._queue % errest(self._atol, self._rtol)

    # L2 norm
    if self._norm == 'l2':
        # Reduce locally (element types) and globally (MPI ranks)
        rl = sum(errest.retval)
        rg = comm.allreduce(rl, op=get_mpi('sum'))

        # Normalise
        err = math.sqrt(rg / self._gndofs)
    # Uniform norm
    else:
        # Reduce locally (element types) and globally (MPI ranks)
        rl = max(errest.retval)
        rg = comm.allreduce(rl, op=get_mpi('max'))

        # Normalise
        err = math.sqrt(rg)

    return err if not math.isnan(err) else 100
def _resid(self, dtau, x):
    comm, rank, root = get_comm_rank_root()

    # Get an errest kern to compute the square of the maximum residual
    errest = self._get_errest_kerns()

    # Prepare and run the kernel
    self._prepare_reg_banks(x, x, x)
    self._queue % errest(dtau, 0.0)

    # L2 norm
    if self._pseudo_norm == 'l2':
        # Reduce locally (element types) and globally (MPI ranks)
        res = np.array([sum(ev) for ev in zip(*errest.retval)])
        comm.Allreduce(get_mpi('in_place'), res, op=get_mpi('sum'))

        # Normalise and return
        return np.sqrt(res / self._gndofs)
    # L^∞ norm
    else:
        # Reduce locally (element types) and globally (MPI ranks)
        res = np.array([max(ev) for ev in zip(*errest.retval)])
        comm.Allreduce(get_mpi('in_place'), res, op=get_mpi('max'))

        # Normalise and return
        return np.sqrt(res)
def _write_parallel(self, path, solnmap, metadata):
    comm, rank, root = get_comm_rank_root()

    with h5py.File(path, 'w', driver='mpio', comm=comm) as h5file:
        smap = {}
        for name, shape in self.sollist:
            smap[name] = h5file.create_dataset(
                name, shape, dtype=self.backend.fpdtype
            )

        for e, sol in solnmap.items():
            s = self._get_name_for_soln(e, self.rallocs.prank)
            smap[s][:] = sol

        # Metadata information has to be transferred to all the ranks
        if rank == root:
            mmap = [(k, len(v.encode())) for k, v in metadata.items()]
        else:
            mmap = None

        for name, size in comm.bcast(mmap, root=root):
            d = h5file.create_dataset(name, (), dtype='S{}'.format(size))

            if rank == root:
                d.write_direct(np.array(metadata[name], dtype='S'))
def __init__(self, mesh, cfg):
    self.cfg = cfg

    comm, rank, root = get_comm_rank_root()

    # Have the root rank determine the connectivity of the mesh
    if rank == root:
        prankconn = self._get_mesh_connectivity(mesh)
        nparts = len(prankconn)

        if nparts != comm.size:
            raise RuntimeError(
                "Mesh has {0} partitions but running with "
                "{1} MPI ranks".format(nparts, comm.size)
            )
    else:
        prankconn = None

    # Get subclass-dependent info about each rank (e.g., hostname)
    rinfo = comm.gather(self._get_rank_info(), root=root)

    # If we are the root rank then perform the rank allocation
    if rank == root:
        mprankmap = self._get_mprankmap(prankconn, rinfo)
    else:
        mprankmap = None

    # Broadcast the connectivity and rank mappings to all other ranks
    self.prankconn = prankconn = comm.bcast(prankconn, root=root)
    self.mprankmap = mprankmap = comm.bcast(mprankmap, root=root)

    # Invert the mapping to obtain the physical-to-MPI rank mapping
    self.pmrankmap = sorted(range(comm.size), key=mprankmap.__getitem__)

    # Compute our physical rank
    self.prank = mprankmap[rank]
def __init__(self, *args, **kwargs):
    super(FileWriter, self).__init__(*args, **kwargs)

    # MPI info
    comm, rank, root = get_comm_rank_root()

    # Get the type and shape of each element in the partition
    etypes, shapes = self._system.ele_types, self._system.ele_shapes

    # Gather this information onto the root rank
    eleinfo = comm.gather(zip(etypes, shapes), root=root)

    if rank == root:
        self._mpi_rbufs = mpi_rbufs = []
        self._mpi_rreqs = mpi_rreqs = []
        self._mpi_names = mpi_names = []
        self._loc_names = loc_names = []

        for mrank, meleinfo in enumerate(eleinfo):
            prank = self._rallocs.mprankmap[mrank]

            for tag, (etype, dims) in enumerate(meleinfo):
                name = self._get_name_for_soln(etype, prank)

                if mrank == root:
                    loc_names.append(name)
                else:
                    rbuf = np.empty(dims, dtype=self._backend.fpdtype)
                    rreq = comm.Recv_init(rbuf, mrank, tag)

                    mpi_rbufs.append(rbuf)
                    mpi_rreqs.append(rreq)
                    mpi_names.append(name)
def _write_parallel(self, path, data, metadata):
    comm, rank, root = get_comm_rank_root()

    with h5py.File(path, 'w', driver='mpio', comm=comm) as h5file:
        dmap = {}
        for name, shape in self._global_shape_list:
            dmap[name] = h5file.create_dataset(
                name, shape, dtype=self.fpdtype
            )

        for s, dat in zip(self._loc_names, data):
            dmap[s][:] = dat

        # Metadata information has to be transferred to all the ranks
        if rank == root:
            mmap = [(k, len(v.encode())) for k, v in metadata.items()]
        else:
            mmap = None

        for name, size in comm.bcast(mmap, root=root):
            d = h5file.create_dataset(name, (), dtype='S{}'.format(size))

            if rank == root:
                d.write_direct(np.array(metadata[name], dtype='S'))
def _write_parallel(self, path, data, metadata):
    comm, rank, root = get_comm_rank_root()

    with h5py.File(path, 'w', driver='mpio', comm=comm) as f:
        dmap = {}
        for name, shape, dtype in self._global_shape_list:
            dmap[name] = f.create_dataset(name, shape, dtype=dtype)

        # Write out our data sets using 2 GiB chunks
        for name, dat in zip(self._loc_names, data):
            nrows = len(dat)
            rowsz = dat.nbytes // nrows
            rstep = 2 * 1024**3 // rowsz

            if rstep == 0:
                raise RuntimeError('Array is too large for parallel I/O')

            for ix in range(0, nrows, rstep):
                dmap[name][ix:ix + rstep] = dat[ix:ix + rstep]

        # Metadata information has to be transferred to all the ranks
        if rank == root:
            mmap = [(k, len(v.encode())) for k, v in metadata.items()]
        else:
            mmap = None

        for name, size in comm.bcast(mmap, root=root):
            d = f.create_dataset(name, (), dtype='S{}'.format(size))

            if rank == root:
                d.write_direct(np.array(metadata[name], dtype='S'))

    # Wait for everyone to finish writing
    comm.barrier()
def _write_serial(self, path, data, metadata):
    comm, rank, root = get_comm_rank_root()

    if rank != root:
        for tag, buf in enumerate(data):
            comm.Send(buf.copy(), root)
    else:
        with h5py.File(path, 'w') as f:
            # Write the metadata
            for k, v in metadata.items():
                f[k] = np.array(v, dtype='S')

            # Write our local data
            for k, v in zip(self._loc_info, data):
                f[k] = v

            # Receive and write the remote data
            for k, mrank, shape, dtype in self._mpi_info:
                v = np.empty(shape, dtype=dtype)
                comm.Recv(v, mrank)

                f[k] = v

    # Wait for the root rank to finish writing
    comm.barrier()
def __init__(self, mesh, cfg):
    self.cfg = cfg

    comm, rank, root = get_comm_rank_root()

    if rank == root:
        # Determine the (physical) connectivity of the mesh
        prankconn = self._get_mesh_connectivity(mesh)
        nparts = len(prankconn) or 1

        if nparts != comm.size:
            raise RuntimeError('Mesh has %d partitions but running with '
                               '%d MPI ranks' % (nparts, comm.size))
    else:
        prankconn = None

    # Get subclass-dependent info about each rank (e.g., hostname)
    rinfo = comm.gather(self._get_rank_info(), root=root)

    if rank == root:
        # Use this info to construct a mapping from MPI ranks to
        # physical mesh ranks
        mprankmap = self._get_mprankmap(prankconn, rinfo)
    else:
        mprankmap = None

    # Broadcast the connectivity and rank mapping to each MPI rank
    self.prankconn = comm.bcast(prankconn, root=root)
    self.mprankmap = comm.bcast(mprankmap, root=root)

    # Invert this mapping
    self.pmrankmap = {v: k for k, v in self.mprankmap.items()}

    # Compute the physical rank of ourself
    self.prank = self.mprankmap[rank]
def __call__(self, intg):
    # Return if no output is due
    if intg.nacptsteps % self.nsteps:
        return

    # MPI info
    comm, rank, root = get_comm_rank_root()

    # Get the solution matrices
    solns = intg.soln

    # Perform the sampling and interpolation
    samples = [op @ solns[et][:, :, ei] for et, ei, _, op in self._ourpts]
    samples = self._process_samples(samples)

    # Gather to the root rank
    comm.Gatherv(samples, self._ptsrecv, root=root)

    # If we're the root rank then output
    if rank == root:
        for off, ploc in self._ptsinfo:
            print(intg.tcurr, *ploc, *self._ptsbuf[off], sep=',',
                  file=self.outf)

        # Flush to disk
        self.outf.flush()
def _resid(self, rcurr, rold, dt_fac):
    comm, rank, root = get_comm_rank_root()

    # Get a reduction kern to compute the square of the maximum residual
    resid = self._get_reduction_kerns(rcurr, rold, method='resid',
                                      norm=self._pseudo_norm)

    # Run the kernel
    self._queue.enqueue_and_run(resid, dt_fac)

    # L2 norm
    if self._pseudo_norm == 'l2':
        # Reduce locally (element types) and globally (MPI ranks)
        res = np.array([sum(ev) for ev in zip(*[r.retval for r in resid])])
        comm.Allreduce(get_mpi('in_place'), res, op=get_mpi('sum'))

        # Normalise and return
        return tuple(np.sqrt(res / self._gndofs))
    # L^∞ norm
    else:
        # Reduce locally (element types) and globally (MPI ranks)
        res = np.array([max(ev) for ev in zip(*[r.retval for r in resid])])
        comm.Allreduce(get_mpi('in_place'), res, op=get_mpi('max'))

        # Normalise and return
        return tuple(np.sqrt(res))
def _write_serial(self, path, data, metadata):
    from mpi4py import MPI

    comm, rank, root = get_comm_rank_root()

    if rank != root:
        for tag, buf in enumerate(data):
            comm.Send(buf.copy(), root, tag)
    else:
        # Recv all of the non-local data
        MPI.Prequest.Startall(self._mpi_rreqs)
        MPI.Prequest.Waitall(self._mpi_rreqs)

        # Combine local and MPI data
        names = it.chain(self._loc_names, self._mpi_names)
        dats = it.chain(data, self._mpi_rbufs)

        # Convert any metadata to ASCII
        metadata = {k: np.array(v, dtype='S')
                    for k, v in metadata.items()}

        # Create the output dictionary
        outdict = dict(zip(names, dats), **metadata)

        with h5py.File(path, 'w') as h5file:
            for k, v in outdict.items():
                h5file[k] = v
def __init__(self, mesh, cfg):
    self.cfg = cfg

    comm, rank, root = get_comm_rank_root()

    # Have the root rank determine the connectivity of the mesh
    if rank == root:
        prankconn = self._get_mesh_connectivity(mesh)
        nparts = len(prankconn)

        if nparts != comm.size:
            raise RuntimeError('Mesh has {0} partitions but running with '
                               '{1} MPI ranks'.format(nparts, comm.size))
    else:
        prankconn = None

    # Get subclass-dependent info about each rank (e.g., hostname)
    rinfo = comm.gather(self._get_rank_info(), root=root)

    # If we are the root rank then perform the rank allocation
    if rank == root:
        mprankmap = self._get_mprankmap(prankconn, rinfo)
    else:
        mprankmap = None

    # Broadcast the connectivity and rank mappings to all other ranks
    self.prankconn = prankconn = comm.bcast(prankconn, root=root)
    self.mprankmap = mprankmap = comm.bcast(mprankmap, root=root)

    # Invert the mapping to obtain the physical-to-MPI rank mapping
    self.pmrankmap = sorted(range(comm.size), key=mprankmap.__getitem__)

    # Compute our physical rank
    self.prank = mprankmap[rank]
def _errest(self, rcurr, rprev, rerr):
    comm, rank, root = get_comm_rank_root()

    errest = self._get_reduction_kerns(rcurr, rprev, rerr, method='errest',
                                       norm=self._norm)

    # Obtain an estimate for the squared error
    self._queue.enqueue_and_run(errest, self._atol, self._rtol)

    # L2 norm
    if self._norm == 'l2':
        # Reduce locally (element types + field variables)
        err = np.array([sum(v for e in errest for v in e.retval)])

        # Reduce globally (MPI ranks)
        comm.Allreduce(get_mpi('in_place'), err, op=get_mpi('sum'))

        # Normalise
        err = math.sqrt(float(err) / self._gndofs)
    # L^∞ norm
    else:
        # Reduce locally (element types + field variables)
        err = np.array([max(v for e in errest for v in e.retval)])

        # Reduce globally (MPI ranks)
        comm.Allreduce(get_mpi('in_place'), err, op=get_mpi('max'))

        # Normalise
        err = math.sqrt(float(err))

    return err if not math.isnan(err) else 100
def __init__(self, intg, basedir, basename, prefix, *, extn='.pyfrs'):
    # Base output directory and file name
    self.basedir = basedir
    self.basename = basename

    # Data prefix
    self.prefix = prefix

    # Our physical rank
    self.prank = intg.rallocs.prank

    # Append the relevant extension
    if not self.basename.endswith(extn):
        self.basename += extn

    # Output counter (incremented each time write() is called)
    self.nout = self._restore_nout() if intg.isrestart else 0

    # MPI info
    comm, rank, root = get_comm_rank_root()

    # Parallel I/O
    if (h5py.get_config().mpi and
            'PYFR_FORCE_SERIAL_HDF5' not in os.environ):
        self._write = self._write_parallel
    # Serial I/O
    else:
        self._write = self._write_serial
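Aside: the parallel-versus-serial decision above only depends on how h5py was built and on one override variable. A minimal standalone sketch of that capability test, assuming plain h5py (no PyFR helpers), is:

# Probe whether parallel (MPIO) HDF5 is usable, mirroring the writer's test above.
import os

import h5py

parallel = h5py.get_config().mpi and 'PYFR_FORCE_SERIAL_HDF5' not in os.environ
print('Parallel HDF5 I/O available:', parallel)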
def __init__(self, intg, cfgsect, suffix):
    super().__init__(intg, cfgsect, suffix)

    comm, rank, root = get_comm_rank_root()

    # Constant variables
    self._constants = self.cfg.items_as('constants', float)

    # Underlying elements class
    self.elementscls = intg.system.elementscls

    inletname = self.cfg.getliteral(cfgsect, 'inletname')
    self.area = self.cfg.getfloat(cfgsect, 'area')

    # Desired mass flow rate per area at inlet
    self.mdotstar = self.cfg.getfloat(cfgsect, 'mdotstar')
    self.mdot = 0.0
    self.hasinlet = False

    # Initialize rhou forcing
    intg.system.rhouforce = 0.0
    intg.system.mdot = 0.0
    intg.system.mdotold = self.mdotstar

    # Boundary to integrate over
    bc = 'pcon_{0}_p{1}'.format(inletname, intg.rallocs.prank)

    # Get the mesh and elements
    mesh, elemap = intg.system.mesh, intg.system.ele_map

    # Interpolation matrices and quadrature weights
    self._m0 = m0 = {}
    self._qwts = qwts = defaultdict(list)

    # If we have the boundary then process the interface
    if bc in mesh:
        self.hasinlet = True

        # Element indices and associated face normals
        eidxs = defaultdict(list)
        norms = defaultdict(list)

        for etype, eidx, fidx, flags in mesh[bc].astype('U4,i4,i1,i1'):
            eles = elemap[etype]

            if (etype, fidx) not in m0:
                facefpts = eles.basis.facefpts[fidx]

                m0[etype, fidx] = eles.basis.m0[facefpts]
                qwts[etype, fidx] = eles.basis.fpts_wts[facefpts]

            # Unit physical normals and their magnitudes (including |J|)
            npn = eles.get_norm_pnorms(eidx, fidx)
            mpn = eles.get_mag_pnorms(eidx, fidx)

            eidxs[etype, fidx].append(eidx)
            norms[etype, fidx].append(mpn[:, None] * npn)

        self._eidxs = {k: np.array(v) for k, v in eidxs.items()}
        self._norms = {k: np.array(v) for k, v in norms.items()}
def _get_gndofs(self):
    comm, rank, root = get_comm_rank_root()

    # Get the number of degrees of freedom in this partition
    ndofs = sum(self.system.ele_ndofs)

    # Sum to get the global number over all partitions
    return comm.allreduce(ndofs, op=get_mpi('sum'))
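The reduction above is the standard mpi4py allreduce of a Python scalar. A minimal standalone sketch of the same pattern, assuming only mpi4py and taking get_mpi('sum') to map to MPI.SUM, is:

# Minimal sketch of the global-DOF reduction; run with e.g. `mpiexec -n 4 python demo.py`.
from mpi4py import MPI

comm = MPI.COMM_WORLD

# Pretend each rank owns a different number of local degrees of freedom
ndofs = 1000 * (comm.rank + 1)

# Sum over all ranks; every rank receives the global total
gndofs = comm.allreduce(ndofs, op=MPI.SUM)

if comm.rank == 0:
    print('Global number of DOFs:', gndofs)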
def main():
    ap = ArgumentParser(prog='pyfr-sim',
                        description='Runs a PyFR simulation')
    ap.add_argument('--verbose', '-v', action='count')
    ap.add_argument('--backend', '-b', default='cuda',
                    help='Backend to use')
    ap.add_argument('--progress', '-p', action='store_true',
                    help='show a progress bar')
    ap.add_argument('--nansweep', '-n', metavar='N', type=int,
                    help='check for NaNs every N steps')

    sp = ap.add_subparsers(help='sub-command help')

    ap_run = sp.add_parser('run', help='run --help')
    ap_run.add_argument('mesh', help='mesh file')
    ap_run.add_argument('cfg', type=FileType('r'), help='config file')
    ap_run.set_defaults(process=process_run)

    ap_restart = sp.add_parser('restart', help='restart --help')
    ap_restart.add_argument('mesh', help='mesh file')
    ap_restart.add_argument('soln', help='solution file')
    ap_restart.add_argument('cfg', nargs='?', type=FileType('r'),
                            help='new config file')
    ap_restart.set_defaults(process=process_restart)

    # Parse the arguments
    args = ap.parse_args()
    mesh, soln, cfg = args.process(args)

    # Create a backend
    backend = get_backend(args.backend, cfg)

    # Bring up MPI (this must be done after we have created a backend)
    mpiutil.init()

    # Get the mapping from physical ranks to MPI ranks
    rallocs = get_rank_allocation(mesh, cfg)

    # Construct the solver
    solver = get_solver(backend, rallocs, mesh, soln, cfg)

    # If we are running interactively then create a progress bar
    if args.progress and mpiutil.get_comm_rank_root()[1] == 0:
        pb = ProgressBar(solver.tstart, solver.tcurr, solver.tend)

        # Register a callback to update the bar after each step
        callb = lambda intg: pb.advance_to(intg.tcurr)
        solver.completed_step_handlers.append(callb)

    # NaN sweeping
    if args.nansweep:
        def nansweep(intg):
            if intg.nsteps % args.nansweep == 0:
                if any(np.isnan(np.sum(s)) for s in intg.soln):
                    raise RuntimeError('NaNs detected at t = {}'
                                       .format(intg.tcurr))

        solver.completed_step_handlers.append(nansweep)

    # Execute!
    solver.run()
def __init__(self, intg, cfgsect, suffix=None):
    super().__init__(intg, cfgsect, suffix)

    comm, rank, root = get_comm_rank_root()

    # Underlying system
    system = intg.system

    # Underlying system elements class
    self.elementscls = system.elementscls

    # Expressions to integrate
    c = self.cfg.items_as('constants', float)
    self.exprs = [
        self.cfg.getexpr(cfgsect, k, subs=c)
        for k in self.cfg.items(cfgsect) if k.startswith('int-')
    ]

    # Integration region pre-processing
    rinfo = self._prepare_region_info(intg)

    # Gradient pre-processing
    self._init_gradients(intg, rinfo)

    # Save a reference to the physical solution point locations
    self.plocs = system.ele_ploc_upts

    # Integration parameters
    self.nsteps = self.cfg.getint(cfgsect, 'nsteps')

    # The root rank needs to open the output file
    if rank == root:
        header = ['t'] + [
            k for k in self.cfg.items(cfgsect) if k.startswith('int-')
        ]

        # Open
        self.outf = init_csv(self.cfg, cfgsect, ','.join(header))

    # Prepare the per element-type info list
    self.eleinfo = []
    for (ename, eles), (eset, emask) in zip(system.ele_map.items(), rinfo):
        # Locations of each solution point
        ploc = eles.ploc_at_np('upts')[..., eset]
        ploc = ploc.swapaxes(0, 1)

        # Jacobian determinants
        rcpdjacs = eles.rcpdjac_at_np('upts')[:, eset]

        # Quadrature weights
        rname = self.cfg.get(f'solver-elements-{ename}', 'soln-pts')
        wts = get_quadrule(ename, rname, eles.nupts).wts

        # Save
        self.eleinfo.append((ploc, wts[:, None] / rcpdjacs, eset, emask))
def __init__(self, intg, mdata, basedir, basename, *, extn='.pyfrs'):
    # Base output directory and file name
    self.basedir = basedir
    self.basename = basename

    # Append the relevant extension
    if not self.basename.endswith(extn):
        self.basename += extn

    # Output counter (incremented each time write() is called)
    self.nout = self._restore_nout() if intg.isrestart else 0

    # MPI info
    comm, rank, root = get_comm_rank_root()

    # Gather the output metadata across all ranks
    mdata = comm.allgather(mdata)

    # Parallel I/O
    if (h5py.get_config().mpi and
            'PYFR_FORCE_SERIAL_HDF5' not in os.environ):
        self._write = self._write_parallel

        self._loc_names = loc_names = []
        self._global_shape_list = []

        for mrank, mfields in enumerate(mdata):
            prank = intg.rallocs.mprankmap[mrank]

            # Loop over all element types across all ranks
            for fname, fshape, fdtype in mfields:
                name = f'{fname}_p{prank}'

                self._global_shape_list.append((name, fshape, fdtype))

                if rank == mrank:
                    loc_names.append(name)
    # Serial I/O
    else:
        self._write = self._write_serial

        if rank == root:
            self._loc_info = loc_info = []
            self._mpi_info = mpi_info = []

            for mrank, mfields in enumerate(mdata):
                prank = intg.rallocs.mprankmap[mrank]

                for fname, fshape, fdtype in mfields:
                    name = f'{fname}_p{prank}'

                    if mrank == root:
                        loc_info.append(name)
                    else:
                        mpi_info.append((name, mrank, fshape, fdtype))
def __call__(self, intg):
    # Return if no output is due
    if intg.nsteps % self.nsteps:
        return

    # MPI info
    comm, rank, root = get_comm_rank_root()

    # Solution matrices indexed by element type
    solns = dict(zip(intg.system.ele_types, intg.soln))

    # Force vector
    f = np.zeros(self.ndims)

    for etype, fidx in self._m0:
        # Get the interpolation operator
        m0 = self._m0[etype, fidx]
        nfpts, nupts = m0.shape

        # Extract the relevant elements from the solution
        uupts = solns[etype][..., self._eidxs[etype, fidx]]

        # Interpolate to the face
        ufpts = np.dot(m0, uupts.reshape(nupts, -1))
        ufpts = ufpts.reshape(nfpts, self.nvars, -1)
        ufpts = ufpts.swapaxes(0, 1)

        # Compute the pressure
        p = self.elementscls.conv_to_pri(ufpts, self.cfg)[-1]

        # Get the quadrature weights and normal vectors
        qwts = self._qwts[etype, fidx]
        norms = self._norms[etype, fidx]

        # Do the quadrature
        f += np.einsum('i...,ij,jik', qwts, p, norms)

    # Reduce and output if we're the root rank
    if rank != root:
        comm.Reduce(f, None, op=get_mpi('sum'), root=root)
    else:
        comm.Reduce(get_mpi('in_place'), f, op=get_mpi('sum'), root=root)

        # Build the row
        row = [intg.tcurr] + f.tolist()

        # Write
        print(','.join(str(r) for r in row), file=self.outf)

        # Flush to disk
        self.outf.flush()
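The Reduce call above uses the usual mpi4py in-place idiom, where only the root rank passes the in-place sentinel as its send buffer. A minimal standalone sketch of that idiom, assuming get_mpi('in_place') and get_mpi('sum') simply map to MPI.IN_PLACE and MPI.SUM, is:

# Minimal sketch of the in-place Reduce used by the force/residual plugins above.
import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
root = 0

# Each rank contributes a partial force vector
f = np.full(3, float(comm.rank))

if comm.rank != root:
    # Non-root ranks only send
    comm.Reduce(f, None, op=MPI.SUM, root=root)
else:
    # The root rank reduces into its own buffer
    comm.Reduce(MPI.IN_PLACE, f, op=MPI.SUM, root=root)
    print('Summed force vector:', f)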
def get_state(self):
    # MPI info
    comm, rank, root = get_comm_rank_root()

    # Solution matrices indexed by element type
    solns = dict(zip(self.solver.system.ele_types, self.solver.soln))

    # Points we're responsible for sampling
    ourpts = self._ptsinfo[comm.rank]

    # Sample the solution matrices at these points
    samples = [solns[et][ui, :, ei] for _, et, (ui, ei) in ourpts]
    samples = self._process_samples(samples)

    # Gather to the root rank to give a list of points per rank
    samples = comm.gather(samples, root=root)

    # If we're the root rank then process the data
    if rank == root:
        data = []

        # Collate
        iters = [zip(pi, sp) for pi, sp in zip(self._ptsinfo, samples)]

        for mrank in self._ptsrank:
            # Unpack
            (ploc, etype, idx), samp = next(iters[mrank])

            # Determine the physical mesh rank
            prank = self.solver.rallocs.mprankmap[mrank]

            # Prepare the output row [[x, y], [rho, rhou, rhov, E]]
            row = [ploc, samp]

            # Append
            data.append(row)

        # Define the freestream values to be used for the cylinder
        rho = 1.0
        P = 1.0
        u = 0.236
        v = 0.0
        e = P / rho / 0.4 + 0.5 * (u**2 + v**2)
        freestream = np.array([rho, rho * u, rho * v, e])

        sol_data = np.zeros((128, 256, 4))
        sol_data[:, :] = freestream

        for i in range(len(self.loc_to_idx)):
            idx1, idx2 = self.loc_to_idx[i]
            sol_data[idx1, idx2] = data[i][1]

        return sol_data
def __init__(self, intg, cfgsect, suffix):
    super().__init__(intg, cfgsect, suffix)

    comm, rank, root = get_comm_rank_root()

    # Output frequency
    self.nsteps = self.cfg.getint(cfgsect, 'nsteps')

    # The root rank needs to open the output file
    if rank == root:
        header = ['t'] + intg.system.elementscls.convarmap[self.ndims]

        # Open
        self.outf = init_csv(self.cfg, cfgsect, ','.join(header))
def __init__(self, intg, cfgsect, suffix):
    super().__init__(intg, cfgsect, suffix)

    # Underlying elements class
    self.elementscls = intg.system.elementscls

    # Output frequency
    self.nsteps = self.cfg.getint(cfgsect, 'nsteps')

    # List of points to be sampled and format
    self.pts = ast.literal_eval(self.cfg.get(cfgsect, 'samp-pts'))
    self.fmt = self.cfg.get(cfgsect, 'format', 'primitive')

    # MPI info
    comm, rank, root = get_comm_rank_root()

    # MPI rank responsible for each point and rank-indexed info
    self._ptsrank = ptsrank = []
    self._ptsinfo = ptsinfo = [[] for i in range(comm.size)]

    # Physical location of the solution points
    plocs = [p.swapaxes(1, 2) for p in intg.system.ele_ploc_upts]

    for p in self.pts:
        # Find the nearest point in our partition
        cp = _closest_upt(intg.system.ele_types, plocs, p)

        # Reduce over all partitions
        mcp, mrank = comm.allreduce(cp, op=get_mpi('minloc'))

        # Store the rank responsible along with the info
        ptsrank.append(mrank)
        ptsinfo[mrank].append(mcp[1:])

    # If we're the root rank then open the output file
    if rank == root:
        # Determine the file path
        fname = self.cfg.get(cfgsect, 'file')

        # Append the '.csv' extension
        if not fname.endswith('.csv'):
            fname += '.csv'

        # Open for appending
        self.outf = open(fname, 'a')

        # Output a header if required
        if (os.path.getsize(fname) == 0 and
                self.cfg.getbool(cfgsect, 'header', True)):
            print(self._header, file=self.outf)
def __init__(self, backend, systemcls, rallocs, mesh, initsoln, cfg):
    self.backend = backend
    self.rallocs = rallocs
    self.cfg = cfg

    # Sanity checks
    if self._controller_needs_errest and not self._stepper_has_errest:
        raise TypeError('Incompatible stepper/controller combination')

    # Start time
    self.tstart = cfg.getfloat('solver-time-integrator', 't0', 0.0)

    # Output times
    self.tout = sorted(range_eval(cfg.get('soln-output', 'times')))
    self.tend = self.tout[-1]

    # Current time; defaults to tstart unless resuming a simulation
    if initsoln is None or 'stats' not in initsoln:
        self.tcurr = self.tstart
    else:
        stats = Inifile(initsoln['stats'])
        self.tcurr = stats.getfloat('solver-time-integrator', 'tcurr')

        # Cull already written output times
        self.tout = [t for t in self.tout if t > self.tcurr]

    # Ensure no output times are in the past
    if self.tout[0] < self.tcurr:
        raise ValueError('Output times must be in the future')

    # Determine the amount of temp storage required by this method
    nreg = self._stepper_nregs

    # Construct the relevant mesh partition
    self.system = systemcls(backend, rallocs, mesh, initsoln, nreg, cfg)

    # Extract the UUID of the mesh (to be saved with solutions)
    self._mesh_uuid = mesh['mesh_uuid']

    # Get a queue for subclasses to use
    self._queue = backend.queue()

    # Get the number of degrees of freedom in this partition
    ndofs = sum(self.system.ele_ndofs)

    comm, rank, root = get_comm_rank_root()

    # Sum to get the global number over all partitions
    self._gndofs = comm.allreduce(ndofs, op=get_mpi('sum'))
def setup_dataframe(self):
    self.elementscls = self.solver.system.elementscls

    # List of points to be sampled and format
    with open(self.base_dir + '/samp_pts.txt', 'r') as file:
        self.pts = eval(file.read())

    # All of the configs had this as 'primitive', but it does not appear to be used
    self.fmt = 'not primitive'

    # Directory where solution snapshots should be saved
    self.save_dir = 'sol_data'

    if self.baseline_file is not None:
        f = h5py.File(self.baseline_file, 'r')
        self.goal_state = np.array(f['sol_data'])
    else:
        self.goal_state = None

    # Initial omega
    self.solver.system.omega = 0

    # MPI info
    comm, rank, root = get_comm_rank_root()

    # MPI rank responsible for each point and rank-indexed info
    self._ptsrank = ptsrank = []
    self._ptsinfo = ptsinfo = [[] for i in range(comm.size)]

    # Physical location of the solution points
    plocs = [p.swapaxes(1, 2) for p in self.solver.system.ele_ploc_upts]

    # Load the map from point to index
    with open(self.base_dir + '/loc_to_idx.json') as loc_to_idx:
        loc_to_idx_str = json.load(loc_to_idx)

        self.loc_to_idx = dict()
        for key in loc_to_idx_str:
            self.loc_to_idx[int(key)] = loc_to_idx_str[key]

    # Locate the closest solution points in our partition
    closest = _closest_upts(self.solver.system.ele_types, plocs, self.pts)

    # Process these points
    for cp in closest:
        # Reduce over the distance
        _, mrank = comm.allreduce((cp[0], rank), op=get_mpi('minloc'))

        # Store the rank responsible along with its info
        ptsrank.append(mrank)
        ptsinfo[mrank].append(
            comm.bcast(cp[1:] if rank == mrank else None, root=mrank))
def __init__(self, intg, cfgsect, prefix):
    super().__init__(intg, cfgsect, prefix)

    self.flushsteps = self.cfg.getint(self.cfgsect, 'flushsteps', 500)

    self.count = 0
    self.stats = []
    self.tprev = intg.tcurr

    # MPI info
    comm, rank, root = get_comm_rank_root()

    # The root rank needs to open the output file
    if rank == root:
        self.outf = init_csv(self.cfg, cfgsect, 'n,t,dt,action,error')
def _resid(self, x, y):
    comm, rank, root = get_comm_rank_root()

    # Get an errest kern to compute the square of the maximum residual
    errest = self._get_errest_kerns()

    # Prepare and run the kernel
    self._prepare_reg_banks(x, y, y)
    self._queue % errest(self._pseudo_aresid, self._pseudo_rresid)

    # Reduce locally (element types) and globally (MPI ranks)
    rl = max(errest.retval)
    rg = comm.allreduce(rl, op=get_mpi('max'))

    # Normalise
    return math.sqrt(rg)
def _write_parallel(self, path, data, metadata):
    comm, rank, root = get_comm_rank_root()

    info = self._prepare_data_info(data)

    # If we are the root rank then process any metadata
    if rank == root:
        data = dict(data)

        for k, v in metadata.items():
            if isinstance(v, str):
                data[k] = np.array(v.encode(), dtype='S')
                info[k] = ((), data[k].dtype.str)
            else:
                data[k] = v
                info[k] = (v.shape, v.dtype.str)
    elif metadata:
        raise ValueError('Metadata must be written by the root rank')

    # Distribute the data info to all of the ranks
    ginfo = comm.allgather(info)

    with h5py.File(path, 'w', driver='mpio', comm=comm) as f:
        # Parallel HDF5 requires that data sets be created collectively
        for minfo in ginfo:
            for name, (shape, dtype) in minfo.items():
                f.create_dataset(name, shape, dtype=dtype)

        # Write out our local data
        for name, dat in zip(info, data.values()):
            fdata = f[name]

            if dat.shape:
                nrows = len(dat)
                rowsz = dat.nbytes // nrows
                rstep = 2 * 1024**3 // rowsz

                if rstep == 0:
                    raise IOError('Array is too large for parallel I/O')

                for ix in range(0, nrows, rstep):
                    fdata[ix:ix + rstep] = dat[ix:ix + rstep]
            else:
                fdata.write_direct(dat)

    # Wait for everyone to finish writing
    comm.barrier()
def _invoke_postaction(self, **kwargs):
    comm, rank, root = get_comm_rank_root()

    # If we have a post-action and are the root rank then fire it
    if rank == root and self.postact:
        # If a post-action is currently running then wait for it
        if self.postactaid is not None:
            prefork.wait(self.postactaid)

        # Prepare the command line
        cmdline = shlex.split(self.postact.format(**kwargs))

        # Invoke
        if self.postactmode == 'blocking':
            prefork.call(cmdline)
        else:
            self.postactaid = prefork.call_async(cmdline)
def __call__(self, intg):
    if intg.tcurr - self.tout_last < self.dt_out - self.tol:
        return

    comm, rank, root = get_comm_rank_root()

    # If we are the root rank then prepare the metadata
    if rank == root:
        stats = Inifile()
        stats.set('data', 'fields', ','.join(self.fields))
        stats.set('data', 'prefix', 'soln')
        intg.collect_stats(stats)

        metadata = dict(intg.cfgmeta,
                        stats=stats.tostr(),
                        mesh_uuid=intg.mesh_uuid)
    else:
        metadata = None

    # Fetch data from other plugins and add it to the metadata with ad-hoc keys
    for csh in intg.completed_step_handlers:
        try:
            prefix = intg.get_plugin_data_prefix(csh.name, csh.suffix)
            pdata = csh.serialise(intg)
        except AttributeError:
            pdata = {}

        if rank == root:
            metadata |= {f'{prefix}/{k}': v for k, v in pdata.items()}

    # Fetch and (if necessary) subset the solution
    data = dict(self._ele_region_data)
    for idx, etype, rgn in self._ele_regions:
        data[etype] = intg.soln[idx][..., rgn].astype(self.fpdtype)

    # Write out the file
    solnfname = self._writer.write(data, intg.tcurr, metadata)

    # If a post-action has been registered then invoke it
    self._invoke_postaction(intg=intg, mesh=intg.system.mesh.fname,
                            soln=solnfname, t=intg.tcurr)

    # Update the last output time
    self.tout_last = intg.tcurr
def _invoke_postaction(self, **kwargs):
    comm, rank, root = get_comm_rank_root()

    # If we have a post-action and are the root rank then fire it
    if rank == root and self.postact:
        # If a post-action is currently running then wait for it
        if self.postactaid is not None:
            prefork.wait(self.postactaid)

        # Prepare the command line
        cmdline = shlex.split(self.postact.format(**kwargs))

        # Invoke
        if self.postactmode == "blocking":
            prefork.call(cmdline)
        else:
            self.postactaid = prefork.call_async(cmdline)
def __init__(self, intg, cfgsect, prefix):
    super().__init__(intg, cfgsect, prefix)

    self.flushsteps = self.cfg.getint(self.cfgsect, 'flushsteps', 500)

    self.count = 0
    self.stats = []
    self.tprev = intg.tcurr

    # MPI info
    comm, rank, root = get_comm_rank_root()

    # The root rank needs to open the output file
    if rank == root:
        self.outf = init_csv(self.cfg, cfgsect, 'n,t,dt,action,error')
    else:
        self.outf = None
def __call__(self, intg):
    # Return if no output is due
    if intg.nacptsteps % self.nsteps:
        return

    # MPI info
    comm, rank, root = get_comm_rank_root()

    # Solution matrices indexed by element type
    solns = dict(zip(intg.system.ele_types, intg.soln))

    # Points we're responsible for sampling
    ourpts = self._ptsinfo[comm.rank]

    # Sample the solution matrices at these points
    samples = [solns[et][ui, :, ei] for _, et, (ui, ei) in ourpts]
    samples = self._process_samples(samples)

    # Gather to the root rank to give a list of points per rank
    samples = comm.gather(samples, root=root)

    # If we're the root rank then output
    if rank == root:
        # Collate
        iters = [zip(pi, sp) for pi, sp in zip(self._ptsinfo, samples)]

        for mrank in self._ptsrank:
            # Unpack
            (ploc, etype, idx), samp = next(iters[mrank])

            # Determine the physical mesh rank
            prank = intg.rallocs.mprankmap[mrank]

            # Write the output row
            print(intg.tcurr, *ploc, prank, etype, *idx, *samp,
                  sep=',', file=self.outf)

        # Flush to disk
        self.outf.flush()
def __init__(self, intg, cfgsect, prefix):
    super().__init__(intg, cfgsect, prefix)

    self.flushsteps = self.cfg.getint(self.cfgsect, 'flushsteps', 500)

    self.count = 0
    self.stats = []
    self.tprev = intg.tcurr

    fvars = ','.join(intg.system.elementscls.convarmap[self.ndims])

    # MPI info
    comm, rank, root = get_comm_rank_root()

    # The root rank needs to open the output file
    if rank == root:
        self.outf = init_csv(self.cfg, cfgsect, 'n,t,i,' + fvars)
    else:
        self.outf = None
def output(self, solnmap, stats):
    comm, rank, root = get_comm_rank_root()

    # Convert the config and stats objects to strings
    if rank == root:
        metadata = dict(config=self._cfg.tostr(),
                        stats=stats.tostr(),
                        mesh_uuid=self._mesh_uuid)
    else:
        metadata = None

    # Determine the output path
    path = self._get_output_path()

    # Delegate to _write to do the actual outputting
    self._write(path, solnmap, metadata)

    # Increment the output number
    self.nout += 1
def __init__(self, backend, systemcls, rallocs, mesh, initsoln, cfg):
    self.backend = backend
    self.rallocs = rallocs
    self.cfg = cfg
    self.isrestart = initsoln is not None

    # Sanity checks
    if self._controller_needs_errest and not self._stepper_has_errest:
        raise TypeError('Incompatible stepper/controller combination')

    # Start time
    self.tstart = cfg.getfloat('solver-time-integrator', 'tstart', 0.0)
    self.tend = cfg.getfloat('solver-time-integrator', 'tend')

    # Current time; defaults to tstart unless restarting
    if self.isrestart:
        stats = Inifile(initsoln['stats'])
        self.tcurr = stats.getfloat('solver-time-integrator', 'tcurr')
    else:
        self.tcurr = self.tstart

    self.tlist = deque([self.tend])

    # Determine the amount of temp storage required by this method
    nreg = self._stepper_nregs

    # Construct the relevant mesh partition
    self.system = systemcls(backend, rallocs, mesh, initsoln, nreg, cfg)

    # Extract the UUID of the mesh (to be saved with solutions)
    self.mesh_uuid = mesh['mesh_uuid']

    # Get a queue for subclasses to use
    self._queue = backend.queue()

    # Get the number of degrees of freedom in this partition
    ndofs = sum(self.system.ele_ndofs)

    comm, rank, root = get_comm_rank_root()

    # Sum to get the global number over all partitions
    self._gndofs = comm.allreduce(ndofs, op=get_mpi('sum'))
def __call__(self, intg):
    # Return if no output is due
    if intg.nacptsteps % self.nsteps:
        return

    # MPI info
    comm, rank, root = get_comm_rank_root()

    # Solution matrices indexed by element type
    solns = dict(zip(intg.system.ele_types, intg.soln))

    # Points we're responsible for sampling
    ourpts = self._ptsinfo[comm.rank]

    # Sample the solution matrices at these points
    samples = [solns[et][ui, :, ei] for _, et, (ui, ei) in ourpts]
    samples = self._process_samples(samples)

    # Gather to the root rank to give a list of points per rank
    samples = comm.gather(samples, root=root)

    # If we're the root rank then output
    if rank == root:
        # Collate
        iters = [zip(pi, sp) for pi, sp in zip(self._ptsinfo, samples)]

        for mrank in self._ptsrank:
            # Unpack
            (ploc, etype, idx), samp = next(iters[mrank])

            # Determine the physical mesh rank
            prank = intg.rallocs.mprankmap[mrank]

            # Prepare the output row
            row = [[intg.tcurr], ploc, [prank, etype], idx, samp]
            row = ','.join(str(r) for rp in row for r in rp)

            # Write
            print(row, file=self.outf)

        # Flush to disk
        self.outf.flush()
def _write(self, path, solnmap, metadata):
    comm, rank, root = get_comm_rank_root()

    if rank != root:
        for tag, buf in enumerate(solnmap.values()):
            comm.Send(buf.copy(), root, tag)
    else:
        # Recv all of the non-local solution mats
        MPI.Prequest.Startall(self._mpi_rreqs)
        MPI.Prequest.Waitall(self._mpi_rreqs)

        # Combine local and MPI data
        names = it.chain(self._loc_names, self._mpi_names)
        solns = it.chain(solnmap.values(), self._mpi_rbufs)

        # Create the output dictionary
        outdict = dict(zip(names, solns), **metadata)

        with open(path, 'wb') as f:
            np.savez(f, **outdict)
def _write(self, path, solnmap, metadata):
    comm, rank, root = get_comm_rank_root()

    # Create the output directory and save the config/status files
    if rank == root:
        if os.path.exists(path):
            rm(path)

        os.mkdir(path)

        # Write out our metadata
        for name, data in metadata.items():
            np.save(os.path.join(path, name), data)

    # Wait for this to complete
    comm.barrier()

    # Save the solutions
    for etype, buf in solnmap.items():
        solnpath = os.path.join(path, self._get_name_for_soln(etype))
        np.save(solnpath, buf)
def __init__(self, intg, cfgsect, suffix):
    super().__init__(intg, cfgsect, suffix)

    # Underlying elements class
    self.elementscls = intg.system.elementscls

    # Output frequency
    self.nsteps = self.cfg.getint(cfgsect, 'nsteps')

    # List of points to be sampled and format
    self.pts = self.cfg.getliteral(cfgsect, 'samp-pts')
    self.fmt = self.cfg.get(cfgsect, 'format', 'primitive')

    # MPI info
    comm, rank, root = get_comm_rank_root()

    # MPI rank responsible for each point and rank-indexed info
    self._ptsrank = ptsrank = []
    self._ptsinfo = ptsinfo = [[] for i in range(comm.size)]

    # Physical location of the solution points
    plocs = [p.swapaxes(1, 2) for p in intg.system.ele_ploc_upts]

    # Locate the closest solution points in our partition
    closest = _closest_upts(intg.system.ele_types, plocs, self.pts)

    # Process these points
    for cp in closest:
        # Reduce over the distance
        _, mrank = comm.allreduce((cp[0], rank), op=get_mpi('minloc'))

        # Store the rank responsible along with its info
        ptsrank.append(mrank)
        ptsinfo[mrank].append(
            comm.bcast(cp[1:] if rank == mrank else None, root=mrank)
        )

    # If we're the root rank then open the output file
    if rank == root:
        self.outf = init_csv(self.cfg, cfgsect, self._header)
def __call__(self, intg):
    # If an output is due this step
    if intg.nacptsteps % self.nsteps == 0 and intg.nacptsteps:
        # MPI info
        comm, rank, root = get_comm_rank_root()

        # Previous and current solution
        prev = self._prev
        curr = intg.soln

        # Square of the residual vector for each variable
        resid = sum(np.linalg.norm(p - c, axis=(0, 2))**2
                    for p, c in zip(prev, curr))

        # Reduce and, if we are the root rank, output
        if rank != root:
            comm.Reduce(resid, None, op=get_mpi('sum'), root=root)
        else:
            comm.Reduce(get_mpi('in_place'), resid, op=get_mpi('sum'),
                        root=root)

            # Normalise
            resid = np.sqrt(resid) / (intg.tcurr - self._tprev)

            # Build the row
            row = [intg.tcurr] + resid.tolist()

            # Write
            print(','.join(str(r) for r in row), file=self.outf)

            # Flush to disk
            self.outf.flush()

        del self._prev, self._tprev

    # If an output is due next step
    if (intg.nacptsteps + 1) % self.nsteps == 0:
        self._prev = [s.copy() for s in intg.soln]
        self._tprev = intg.tcurr
def __call__(self, intg):
    # Process the sequence of rejected/accepted steps
    for i, (dt, act, err) in enumerate(intg.stepinfo, start=self.count):
        self.stats.append((i, self.tprev, dt, act, err))

    # Update the total step count and save the current time
    self.count += len(intg.stepinfo)
    self.tprev = intg.tcurr

    comm, rank, root = get_comm_rank_root()

    # If we're the root rank then output
    if rank == root:
        for s in self.stats:
            print(','.join(str(c) for c in s), file=self.outf)

        # Periodically flush to disk
        if intg.nacptsteps % self.flushsteps == 0:
            self.outf.flush()

    # Reset the stats
    self.stats = []
def __init__(self, intg, cfgsect, suffix):
    super().__init__(intg, cfgsect, suffix)

    # Underlying elements class
    self.elementscls = intg.system.elementscls

    # Output frequency
    self.nsteps = self.cfg.getint(cfgsect, 'nsteps')

    # List of points to be sampled and format
    self.pts = ast.literal_eval(self.cfg.get(cfgsect, 'samp-pts'))
    self.fmt = self.cfg.get(cfgsect, 'format', 'primitive')

    # MPI info
    comm, rank, root = get_comm_rank_root()

    # MPI rank responsible for each point and rank-indexed info
    self._ptsrank = ptsrank = []
    self._ptsinfo = ptsinfo = [[] for i in range(comm.size)]

    # Physical location of the solution points
    plocs = [p.swapaxes(1, 2) for p in intg.system.ele_ploc_upts]

    for p in self.pts:
        # Find the nearest point in our partition
        cp = _closest_upt(intg.system.ele_types, plocs, p)

        # Reduce over all partitions
        mcp, mrank = comm.allreduce(cp, op=get_mpi('minloc'))

        # Store the rank responsible along with the info
        ptsrank.append(mrank)
        ptsinfo[mrank].append(mcp[1:])

    # If we're the root rank then open the output file
    if rank == root:
        self.outf = init_csv(self.cfg, cfgsect, self._header)
def __init__(self, intg, nvars, basedir, basename, *,
             prefix, extn='.pyfrs'):
    # Base output directory and file name
    self.basedir = basedir
    self.basename = basename

    # Append the relevant extension
    if not self.basename.endswith(extn):
        self.basename += extn

    # Prefix given to each data array in the output file
    self.prefix = prefix

    # Output counter (incremented each time write() is called)
    self.nout = self._restore_nout() if intg.isrestart else 0

    # Copy the float type
    self.fpdtype = intg.backend.fpdtype

    # MPI info
    comm, rank, root = get_comm_rank_root()

    # Get the type and shape of each element in the partition
    etypes = intg.system.ele_types
    shapes = [(nupts, nvars, neles)
              for nupts, _, neles in intg.system.ele_shapes]

    # Gather
    eleinfo = comm.allgather(zip(etypes, shapes))

    # Parallel I/O
    if (h5py.get_config().mpi and
            'PYFR_FORCE_SERIAL_HDF5' not in os.environ):
        self._write = self._write_parallel

        self._loc_names = loc_names = []
        self._global_shape_list = []

        for mrank, meleinfo in enumerate(eleinfo):
            prank = intg.rallocs.mprankmap[mrank]

            # Loop over all element types across all ranks
            for etype, shape in meleinfo:
                name = self._get_name_for_data(etype, prank)

                self._global_shape_list.append((name, shape))

                if rank == mrank:
                    loc_names.append(name)
    # Serial I/O
    else:
        self._write = self._write_serial

        if rank == root:
            self._mpi_rbufs = mpi_rbufs = []
            self._mpi_rreqs = mpi_rreqs = []
            self._mpi_names = mpi_names = []
            self._loc_names = loc_names = []

            for mrank, meleinfo in enumerate(eleinfo):
                prank = intg.rallocs.mprankmap[mrank]

                for tag, (etype, shape) in enumerate(meleinfo):
                    name = self._get_name_for_data(etype, prank)

                    if mrank == root:
                        loc_names.append(name)
                    else:
                        rbuf = np.empty(shape, dtype=self.fpdtype)
                        rreq = comm.Recv_init(rbuf, mrank, tag)

                        mpi_rbufs.append(rbuf)
                        mpi_rreqs.append(rreq)
                        mpi_names.append(name)