import firedrake as fd
from firedrake import Constant, dx, dot, grad, COMM_WORLD
from firedrake.petsc import PETSc
import finat
from mpi4py import MPI
import numpy as np
import sys
from helpers import RickerWavelet, delta_expr, gauss_lobatto_legendre_cube_rule
import os

# Public API of this module (solver_CG is defined elsewhere in the file).
__all__ = ["solver_CG"]

# Start PETSc event logging now so that per-event timings (queried via
# PETSc.Log.Event(...).getPerfInfo() below) accumulate from module import.
PETSc.Log().begin()

# Create the "data" output directory exactly once, on rank 0 only, then
# synchronise so no other rank proceeds before the directory exists.
if COMM_WORLD.rank == 0:
    if not os.path.exists("data"):
        os.makedirs("data")
    elif not os.path.isdir("data"):
        # A plain file called "data" blocks directory creation — fail loudly.
        raise RuntimeError("Cannot create output directory, file of given name exists")
COMM_WORLD.barrier()


def _get_time(event, comm=COMM_WORLD):
    # Average wall-clock time spent in the named PETSc log event,
    # averaged over all ranks of `comm` (sum-allreduce divided by size).
    return (
        comm.allreduce(PETSc.Log.Event(event).getPerfInfo()["time"], op=MPI.SUM)
        / comm.size
    )
def output_time(start, end, **kwargs):
    """Record timing and DoF statistics for a run to screen and to file.

    Used by ``explosive_source.py`` at the end of a run. ``start`` and
    ``end`` are per-rank wall-clock timestamps bracketing the time loop;
    rank 0 gathers them from all ranks to compute the global elapsed time.

    Keyword arguments (all optional) describe the run configuration:
    ``verbose``, ``tofile``, ``meshid``, ``ntimesteps``, ``nloops``,
    ``tile_size``, ``partitioning``, ``extra_halo``, ``explicit_mode``,
    ``glb_maps``, ``poly_order``, ``domain``, ``coloring``, ``prefetch``,
    ``function_spaces``.

    Collective over ``MPI.COMM_WORLD`` (all ranks must call it). Returns
    ``None``; side effects are prints on rank 0 and, if ``tofile`` is
    true, files written under ``<cwd>/times/``.
    """
    verbose = kwargs.get('verbose', False)
    tofile = kwargs.get('tofile', False)
    meshid = kwargs.get('meshid', 'default_mesh')
    ntimesteps = kwargs.get('ntimesteps', 0)
    nloops = kwargs.get('nloops', 0)
    tile_size = kwargs.get('tile_size', 0)
    partitioning = kwargs.get('partitioning', 'chunk')
    extra_halo = 'yes' if kwargs.get('extra_halo', False) else 'no'
    explicit_mode = kwargs.get('explicit_mode', None)
    glb_maps = 'yes' if kwargs.get('glb_maps', False) else 'no'
    poly_order = kwargs.get('poly_order', -1)
    domain = kwargs.get('domain', 'default_domain')
    coloring = kwargs.get('coloring', 'default')
    prefetch = 'yes' if kwargs.get('prefetch', False) else 'no'
    function_spaces = kwargs.get('function_spaces', [])

    backend = os.environ.get("SLOPE_BACKEND", "SEQUENTIAL")

    # Executable name without extension, used to label the output tree.
    name = os.path.splitext(os.path.basename(sys.argv[0]))[0]

    # Mean of a sequence; 0.0 for an empty one.
    avg = lambda v: (sum(v) / len(v)) if v else 0.0

    # Where do I store the output?
    output_dir = os.getcwd()

    # Find number of processes, and number of threads per process
    rank = MPI.COMM_WORLD.rank
    num_procs = MPI.COMM_WORLD.size
    num_threads = int(os.environ.get("OMP_NUM_THREADS", 1)) if backend == 'OMP' else 1

    # What execution mode is this? A purely sequential run updates the
    # timing files of every version, since it is a baseline for all.
    if num_procs == 1 and num_threads == 1:
        versions = ['sequential', 'openmp', 'mpi', 'mpi_openmp']
    elif num_procs == 1 and num_threads > 1:
        versions = ['openmp']
    elif num_procs > 1 and num_threads == 1:
        versions = ['mpi']
    else:
        versions = ['mpi_openmp']

    # Determine the total execution time (Python + kernel execution + MPI cost):
    # every non-zero rank sends its (start, end) pair to rank 0, which takes
    # the earliest start and the latest end across all ranks.
    if rank in range(1, num_procs):
        MPI.COMM_WORLD.isend([start, end], dest=0)
    elif rank == 0:
        starts, ends = [0]*num_procs, [0]*num_procs
        starts[0], ends[0] = start, end
        for i in range(1, num_procs):
            starts[i], ends[i] = MPI.COMM_WORLD.recv(source=i)
        min_start, max_end = min(starts), max(ends)
        tot = round(max_end - min_start, 3)
        print("Time stepping: ", tot, "s")

    # Exit if user doesn't want timings to be recorded
    if not tofile:
        return

    # Determine (on rank 0):
    #   ACT  - Average Compute Time, pure kernel execution
    #   ACCT - Average Compute and Communication Time (ACT + MPI cost)
    # For this, first dump PETSc performance log info to a temporary file, as
    # currently there's no other clean way of accessing the times in petsc4py.
    logfile = os.path.join(output_dir, 'seigenlog.py')
    vwr = PETSc.Viewer().createASCII(logfile)
    vwr.pushFormat(PETSc.Viewer().Format().ASCII_INFO_DETAIL)
    PETSc.Log().view(vwr)
    PETSc.Options().delValue('log_view')
    if rank == 0:
        with open(logfile, 'r') as f:
            content = f.read()
        # The dumped log is executable Python defining a `Stages` dict.
        # Run it in a private namespace: in Python 3, exec'ing into
        # locals() inside a function does NOT bind local names, so the
        # old `exec content in globals(), locals()` idiom is broken.
        log_namespace = {}
        exec(content, log_namespace)
        Stages = log_namespace['Stages']
        compute_times = [Stages['Main Stage']['ParLoopCKernel'][i]['time']
                         for i in range(num_procs)]
        mpi_times = [Stages['Main Stage']['ParLoopHaloEnd'][i]['time']
                     for i in range(num_procs)]
        ACT = round(avg(compute_times), 3)
        AMT = round(avg(mpi_times), 3)
        ACCT = ACT + AMT
        print("Average Compute Time: ", ACT, "s")
        print("Average Compute and Communication Time: ", ACCT, "s")

    # Determine if a multi-node execution
    platform = os.environ.get('NODENAME', 'unknown')

    # Adjust /tile_size/ and /version/ based on the problem that was actually run
    assert nloops >= 0
    if nloops == 0:
        tile_size = 0
        mode = "untiled"
    elif explicit_mode:
        mode = "fs%d" % explicit_mode
    else:
        mode = "loops%d" % nloops

    ### Print timings to file ###

    def fix(values):
        # Coerce each '|'-separated field back to int/float where possible,
        # stripping whitespace and dropping empty fields.
        new_values = []
        for v in values:
            try:
                new_v = int(v)
            except ValueError:
                try:
                    new_v = float(v)
                except ValueError:
                    new_v = v.strip()
            if new_v != '':
                new_values.append(new_v)
        return tuple(new_values)

    if rank == 0:
        for version in versions:
            timefile = os.path.join(output_dir, "times", name,
                                    "poly_%d" % poly_order, domain, meshid,
                                    version, platform,
                                    "np%d_nt%d.txt" % (num_procs, num_threads))
            # Create directory and file (if they do not exist)
            if not os.path.exists(os.path.dirname(timefile)):
                os.makedirs(os.path.dirname(timefile))
            if not os.path.exists(timefile):
                open(timefile, 'a').close()
            # Read the old content, add the new time value, order everything
            # based on <execution time, #loops tiled>, write back to the file
            # (overwriting existing content).
            with open(timefile, "r+") as f:
                # Skip the two header lines, then re-parse the table rows.
                lines = [line.split('|') for line in f if line.strip()][2:]
                lines = [fix(i) for i in lines]
                lines += [(tot, ACT, ACCT, ntimesteps, mode, tile_size,
                           partitioning, extra_halo, glb_maps, coloring,
                           prefetch)]
                lines.sort(key=lambda x: x[0])
                template = "| " + "%9s | " * 11
                prepend = template % ('time', 'ACT', 'ACCT', 'timesteps',
                                      'mode', 'tilesize', 'partmode',
                                      'extrahalo', 'glbmaps', 'coloring',
                                      'prefetch')
                lines = "\n".join([prepend, '-'*133] +
                                  [template % i for i in lines]) + "\n"
                f.seek(0)
                f.write(lines)
                f.truncate()

    ### Print DoFs summary to file ###

    dofsfile = os.path.join(output_dir, "times", name, "dofs_summary.txt")
    if rank == 0 and not os.path.exists(dofsfile):
        with open(dofsfile, 'a') as f:
            f.write("poly:numprocs:[fs1_dofs;fs2_dofs;...]\n")
    # Global DoF count per function space (collective: all ranks participate).
    # NOTE: was `mpi4py.MPI.SUM`, a NameError — only `MPI` is imported.
    tot_dofs = [MPI.COMM_WORLD.allreduce(fs.dof_count, op=MPI.SUM)
                for fs in function_spaces]
    if rank == 0:
        with open(dofsfile, "a") as f:
            f.write("%d:%d:%s\n" % (poly_order, num_procs,
                                    ';'.join([str(i) for i in tot_dofs])))

    ### Print summary output to screen ###

    if rank == 0 and verbose:
        for i in range(num_procs):
            fs_info = ", ".join(["%s=%d" % (fs.name, fs.dof_count)
                                 for fs in function_spaces])
            tot_time = compute_times[i] + mpi_times[i]
            # Python/other overhead: elapsed minus (compute + MPI).
            offC = (ends[i] - starts[i]) - tot_time
            offCperc = (offC / (ends[i] - starts[i])) * 100
            mpiPerc = (mpi_times[i] / (ends[i] - starts[i])) * 100
            print("Rank %d: comp=%.2fs, mpi=%.2fs -- tot=%.2fs (py=%.2fs, %.2f%%; mpi_oh=%.2f%%; fs=[%s])" %
                  (i, compute_times[i], mpi_times[i], tot_time, offC,
                   offCperc, mpiPerc, fs_info))
            sys.stdout.flush()

    MPI.COMM_WORLD.barrier()

    # Clean up the temporary PETSc log dump.
    if rank == 0:
        os.remove(logfile)