def run_mpi_sim(args, inputfile, usernamespace, optparams=None): """Run mixed mode MPI/OpenMP simulation - MPI task farm for models with each model parallelised with OpenMP Args: args (dict): Namespace with command line arguments inputfile (object): File object for the input file. usernamespace (dict): Namespace that can be accessed by user in any Python code blocks in input file. optparams (dict): Optional argument. For Taguchi optimisation it provides the parameters to optimise and their values. """ from mpi4py import MPI # Get name of processor/host name = MPI.Get_processor_name() # Set range for number of models to run modelstart = args.restart if args.restart else 1 modelend = modelstart + args.n numbermodelruns = args.n # Number of workers and command line flag to indicate a spawned worker worker = '--mpi-worker' numberworkers = args.mpi - 1 # Master process if worker not in sys.argv: tsimstart = perf_counter() print('MPI master rank (PID {}) on {} using {} workers'.format(os.getpid(), name, numberworkers)) # Create a list of work worklist = [] for model in range(modelstart, modelend): workobj = dict() workobj['currentmodelrun'] = model if optparams: workobj['optparams'] = optparams worklist.append(workobj) # Add stop sentinels worklist += ([StopIteration] * numberworkers) # Spawn workers comm = MPI.COMM_WORLD.Spawn(sys.executable, args=['-m', 'gprMax', '-n', str(args.n)] + sys.argv[1::] + [worker], maxprocs=numberworkers) # Reply to whoever asks until done status = MPI.Status() for work in worklist: comm.recv(source=MPI.ANY_SOURCE, status=status) comm.send(obj=work, dest=status.Get_source()) # Shutdown comm.Disconnect() tsimend = perf_counter() simcompletestr = '\n=== Simulation completed in [HH:MM:SS]: {}'.format(datetime.timedelta(seconds=tsimend - tsimstart)) print('{} {}\n'.format(simcompletestr, '=' * (get_terminal_width() - 1 - len(simcompletestr)))) # Worker process elif worker in sys.argv: # Connect to parent try: comm = MPI.Comm.Get_parent() # get MPI communicator object rank = comm.Get_rank() # rank of this process except: raise ValueError('Could not connect to parent') # Ask for work until stop sentinel for work in iter(lambda: comm.sendrecv(0, dest=0), StopIteration): currentmodelrun = work['currentmodelrun'] gpuinfo = '' print('MPI worker rank {} (PID {}) starting model {}/{}{} on {}'.format(rank, os.getpid(), currentmodelrun, numbermodelruns, gpuinfo, name)) # If Taguchi optimistaion, add specific value for each parameter to optimise for each experiment to user accessible namespace if 'optparams' in work: tmp = {} tmp.update((key, value[currentmodelrun - 1]) for key, value in work['optparams'].items()) modelusernamespace = usernamespace.copy() modelusernamespace.update({'optparams': tmp}) else: modelusernamespace = usernamespace # Run the model run_model(args, currentmodelrun, modelend - 1, numbermodelruns, inputfile, modelusernamespace) # Shutdown comm.Disconnect()
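# The task-farm exchange above relies on a simple convention: a worker calls
# comm.sendrecv(0, dest=0), which both signals readiness and blocks until the
# master replies with either a work item or the StopIteration sentinel. The
# sketch below is a minimal, self-contained restatement of that pattern
# (assumes mpi4py; do_work and the '--worker' flag are placeholder names, not
# part of gprMax).
def _example_spawn_task_farm(worklist, numberworkers, do_work):
    import sys
    from mpi4py import MPI
    if '--worker' not in sys.argv:
        # Master: spawn workers running this same script with a worker flag
        comm = MPI.COMM_WORLD.Spawn(sys.executable,
                                    args=[sys.argv[0], '--worker'],
                                    maxprocs=numberworkers)
        status = MPI.Status()
        for work in worklist + [StopIteration] * numberworkers:
            comm.recv(source=MPI.ANY_SOURCE, status=status)  # worker asks for work
            comm.send(obj=work, dest=status.Get_source())    # reply with a task
        comm.Disconnect()
    else:
        # Worker: ask for work until the stop sentinel arrives
        comm = MPI.Comm.Get_parent()
        for work in iter(lambda: comm.sendrecv(0, dest=0), StopIteration):
            do_work(work)
        comm.Disconnect()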
def run_opt_sim(args, numbermodelruns, inputfile, usernamespace): """Run a simulation using Taguchi's optmisation process. Args: args (dict): Namespace with command line arguments numbermodelruns (int): Total number of model runs. inputfile (str): Name of the input file to open. usernamespace (dict): Namespace that can be accessed by user in any Python code blocks in input file. """ tsimstart = perf_counter() if numbermodelruns > 1: raise CmdInputError('When a Taguchi optimisation is being carried out the number of model runs argument is not required') inputfileparts = os.path.splitext(inputfile) # Default maximum number of iterations of optimisation to perform (used if the stopping criterion is not achieved) maxiterations = 20 # Process Taguchi code blocks in the input file; pass in ordered dictionary to hold parameters to optimise tmp = usernamespace.copy() tmp.update({'optparams': OrderedDict()}) taguchinamespace = taguchi_code_blocks(inputfile, tmp) # Extract dictionaries and variables containing initialisation parameters optparams = taguchinamespace['optparams'] fitness = taguchinamespace['fitness'] if 'maxiterations' in taguchinamespace: maxiterations = taguchinamespace['maxiterations'] # Store initial parameter ranges optparamsinit = list(optparams.items()) # Dictionary to hold history of optmised values of parameters optparamshist = OrderedDict((key, list()) for key in optparams) # Import specified fitness function fitness_metric = getattr(import_module('user_libs.optimisation_taguchi.fitness_functions'), fitness['name']) # Select OA OA, N, cols, k, s, t = construct_OA(optparams) taguchistr = '\n--- Taguchi optimisation' print('{} {}\n'.format(taguchistr, '-' * (get_terminal_width() - 1 - len(taguchistr)))) print('Orthogonal array: {:g} experiments per iteration, {:g} parameters ({:g} will be used), {:g} levels, and strength {:g}'.format(N, cols, k, s, t)) tmp = [(k, v) for k, v in optparams.items()] print('Parameters to optimise with ranges: {}'.format(str(tmp).strip('[]'))) print('Output name(s) from model: {}'.format(fitness['args']['outputs'])) print('Fitness function "{}" with stopping criterion {:g}'.format(fitness['name'], fitness['stop'])) print('Maximum iterations: {:g}'.format(maxiterations)) # Initialise arrays and lists to store parameters required throughout optimisation # Lower, central, and upper values for each parameter levels = np.zeros((s, k), dtype=floattype) # Optimal lower, central, or upper value for each parameter levelsopt = np.zeros(k, dtype=np.uint8) # Difference used to set values for levels levelsdiff = np.zeros(k, dtype=floattype) # History of fitness values from each confirmation experiment fitnessvalueshist = [] iteration = 0 while iteration < maxiterations: # Reset number of model runs to number of experiments numbermodelruns = N usernamespace['number_model_runs'] = numbermodelruns # Fitness values for each experiment fitnessvalues = [] # Set parameter ranges and define experiments optparams, levels, levelsdiff = calculate_ranges_experiments(optparams, optparamsinit, levels, levelsopt, levelsdiff, OA, N, k, s, iteration) # Run model for each experiment if args.mpi: # Mixed mode MPI/OpenMP - MPI task farm for models with each model parallelised with OpenMP run_mpi_sim(args, numbermodelruns, inputfile, usernamespace, optparams) else: # Standard behaviour - models run serially with each model parallelised with OpenMP run_std_sim(args, numbermodelruns, inputfile, usernamespace, optparams) # Calculate fitness value for each experiment for experiment in 
range(1, numbermodelruns + 1): outputfile = inputfileparts[0] + str(experiment) + '.out' fitnessvalues.append(fitness_metric(outputfile, fitness['args'])) os.remove(outputfile) taguchistr = '\n--- Taguchi optimisation, iteration {}: {} initial experiments with fitness values {}.'.format(iteration + 1, numbermodelruns, fitnessvalues) print('{} {}\n'.format(taguchistr, '-' * (get_terminal_width() - 1 - len(taguchistr)))) # Calculate optimal levels from fitness values by building a response table; update dictionary of parameters with optimal values optparams, levelsopt = calculate_optimal_levels(optparams, levels, levelsopt, fitnessvalues, OA, N, k) # Update dictionary with history of parameters with optimal values for key, value in optparams.items(): optparamshist[key].append(value[0]) # Run a confirmation experiment with optimal values numbermodelruns = 1 usernamespace['number_model_runs'] = numbermodelruns run_std_sim(args, numbermodelruns, inputfile, usernamespace, optparams) # Calculate fitness value for confirmation experiment outputfile = inputfileparts[0] + '.out' fitnessvalueshist.append(fitness_metric(outputfile, fitness['args'])) # Rename confirmation experiment output file so that it is retained for each iteraction os.rename(outputfile, os.path.splitext(outputfile)[0] + '_final' + str(iteration + 1) + '.out') taguchistr = '\n--- Taguchi optimisation, iteration {} completed. History of optimal parameter values {} and of fitness values {}'.format(iteration + 1, dict(optparamshist), fitnessvalueshist) print('{} {}\n'.format(taguchistr, '-' * (get_terminal_width() - 1 - len(taguchistr)))) iteration += 1 # Stop optimisation if stopping criterion has been reached if fitnessvalueshist[iteration - 1] > fitness['stop']: taguchistr = '\n--- Taguchi optimisation stopped as fitness criteria reached: {:g} > {:g}'.format(fitnessvalueshist[iteration - 1], fitness['stop']) print('{} {}\n'.format(taguchistr, '-' * (get_terminal_width() - 1 - len(taguchistr)))) break # Stop optimisation if successive fitness values are within a percentage threshold if iteration > 2: fitnessvaluesclose = (np.abs(fitnessvalueshist[iteration - 2] - fitnessvalueshist[iteration - 1]) / fitnessvalueshist[iteration - 1]) * 100 fitnessvaluesthres = 0.1 if fitnessvaluesclose < fitnessvaluesthres: taguchistr = '\n--- Taguchi optimisation stopped as successive fitness values within {}%'.format(fitnessvaluesthres) print('{} {}\n'.format(taguchistr, '-' * (get_terminal_width() - 1 - len(taguchistr)))) break tsimend = perf_counter() # Save optimisation parameters history and fitness values history to file opthistfile = inputfileparts[0] + '_hist.pickle' with open(opthistfile, 'wb') as f: pickle.dump(optparamshist, f) pickle.dump(fitnessvalueshist, f) pickle.dump(optparamsinit, f) taguchistr = '\n=== Taguchi optimisation completed in [HH:MM:SS]: {} after {} iteration(s)'.format(datetime.timedelta(seconds=int(tsimend - tsimstart)), iteration) print('{} {}\n'.format(taguchistr, '=' * (get_terminal_width() - 1 - len(taguchistr)))) print('History of optimal parameter values {} and of fitness values {}\n'.format(dict(optparamshist), fitnessvalueshist))
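# run_opt_sim terminates on either of two conditions: the confirmation-experiment
# fitness exceeds the user-supplied stopping criterion, or (after at least three
# iterations) two successive fitness values agree to within 0.1%. A standalone
# restatement of those tests, illustrative only; fitness_stop mirrors
# fitness['stop'] above.
def _example_taguchi_should_stop(fitnessvalueshist, fitness_stop, threshold=0.1):
    latest = fitnessvalueshist[-1]
    # Stopping criterion reached
    if latest > fitness_stop:
        return True
    # Successive fitness values within a percentage threshold
    if len(fitnessvalueshist) > 2:
        change = (abs(fitnessvalueshist[-2] - latest) / latest) * 100
        if change < threshold:
            return True
    return False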
def solve_gpu(currentmodelrun, modelend, G): """Solving using FDTD method on GPU. Implemented using Nvidia CUDA. Args: currentmodelrun (int): Current model run number. modelend (int): Number of last model to run. G (class): Grid class instance - holds essential parameters describing the model. Returns: tsolve (float): Time taken to execute solving """ import pycuda.driver as drv from pycuda.compiler import SourceModule drv.init() # Create device handle and context on specifc GPU device (and make it current context) dev = drv.Device(G.gpu.deviceID) ctx = dev.make_context() # Electric and magnetic field updates - prepare kernels, and get kernel functions if Material.maxpoles > 0: kernels_fields = SourceModule( kernels_template_fields.substitute( REAL=cudafloattype, COMPLEX=cudacomplextype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=G.updatecoeffsdispersive.shape[1], NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2], NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=G.Tx.shape[1], NY_T=G.Tx.shape[2], NZ_T=G.Tx.shape[3])) else: # Set to one any substitutions for dispersive materials kernels_fields = SourceModule( kernels_template_fields.substitute( REAL=cudafloattype, COMPLEX=cudacomplextype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=1, NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2], NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3], NX_T=1, NY_T=1, NZ_T=1)) update_e_gpu = kernels_fields.get_function("update_e") update_h_gpu = kernels_fields.get_function("update_h") # Copy material coefficient arrays to constant memory of GPU (must be <64KB) for fields kernels updatecoeffsE = kernels_fields.get_global('updatecoeffsE')[0] updatecoeffsH = kernels_fields.get_global('updatecoeffsH')[0] if G.updatecoeffsE.nbytes + G.updatecoeffsH.nbytes > G.gpu.constmem: raise GeneralError( 'Too many materials in the model to fit onto constant memory of size {} on {} - {} GPU' .format(human_size(G.gpu.constmem), G.gpu.deviceID, G.gpu.name)) else: drv.memcpy_htod(updatecoeffsE, G.updatecoeffsE) drv.memcpy_htod(updatecoeffsH, G.updatecoeffsH) # Electric and magnetic field updates - dispersive materials - get kernel functions if Material.maxpoles > 0: # If there are any dispersive materials (updates are split into two parts as they require present and updated electric field values). 
update_e_dispersive_A_gpu = kernels_fields.get_function( "update_e_dispersive_A") update_e_dispersive_B_gpu = kernels_fields.get_function( "update_e_dispersive_B") G.gpu_initialise_dispersive_arrays() # Electric and magnetic field updates - set blocks per grid and initialise field arrays on GPU G.gpu_set_blocks_per_grid() G.gpu_initialise_arrays() # PML updates if G.pmls: # Prepare kernels kernels_pml = SourceModule( kernels_template_pml.substitute( REAL=cudafloattype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_R=G.pmls[0].ERA.shape[1], NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2], NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3])) # Copy material coefficient arrays to constant memory of GPU (must be <64KB) for PML kernels updatecoeffsE = kernels_pml.get_global('updatecoeffsE')[0] updatecoeffsH = kernels_pml.get_global('updatecoeffsH')[0] drv.memcpy_htod(updatecoeffsE, G.updatecoeffsE) drv.memcpy_htod(updatecoeffsH, G.updatecoeffsH) # Set block per grid, initialise arrays on GPU, and get kernel functions for pml in G.pmls: pml.gpu_set_blocks_per_grid(G) pml.gpu_initialise_arrays() pml.gpu_get_update_funcs(kernels_pml) # Receivers if G.rxs: # Initialise arrays on GPU rxcoords_gpu, rxs_gpu = gpu_initialise_rx_arrays(G) # Prepare kernel and get kernel function kernel_store_outputs = SourceModule( kernel_template_store_outputs.substitute(REAL=cudafloattype, NY_RXCOORDS=3, NX_RXS=6, NY_RXS=G.iterations, NZ_RXS=len(G.rxs), NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2])) store_outputs_gpu = kernel_store_outputs.get_function("store_outputs") # Sources - initialise arrays on GPU, prepare kernel and get kernel functions if G.voltagesources + G.hertziandipoles + G.magneticdipoles: kernels_sources = SourceModule( kernels_template_sources.substitute( REAL=cudafloattype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_SRCINFO=4, NY_SRCWAVES=G.iterations, NX_FIELDS=G.Ex.shape[0], NY_FIELDS=G.Ex.shape[1], NZ_FIELDS=G.Ex.shape[2], NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3])) # Copy material coefficient arrays to constant memory of GPU (must be <64KB) for source kernels updatecoeffsE = kernels_sources.get_global('updatecoeffsE')[0] updatecoeffsH = kernels_sources.get_global('updatecoeffsH')[0] drv.memcpy_htod(updatecoeffsE, G.updatecoeffsE) drv.memcpy_htod(updatecoeffsH, G.updatecoeffsH) if G.hertziandipoles: srcinfo1_hertzian_gpu, srcinfo2_hertzian_gpu, srcwaves_hertzian_gpu = gpu_initialise_src_arrays( G.hertziandipoles, G) update_hertzian_dipole_gpu = kernels_sources.get_function( "update_hertzian_dipole") if G.magneticdipoles: srcinfo1_magnetic_gpu, srcinfo2_magnetic_gpu, srcwaves_magnetic_gpu = gpu_initialise_src_arrays( G.magneticdipoles, G) update_magnetic_dipole_gpu = kernels_sources.get_function( "update_magnetic_dipole") if G.voltagesources: srcinfo1_voltage_gpu, srcinfo2_voltage_gpu, srcwaves_voltage_gpu = gpu_initialise_src_arrays( G.voltagesources, G) update_voltage_source_gpu = kernels_sources.get_function( "update_voltage_source") # Iteration loop timer iterstart = drv.Event() iterend = drv.Event() iterstart.record() for iteration in tqdm(range(G.iterations), desc='Running simulation, model ' + str(currentmodelrun) + '/' + str(modelend), ncols=get_terminal_width() - 1, file=sys.stdout, disable=G.tqdmdisable): # Store field component values for every receiver if 
G.rxs: store_outputs_gpu(np.int32(len(G.rxs)), np.int32(iteration), rxcoords_gpu.gpudata, rxs_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, block=(1, 1, 1), grid=(round32(len(G.rxs)), 1, 1)) # Update magnetic field components update_h_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz), G.ID_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, block=G.tpb, grid=G.bpg) # Update magnetic field components with the PML correction for pml in G.pmls: pml.gpu_update_magnetic(G) # Update magnetic field components for magetic dipole sources if G.magneticdipoles: update_magnetic_dipole_gpu(np.int32(len(G.magneticdipoles)), np.int32(iteration), floattype(G.dx), floattype(G.dy), floattype(G.dz), srcinfo1_magnetic_gpu.gpudata, srcinfo2_magnetic_gpu.gpudata, srcwaves_magnetic_gpu.gpudata, G.ID_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, block=(1, 1, 1), grid=(round32(len(G.magneticdipoles)), 1, 1)) # Update electric field components if Material.maxpoles == 0: # If all materials are non-dispersive do standard update update_e_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz), G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, block=G.tpb, grid=G.bpg) else: # If there are any dispersive materials do 1st part of dispersive update (it is split into two parts as it requires present and updated electric field values). update_e_dispersive_A_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz), np.int32(Material.maxpoles), G.updatecoeffsdispersive_gpu.gpudata, G.Tx_gpu.gpudata, G.Ty_gpu.gpudata, G.Tz_gpu.gpudata, G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, block=G.tpb, grid=G.bpg) # Update electric field components with the PML correction for pml in G.pmls: pml.gpu_update_electric(G) # Update electric field components for voltage sources if G.voltagesources: update_voltage_source_gpu(np.int32(len(G.voltagesources)), np.int32(iteration), floattype(G.dx), floattype(G.dy), floattype(G.dz), srcinfo1_voltage_gpu.gpudata, srcinfo2_voltage_gpu.gpudata, srcwaves_voltage_gpu.gpudata, G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, block=(1, 1, 1), grid=(round32(len(G.voltagesources)), 1, 1)) # Update electric field components for Hertzian dipole sources (update any Hertzian dipole sources last) if G.hertziandipoles: update_hertzian_dipole_gpu(np.int32(len(G.hertziandipoles)), np.int32(iteration), floattype(G.dx), floattype(G.dy), floattype(G.dz), srcinfo1_hertzian_gpu.gpudata, srcinfo2_hertzian_gpu.gpudata, srcwaves_hertzian_gpu.gpudata, G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, block=(1, 1, 1), grid=(round32(len(G.hertziandipoles)), 1, 1)) # If there are any dispersive materials do 2nd part of dispersive update (it is split into two parts as it requires present and updated electric field values). Therefore it can only be completely updated after the electric field has been updated by the PML and source updates. 
if Material.maxpoles > 0: update_e_dispersive_B_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz), np.int32(Material.maxpoles), G.updatecoeffsdispersive_gpu.gpudata, G.Tx_gpu.gpudata, G.Ty_gpu.gpudata, G.Tz_gpu.gpudata, G.ID_gpu.gpudata, G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, block=G.tpb, grid=G.bpg) # Copy output from receivers array back to correct receiver objects gpu_get_rx_array(rxs_gpu.get(), rxcoords_gpu.get(), G) iterend.record() iterend.synchronize() tsolve = iterstart.time_till(iterend) * 1e-3 # Remove context from top of stack and delete ctx.pop() del ctx return tsolve
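# solve_gpu follows the standard PyCUDA workflow: substitute model-specific
# constants into a kernel template, compile it with SourceModule, copy the
# material coefficient arrays to GPU constant memory, then launch the kernel
# functions each iteration with the block/grid sizes held on G. A minimal,
# self-contained example of that compile-and-launch pattern (illustrative
# kernel and data only; assumes pycuda and a CUDA-capable GPU).
def _example_pycuda_kernel_launch():
    import numpy as np
    import pycuda.autoinit  # creates a context on the default device
    import pycuda.gpuarray as gpuarray
    from pycuda.compiler import SourceModule

    mod = SourceModule("""
    __global__ void scale(int n, float *a)
    {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n) a[i] *= 2.0f;
    }
    """)
    scale = mod.get_function("scale")

    a_gpu = gpuarray.to_gpu(np.arange(16, dtype=np.float32))
    scale(np.int32(a_gpu.size), a_gpu.gpudata, block=(128, 1, 1), grid=(1, 1, 1))
    return a_gpu.get()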
def run_model(args, currentmodelrun, modelend, numbermodelruns, inputfile, usernamespace): """Runs a model - processes the input file; builds the Yee cells; calculates update coefficients; runs main FDTD loop. Args: args (dict): Namespace with command line arguments currentmodelrun (int): Current model run number. modelend (int): Number of last model to run. numbermodelruns (int): Total number of model runs. inputfile (object): File object for the input file. usernamespace (dict): Namespace that can be accessed by user in any Python code blocks in input file. Returns: tsolve (int): Length of time (seconds) of main FDTD calculations """ # Monitor memory usage p = psutil.Process() # Declare variable to hold FDTDGrid class global G # Used for naming geometry and output files appendmodelnumber = '' if numbermodelruns == 1 and not args.task and not args.restart else str( currentmodelrun) # Normal model reading/building process; bypassed if geometry information to be reused if 'G' not in globals(): # Initialise an instance of the FDTDGrid class G = FDTDGrid() # Get information about host machine G.hostinfo = get_host_info() # Single GPU object if args.gpu: G.gpu = args.gpu G.inputfilename = os.path.split(inputfile.name)[1] G.inputdirectory = os.path.dirname(os.path.abspath(inputfile.name)) inputfilestr = '\n--- Model {}/{}, input file: {}'.format( currentmodelrun, modelend, inputfile.name) print(Fore.GREEN + '{} {}\n'.format( inputfilestr, '-' * (get_terminal_width() - 1 - len(inputfilestr))) + Style.RESET_ALL) # Add the current model run to namespace that can be accessed by # user in any Python code blocks in input file usernamespace['current_model_run'] = currentmodelrun # Read input file and process any Python and include file commands processedlines = process_python_include_code(inputfile, usernamespace) # Print constants/variables in user-accessable namespace uservars = '' for key, value in sorted(usernamespace.items()): if key != '__builtins__': uservars += '{}: {}, '.format(key, value) print( 'Constants/variables used/available for Python scripting: {{{}}}\n' .format(uservars[:-2])) # Write a file containing the input commands after Python or include file commands have been processed if args.write_processed: write_processed_file(processedlines, appendmodelnumber, G) # Check validity of command names and that essential commands are present singlecmds, multicmds, geometry = check_cmd_names(processedlines) # Create built-in materials m = Material(0, 'pec') m.se = float('inf') m.type = 'builtin' m.averagable = False G.materials.append(m) m = Material(1, 'free_space') m.type = 'builtin' G.materials.append(m) # Process parameters for commands that can only occur once in the model process_singlecmds(singlecmds, G) # Process parameters for commands that can occur multiple times in the model print() process_multicmds(multicmds, G) # Initialise an array for volumetric material IDs (solid), boolean # arrays for specifying materials not to be averaged (rigid), # an array for cell edge IDs (ID) G.initialise_geometry_arrays() # Initialise arrays for the field components G.initialise_field_arrays() # Process geometry commands in the order they were given process_geometrycmds(geometry, G) # Build the PMLs and calculate initial coefficients print() if all(value == 0 for value in G.pmlthickness.values()): if G.messages: print('PML boundaries: switched off') pass # If all the PMLs are switched off don't need to build anything else: if G.messages: if all(value == G.pmlthickness['x0'] for value in 
G.pmlthickness.values()): pmlinfo = str(G.pmlthickness['x0']) + ' cells' else: pmlinfo = '' for key, value in G.pmlthickness.items(): pmlinfo += '{}: {} cells, '.format(key, value) pmlinfo = pmlinfo[:-2] print('PML boundaries: {}'.format(pmlinfo)) pbar = tqdm(total=sum(1 for value in G.pmlthickness.values() if value > 0), desc='Building PML boundaries', ncols=get_terminal_width() - 1, file=sys.stdout, disable=G.tqdmdisable) build_pmls(G, pbar) pbar.close() # Build the model, i.e. set the material properties (ID) for every edge # of every Yee cell print() pbar = tqdm(total=2, desc='Building main grid', ncols=get_terminal_width() - 1, file=sys.stdout, disable=G.tqdmdisable) build_electric_components(G.solid, G.rigidE, G.ID, G) pbar.update() build_magnetic_components(G.solid, G.rigidH, G.ID, G) pbar.update() pbar.close() # Process any voltage sources (that have resistance) to create a new # material at the source location for voltagesource in G.voltagesources: voltagesource.create_material(G) # Initialise arrays of update coefficients to pass to update functions G.initialise_std_update_coeff_arrays() # Initialise arrays of update coefficients and temporary values if # there are any dispersive materials if Material.maxpoles != 0: # Update estimated memory (RAM) usage memestimate = memory_usage(G) # Check if model can be built and/or run on host if memestimate > G.hostinfo['ram']: raise GeneralError( 'Estimated memory (RAM) required ~{} exceeds {} detected!\n' .format( human_size(memestimate), human_size(G.hostinfo['ram'], a_kilobyte_is_1024_bytes=True))) # Check if model can be run on specified GPU if required if G.gpu is not None: if memestimate > G.gpu.totalmem: raise GeneralError( 'Estimated memory (RAM) required ~{} exceeds {} detected on specified {} - {} GPU!\n' .format( human_size(memestimate), human_size(G.gpu.totalmem, a_kilobyte_is_1024_bytes=True), G.gpu.deviceID, G.gpu.name)) if G.messages: print('Estimated memory (RAM) required: ~{}'.format( human_size(memestimate))) G.initialise_dispersive_arrays() # Process complete list of materials - calculate update coefficients, # store in arrays, and build text list of materials/properties materialsdata = process_materials(G) if G.messages: print('\nMaterials:') materialstable = AsciiTable(materialsdata) materialstable.outer_border = False materialstable.justify_columns[0] = 'right' print(materialstable.table) # Check to see if numerical dispersion might be a problem results = dispersion_analysis(G) if results['error']: print( Fore.RED + "\nWARNING: Numerical dispersion analysis not carried out as {}" .format(results['error']) + Style.RESET_ALL) elif results['N'] < G.mingridsampling: raise GeneralError( "Non-physical wave propagation: Material '{}' has wavelength sampled by {} cells, less than required minimum for physical wave propagation. Maximum significant frequency estimated as {:g}Hz" .format(results['material'].ID, results['N'], results['maxfreq'])) elif results['deltavp'] and np.abs( results['deltavp']) > G.maxnumericaldisp: print( Fore.RED + "\nWARNING: Potentially significant numerical dispersion. Estimated largest physical phase-velocity error is {:.2f}% in material '{}' whose wavelength sampled by {} cells. 
Maximum significant frequency estimated as {:g}Hz" .format(results['deltavp'], results['material'].ID, results['N'], results['maxfreq']) + Style.RESET_ALL) elif results['deltavp'] and G.messages: print( "\nNumerical dispersion analysis: estimated largest physical phase-velocity error is {:.2f}% in material '{}' whose wavelength sampled by {} cells. Maximum significant frequency estimated as {:g}Hz" .format(results['deltavp'], results['material'].ID, results['N'], results['maxfreq'])) # If geometry information to be reused between model runs else: inputfilestr = '\n--- Model {}/{}, input file (not re-processed, i.e. geometry fixed): {}'.format( currentmodelrun, modelend, inputfile.name) print(Fore.GREEN + '{} {}\n'.format( inputfilestr, '-' * (get_terminal_width() - 1 - len(inputfilestr))) + Style.RESET_ALL) # Clear arrays for field components G.initialise_field_arrays() # Clear arrays for fields in PML for pml in G.pmls: pml.initialise_field_arrays() # Adjust position of simple sources and receivers if required if G.srcsteps[0] != 0 or G.srcsteps[1] != 0 or G.srcsteps[2] != 0: for source in itertools.chain(G.hertziandipoles, G.magneticdipoles): if currentmodelrun == 1: if source.xcoord + G.srcsteps[ 0] * modelend < 0 or source.xcoord + G.srcsteps[ 0] * modelend > G.nx or source.ycoord + G.srcsteps[ 1] * modelend < 0 or source.ycoord + G.srcsteps[ 1] * modelend > G.ny or source.zcoord + G.srcsteps[ 2] * modelend < 0 or source.zcoord + G.srcsteps[ 2] * modelend > G.nz: raise GeneralError( 'Source(s) will be stepped to a position outside the domain.' ) source.xcoord = source.xcoordorigin + (currentmodelrun - 1) * G.srcsteps[0] source.ycoord = source.ycoordorigin + (currentmodelrun - 1) * G.srcsteps[1] source.zcoord = source.zcoordorigin + (currentmodelrun - 1) * G.srcsteps[2] if G.rxsteps[0] != 0 or G.rxsteps[1] != 0 or G.rxsteps[2] != 0: for receiver in G.rxs: if currentmodelrun == 1: if receiver.xcoord + G.rxsteps[ 0] * modelend < 0 or receiver.xcoord + G.rxsteps[ 0] * modelend > G.nx or receiver.ycoord + G.rxsteps[ 1] * modelend < 0 or receiver.ycoord + G.rxsteps[ 1] * modelend > G.ny or receiver.zcoord + G.rxsteps[ 2] * modelend < 0 or receiver.zcoord + G.rxsteps[ 2] * modelend > G.nz: raise GeneralError( 'Receiver(s) will be stepped to a position outside the domain.' ) receiver.xcoord = receiver.xcoordorigin + (currentmodelrun - 1) * G.rxsteps[0] receiver.ycoord = receiver.ycoordorigin + (currentmodelrun - 1) * G.rxsteps[1] receiver.zcoord = receiver.zcoordorigin + (currentmodelrun - 1) * G.rxsteps[2] # Write files for any geometry views and geometry object outputs if not (G.geometryviews or G.geometryobjectswrite) and args.geometry_only: print( Fore.RED + '\nWARNING: No geometry views or geometry objects to output found.' 
+ Style.RESET_ALL) if G.geometryviews: print() for i, geometryview in enumerate(G.geometryviews): geometryview.set_filename(appendmodelnumber, G) pbar = tqdm(total=geometryview.datawritesize, unit='byte', unit_scale=True, desc='Writing geometry view file {}/{}, {}'.format( i + 1, len(G.geometryviews), os.path.split(geometryview.filename)[1]), ncols=get_terminal_width() - 1, file=sys.stdout, disable=G.tqdmdisable) geometryview.write_vtk(G, pbar) pbar.close() if G.geometryobjectswrite: for i, geometryobject in enumerate(G.geometryobjectswrite): pbar = tqdm(total=geometryobject.datawritesize, unit='byte', unit_scale=True, desc='Writing geometry object file {}/{}, {}'.format( i + 1, len(G.geometryobjectswrite), os.path.split(geometryobject.filename)[1]), ncols=get_terminal_width() - 1, file=sys.stdout, disable=G.tqdmdisable) geometryobject.write_hdf5(G, pbar) pbar.close() # If only writing geometry information if args.geometry_only: tsolve = 0 # Run simulation else: # Prepare any snapshot files for snapshot in G.snapshots: snapshot.prepare_vtk_imagedata(appendmodelnumber, G) # Output filename inputfileparts = os.path.splitext( os.path.join(G.inputdirectory, G.inputfilename)) outputfile = inputfileparts[0] + appendmodelnumber + '.out' print('\nOutput file: {}\n'.format(outputfile)) # Main FDTD solving functions for either CPU or GPU if G.gpu is None: tsolve = solve_cpu(currentmodelrun, modelend, G) else: tsolve = solve_gpu(currentmodelrun, modelend, G) # Write an output file in HDF5 format write_hdf5_outputfile(outputfile, G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz, G) if G.messages: print('Memory (RAM) used: ~{}'.format( human_size(p.memory_info().rss))) print('Solving time [HH:MM:SS]: {}'.format( datetime.timedelta(seconds=tsolve))) # If geometry information to be reused between model runs then FDTDGrid # class instance must be global so that it persists if not args.geometry_fixed: del G return tsolve
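# run_model names geometry and output files by appending the model number only
# when more than one model is run (and no task/restart numbering is in use), so
# a single run of 'cylinder.in' writes 'cylinder.out' while run 2 of 10 writes
# 'cylinder2.out'. A simplified restatement of that convention (illustrative;
# it ignores the --task and --restart cases handled above).
def _example_output_filename(inputfilename, currentmodelrun, numbermodelruns):
    import os
    base = os.path.splitext(inputfilename)[0]
    appendmodelnumber = '' if numbermodelruns == 1 else str(currentmodelrun)
    return base + appendmodelnumber + '.out'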
def run_benchmark_sim(args, inputfile, usernamespace): """ Run standard simulation in benchmarking mode - models are run one after another and each model is parallelised using either OpenMP (CPU) or CUDA (GPU) Args: args (dict): Namespace with command line arguments inputfile (object): File object for the input file. usernamespace (dict): Namespace that can be accessed by user in any Python code blocks in input file. """ # Get information about host machine hostinfo = get_host_info() hyperthreading = ', {} cores with Hyper-Threading'.format( hostinfo['logicalcores']) if hostinfo['hyperthreading'] else '' machineIDlong = '{}; {} x {} ({} cores{}); {} RAM; {}'.format( hostinfo['machineID'], hostinfo['sockets'], hostinfo['cpuID'], hostinfo['physicalcores'], hyperthreading, human_size(hostinfo['ram'], a_kilobyte_is_1024_bytes=True), hostinfo['osversion']) # Initialise arrays to hold CPU thread info and times, and GPU info and times cputhreads = np.array([], dtype=np.int32) cputimes = np.array([]) gpuIDs = [] gputimes = np.array([]) # CPU only benchmarking if args.gpu is None: # Number of CPU threads to benchmark - start from single thread and double threads until maximum number of physical cores threads = 1 maxthreads = hostinfo['physicalcores'] maxthreadspersocket = hostinfo['physicalcores'] / hostinfo['sockets'] while threads < maxthreadspersocket: cputhreads = np.append(cputhreads, int(threads)) threads *= 2 # Check for system with only single thread if cputhreads.size == 0: cputhreads = np.append(cputhreads, threads) # Add maxthreadspersocket and maxthreads if necessary if cputhreads[-1] != maxthreadspersocket: cputhreads = np.append(cputhreads, int(maxthreadspersocket)) if cputhreads[-1] != maxthreads: cputhreads = np.append(cputhreads, int(maxthreads)) cputhreads = cputhreads[::-1] cputimes = np.zeros(len(cputhreads)) numbermodelruns = len(cputhreads) # GPU only benchmarking else: # Set size of array to store GPU runtimes and number of runs of model required if isinstance(args.gpu, list): for gpu in args.gpu: gpuIDs.append(gpu.name) gputimes = np.zeros(len(args.gpu)) numbermodelruns = len(args.gpu) else: gpuIDs.append(args.gpu.name) gputimes = np.zeros(1) numbermodelruns = 1 # Store GPU information in a temp variable gpus = args.gpu usernamespace['number_model_runs'] = numbermodelruns modelend = numbermodelruns + 1 for currentmodelrun in range(1, modelend): # Run CPU benchmark if args.gpu is None: os.environ['OMP_NUM_THREADS'] = str(cputhreads[currentmodelrun - 1]) cputimes[currentmodelrun - 1] = run_model(args, currentmodelrun, modelend - 1, numbermodelruns, inputfile, usernamespace) # Run GPU benchmark else: if isinstance(gpus, list): args.gpu = gpus[(currentmodelrun - 1)] else: args.gpu = gpus os.environ['OMP_NUM_THREADS'] = str(hostinfo['physicalcores']) gputimes[(currentmodelrun - 1)] = run_model( args, currentmodelrun, modelend - 1, numbermodelruns, inputfile, usernamespace) # Get model size (in cells) and number of iterations if currentmodelrun == 1: if numbermodelruns == 1: outputfile = os.path.splitext(args.inputfile)[0] + '.out' else: outputfile = os.path.splitext( args.inputfile)[0] + str(currentmodelrun) + '.out' f = h5py.File(outputfile, 'r') iterations = f.attrs['Iterations'] numcells = f.attrs['nx, ny, nz'] # Save number of threads and benchmarking times to NumPy archive np.savez(os.path.splitext(inputfile.name)[0], machineID=machineIDlong, gpuIDs=gpuIDs, cputhreads=cputhreads, cputimes=cputimes, gputimes=gputimes, iterations=iterations, numcells=numcells, 
version=__version__) simcompletestr = '\n=== Simulation completed' print('{} {}\n'.format( simcompletestr, '=' * (get_terminal_width() - 1 - len(simcompletestr))))
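# run_benchmark_sim builds its list of CPU thread counts by doubling from a
# single thread up to the per-socket core count, then adding the per-socket and
# whole-machine maxima, and benchmarks them from largest to smallest. A
# standalone restatement of that selection (illustrative; e.g. 16 physical
# cores over 2 sockets gives [16, 8, 4, 2, 1]).
def _example_benchmark_thread_counts(physicalcores, sockets):
    import numpy as np
    maxthreadspersocket = physicalcores / sockets
    threads = 1
    cputhreads = np.array([], dtype=np.int32)
    while threads < maxthreadspersocket:
        cputhreads = np.append(cputhreads, int(threads))
        threads *= 2
    if cputhreads.size == 0:  # single-threaded system
        cputhreads = np.append(cputhreads, threads)
    if cputhreads[-1] != maxthreadspersocket:
        cputhreads = np.append(cputhreads, int(maxthreadspersocket))
    if cputhreads[-1] != physicalcores:
        cputhreads = np.append(cputhreads, int(physicalcores))
    return cputhreads[::-1]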
def solve_cpu(currentmodelrun, modelend, G): """ Solving using FDTD method on CPU. Parallelised using Cython (OpenMP) for electric and magnetic field updates, and PML updates. Args: currentmodelrun (int): Current model run number. modelend (int): Number of last model to run. G (class): Grid class instance - holds essential parameters describing the model. Returns: tsolve (float): Time taken to execute solving """ tsolvestart = perf_counter() for iteration in tqdm(range(G.iterations), desc='Running simulation, model ' + str(currentmodelrun) + '/' + str(modelend), ncols=get_terminal_width() - 1, file=sys.stdout, disable=G.tqdmdisable): # Store field component values for every receiver and transmission line store_outputs(iteration, G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz, G) # Write any snapshots to file for i, snap in enumerate(G.snapshots): if snap.time == iteration + 1: snapiters = 36 * (((snap.xf - snap.xs) / snap.dx) * ((snap.yf - snap.ys) / snap.dy) * ((snap.zf - snap.zs) / snap.dz)) pbar = tqdm(total=snapiters, leave=False, unit='byte', unit_scale=True, desc=' Writing snapshot file {} of {}, {}'.format( i + 1, len(G.snapshots), os.path.split(snap.filename)[1]), ncols=get_terminal_width() - 1, file=sys.stdout, disable=G.tqdmdisable) snap.write_vtk_imagedata(G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz, G, pbar) pbar.close() # Update magnetic field components update_magnetic(G.nx, G.ny, G.nz, G.nthreads, G.updatecoeffsH, G.ID, G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz) # Update magnetic field components with the PML correction for pml in G.pmls: pml.update_magnetic(G) # Update magnetic field components from sources for source in G.transmissionlines + G.magneticdipoles: source.update_magnetic(iteration, G.updatecoeffsH, G.ID, G.Hx, G.Hy, G.Hz, G) # Update electric field components # All materials are non-dispersive so do standard update if Material.maxpoles == 0: update_electric(G.nx, G.ny, G.nz, G.nthreads, G.updatecoeffsE, G.ID, G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz) # If there are any dispersive materials do 1st part of dispersive update # (it is split into two parts as it requires present and updated electric field values). elif Material.maxpoles == 1: update_electric_dispersive_1pole_A(G.nx, G.ny, G.nz, G.nthreads, G.updatecoeffsE, G.updatecoeffsdispersive, G.ID, G.Tx, G.Ty, G.Tz, G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz) elif Material.maxpoles > 1: update_electric_dispersive_multipole_A( G.nx, G.ny, G.nz, G.nthreads, Material.maxpoles, G.updatecoeffsE, G.updatecoeffsdispersive, G.ID, G.Tx, G.Ty, G.Tz, G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz) # Update electric field components with the PML correction for pml in G.pmls: pml.update_electric(G) # Update electric field components from sources (update any Hertzian dipole sources last) for source in G.voltagesources + G.transmissionlines + G.hertziandipoles: source.update_electric(iteration, G.updatecoeffsE, G.ID, G.Ex, G.Ey, G.Ez, G) # If there are any dispersive materials do 2nd part of dispersive update # (it is split into two parts as it requires present and updated electric # field values). Therefore it can only be completely updated after the # electric field has been updated by the PML and source updates. 
if Material.maxpoles == 1: update_electric_dispersive_1pole_B(G.nx, G.ny, G.nz, G.nthreads, G.updatecoeffsdispersive, G.ID, G.Tx, G.Ty, G.Tz, G.Ex, G.Ey, G.Ez) elif Material.maxpoles > 1: update_electric_dispersive_multipole_B(G.nx, G.ny, G.nz, G.nthreads, Material.maxpoles, G.updatecoeffsdispersive, G.ID, G.Tx, G.Ty, G.Tz, G.Ex, G.Ey, G.Ez) tsolve = perf_counter() - tsolvestart return tsolve
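# Each pass of the loop in solve_cpu is one leapfrog step: H is advanced from
# the curl of E, PML and source corrections are applied, then E is advanced
# from the updated H (with the dispersive update split around the PML/source
# stage because it needs both old and new E values). The 1-D toy below shows
# the same H-then-E ordering in free space (illustrative constants; not gprMax
# code).
def _example_fdtd_1d(nz=200, iterations=400):
    import numpy as np
    Ex = np.zeros(nz)
    Hy = np.zeros(nz)
    for n in range(iterations):
        Hy[:-1] += 0.5 * (Ex[1:] - Ex[:-1])   # update magnetic field from E
        Ex[1:] += 0.5 * (Hy[1:] - Hy[:-1])    # update electric field from new H
        Ex[nz // 2] += np.exp(-0.5 * ((n - 30.0) / 10.0) ** 2)  # soft Gaussian source
    return Ex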
def run_mpi_alt_sim(args, inputfile, usernamespace, optparams=None): """ Alternate MPI implementation that avoids using the spawn mechanism. Run mixed mode MPI/OpenMP simulation - MPI task farm for models with each model parallelised using either OpenMP (CPU) or CUDA (GPU) Args: args (dict): Namespace with command line arguments inputfile (object): File object for the input file. usernamespace (dict): Namespace that can be accessed by user in any Python code blocks in input file. optparams (dict): Optional argument. For Taguchi optimisation it provides the parameters to optimise and their values. """ from mpi4py import MPI # Define MPI message tags tags = Enum('tags', {'READY': 0, 'DONE': 1, 'EXIT': 2, 'START': 3}) # Initializations and preliminaries comm = MPI.COMM_WORLD size = comm.Get_size() # total number of processes rank = comm.Get_rank() # rank of this process status = MPI.Status() # get MPI status object hostname = MPI.Get_processor_name() # get name of processor/host # Set range for number of models to run modelstart = args.restart if args.restart else 1 modelend = modelstart + args.n numbermodelruns = args.n currentmodelrun = modelstart # can use -task argument to start numbering from something other than 1 numworkers = size - 1 ################## # Master process # ################## if rank == 0: tsimstart = perf_counter() print('MPI master (rank {}, PID {}) on {} using {} workers\n'.format( rank, os.getpid(), hostname, numworkers)) closedworkers = 0 while closedworkers < numworkers: data = comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status) source = status.Get_source() tag = status.Get_tag() # Worker is ready, so send it a task if tag == tags.READY.value: if currentmodelrun < modelend: comm.send(currentmodelrun, dest=source, tag=tags.START.value) currentmodelrun += 1 else: comm.send(None, dest=source, tag=tags.EXIT.value) # Worker has completed a task elif tag == tags.DONE.value: pass # Worker has completed all tasks elif tag == tags.EXIT.value: closedworkers += 1 # Shutdown communicator comm.Disconnect() tsimend = perf_counter() simcompletestr = '\n=== Simulation completed in [HH:MM:SS]: {}'.format( datetime.timedelta(seconds=tsimend - tsimstart)) print('{} {}\n'.format( simcompletestr, '=' * (get_terminal_width() - 1 - len(simcompletestr)))) ################## # Worker process # ################## else: while True: comm.send(None, dest=0, tag=tags.READY.value) # Receive a model number to run from the master currentmodelrun = comm.recv(source=0, tag=MPI.ANY_TAG, status=status) tag = status.Get_tag() # Run a model if tag == tags.START.value: # Get info and setup device ID for GPU(s) gpuinfo = '' if args.gpu is not None: # Set device ID for multiple GPUs if isinstance(args.gpu, list): deviceID = (rank - 1) % len(args.gpu) args.gpu = next(gpu for gpu in args.gpu if gpu.deviceID == deviceID) gpuinfo = ' using {} - {}, {}'.format( args.gpu.deviceID, args.gpu.name, human_size(args.gpu.totalmem, a_kilobyte_is_1024_bytes=True)) # If Taguchi optimistaion, add specific value for each parameter # to optimise for each experiment to user accessible namespace if optparams: tmp = {} tmp.update((key, value[currentmodelrun - 1]) for key, value in optparams.items()) modelusernamespace = usernamespace.copy() modelusernamespace.update({'optparams': tmp}) else: modelusernamespace = usernamespace # Run the model print('MPI worker (rank {}) starting model {}/{}{} on {}\n'. 
format(rank, currentmodelrun, numbermodelruns, gpuinfo, hostname)) run_model(args, currentmodelrun, modelend - 1, numbermodelruns, inputfile, modelusernamespace) comm.send(None, dest=0, tag=tags.DONE.value) # Break out of loop when worker receives exit message elif tag == tags.EXIT.value: break comm.send(None, dest=0, tag=tags.EXIT.value)
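# run_mpi_alt_sim avoids MPI.Spawn entirely: all ranks are launched together
# (e.g. `mpiexec -n 5 python -m gprMax ...`), rank 0 hands out model numbers
# using the READY/START/DONE/EXIT tags, and the remaining ranks loop until told
# to exit. A stripped-down sketch of that exchange (illustrative; do_work
# stands in for run_model).
def _example_tagged_task_farm(nmodels, do_work):
    from enum import Enum
    from mpi4py import MPI
    tags = Enum('tags', {'READY': 0, 'DONE': 1, 'EXIT': 2, 'START': 3})
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    status = MPI.Status()
    if rank == 0:
        # Master: reply to READY with a model number, or EXIT when none remain
        nextmodel, closedworkers = 1, 0
        while closedworkers < comm.Get_size() - 1:
            comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)
            source, tag = status.Get_source(), status.Get_tag()
            if tag == tags.READY.value:
                if nextmodel <= nmodels:
                    comm.send(nextmodel, dest=source, tag=tags.START.value)
                    nextmodel += 1
                else:
                    comm.send(None, dest=source, tag=tags.EXIT.value)
            elif tag == tags.EXIT.value:
                closedworkers += 1
    else:
        # Worker: ask for work until told to exit
        while True:
            comm.send(None, dest=0, tag=tags.READY.value)
            model = comm.recv(source=0, tag=MPI.ANY_TAG, status=status)
            if status.Get_tag() == tags.EXIT.value:
                break
            do_work(model)
            comm.send(None, dest=0, tag=tags.DONE.value)
        comm.send(None, dest=0, tag=tags.EXIT.value)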
def run_mpi_sim(args, inputfile, usernamespace, optparams=None): """ Run mixed mode MPI/OpenMP simulation - MPI task farm for models with each model parallelised using either OpenMP (CPU) or CUDA (GPU) Args: args (dict): Namespace with command line arguments inputfile (object): File object for the input file. usernamespace (dict): Namespace that can be accessed by user in any Python code blocks in input file. optparams (dict): Optional argument. For Taguchi optimisation it provides the parameters to optimise and their values. """ from mpi4py import MPI status = MPI.Status() hostname = MPI.Get_processor_name() # Set range for number of models to run modelstart = args.restart if args.restart else 1 modelend = modelstart + args.n numbermodelruns = args.n # Command line flag used to indicate a spawned worker instance workerflag = '--mpi-worker' numworkers = args.mpi - 1 ################## # Master process # ################## if workerflag not in sys.argv: # N.B Spawned worker flag (--mpi-worker) applied to sys.argv when MPI.Spawn is called # Get MPI communicator object either through argument or just get comm_world if hasattr(args, 'mpicomm'): comm = args.mpicomm else: comm = MPI.COMM_WORLD size = comm.Get_size() # total number of processes rank = comm.Get_rank() # rank of this process tsimstart = perf_counter() print('MPI master ({}, rank {}) on {} using {} workers\n'.format( comm.name, rank, hostname, numworkers)) # Assemble a sys.argv replacement to pass to spawned worker # N.B This is required as sys.argv not available when gprMax is called via api() # Ignore mpicomm object if it exists as only strings can be passed via spawn myargv = [] for key, value in vars(args).items(): if value: if 'inputfile' in key: myargv.append(value) elif 'gpu' in key: myargv.append('-' + key) if not isinstance(value, list): myargv.append(str(value.deviceID)) elif 'mpicomm' in key: pass elif '_' in key: key = key.replace('_', '-') myargv.append('--' + key) myargv.append(str(value)) else: myargv.append('-' + key) myargv.append(str(value)) # Create a list of work worklist = [] for model in range(modelstart, modelend): workobj = dict() workobj['currentmodelrun'] = model if optparams: workobj['optparams'] = optparams worklist.append(workobj) # Add stop sentinels worklist += ([StopIteration] * numworkers) # Spawn workers newcomm = comm.Spawn(sys.executable, args=['-m', 'gprMax'] + myargv + [workerflag], maxprocs=numworkers) # Reply to whoever asks until done for work in worklist: newcomm.recv(source=MPI.ANY_SOURCE, status=status) newcomm.send(obj=work, dest=status.Get_source()) # Shutdown communicators newcomm.Disconnect() tsimend = perf_counter() simcompletestr = '\n=== Simulation completed in [HH:MM:SS]: {}'.format( datetime.timedelta(seconds=tsimend - tsimstart)) print('{} {}\n'.format( simcompletestr, '=' * (get_terminal_width() - 1 - len(simcompletestr)))) ################## # Worker process # ################## elif workerflag in sys.argv: # Connect to parent to get communicator try: comm = MPI.Comm.Get_parent() rank = comm.Get_rank() except ValueError: raise ValueError( 'MPI worker (rank {}) could not connect to parent') # Ask for work until stop sentinel for work in iter(lambda: comm.sendrecv(0, dest=0), StopIteration): currentmodelrun = work['currentmodelrun'] # Get info and setup device ID for GPU(s) gpuinfo = '' if args.gpu is not None: # Set device ID for multiple GPUs if isinstance(args.gpu, list): deviceID = (rank - 1) % len(args.gpu) args.gpu = next(gpu for gpu in args.gpu if gpu.deviceID == deviceID) 
gpuinfo = ' using {} - {}, {} RAM '.format( args.gpu.deviceID, args.gpu.name, human_size(args.gpu.totalmem, a_kilobyte_is_1024_bytes=True)) # If Taguchi optimisation, add specific value for each parameter to # optimise for each experiment to user-accessible namespace if 'optparams' in work: tmp = {} tmp.update((key, value[currentmodelrun - 1]) for key, value in work['optparams'].items()) modelusernamespace = usernamespace.copy() modelusernamespace.update({'optparams': tmp}) else: modelusernamespace = usernamespace # Run the model print('MPI worker (rank {}) starting model {}/{}{} on {}\n'.format( rank, currentmodelrun, numbermodelruns, gpuinfo, hostname)) run_model(args, currentmodelrun, modelend - 1, numbermodelruns, inputfile, modelusernamespace) # Shutdown comm.Disconnect()
def run_mpi_sim(args, inputfile, usernamespace, optparams=None): """ Run mixed mode MPI/OpenMP simulation - MPI task farm for models with each model parallelised using either OpenMP (CPU) or CUDA (GPU) Args: args (dict): Namespace with command line arguments inputfile (object): File object for the input file. usernamespace (dict): Namespace that can be accessed by user in any Python code blocks in input file. optparams (dict): Optional argument. For Taguchi optimisation it provides the parameters to optimise and their values. """ from mpi4py import MPI status = MPI.Status() hostname = platform.node() # Set range for number of models to run modelstart = args.restart if args.restart else 1 modelend = modelstart + args.n numbermodelruns = args.n # Command line flag used to indicate a spawned worker instance workerflag = '--mpi-worker' numworkers = args.mpi - 1 ################## # Master process # ################## if workerflag not in sys.argv: # N.B Spawned worker flag (--mpi-worker) applied to sys.argv when MPI.Spawn is called # See if the MPI communicator object is being passed as an argument (likely from a MPI.Split) if args.mpicomm is not None: comm = args.mpicomm else: comm = MPI.COMM_WORLD tsimstart = timer() mpistartstr = '\n=== MPI task farm (USING MPI Spawn)' print('{} {}'.format( mpistartstr, '=' * (get_terminal_width() - 1 - len(mpistartstr)))) print('=== MPI master ({}, rank: {}) on {} spawning {} workers...'. format(comm.name, comm.Get_rank(), hostname, numworkers)) # Assemble a sys.argv replacement to pass to spawned worker # N.B This is required as sys.argv not available when gprMax is called via api() # Ignore mpicomm object if it exists as only strings can be passed via spawn myargv = [] for key, value in vars(args).items(): if value: # Input file name always comes first if 'inputfile' in key: myargv.append(value) elif 'gpu' in key: myargv.append('-' + key) # Add GPU device ID(s) from GPU objects for gpu in args.gpu: myargv.append(str(gpu.deviceID)) elif 'mpicomm' in key: pass elif '_' in key: key = key.replace('_', '-') myargv.append('--' + key) else: myargv.append('-' + key) if value is not True: myargv.append(str(value)) # Create a list of work worklist = [] for model in range(modelstart, modelend): workobj = dict() workobj['currentmodelrun'] = model workobj['mpicommname'] = comm.name if optparams: workobj['optparams'] = optparams worklist.append(workobj) # Add stop sentinels worklist += ([StopIteration] * numworkers) # Spawn workers newcomm = comm.Spawn(sys.executable, args=['-m', 'gprMax'] + myargv + [workerflag], maxprocs=numworkers) # Reply to whoever asks until done for work in worklist: newcomm.recv(source=MPI.ANY_SOURCE, status=status) newcomm.send(obj=work, dest=status.Get_source()) # Shutdown communicators newcomm.Disconnect() tsimend = timer() simcompletestr = '\n=== MPI master ({}, rank: {}) on {} completed simulation in [HH:MM:SS]: {}'.format( comm.name, comm.Get_rank(), hostname, datetime.timedelta(seconds=tsimend - tsimstart)) print('{} {}\n'.format( simcompletestr, '=' * (get_terminal_width() - 1 - len(simcompletestr)))) ################## # Worker process # ################## elif workerflag in sys.argv: # Connect to parent to get communicator try: comm = MPI.Comm.Get_parent() rank = comm.Get_rank() except ValueError: raise ValueError('MPI worker could not connect to parent') # Select GPU and get info gpuinfo = '' if args.gpu is not None: # Set device ID based on rank from list of GPUs try: args.gpu = args.gpu[rank] # GPUs on multiple nodes where 
CUDA_VISIBLE_DEVICES is the same on each node except: args.gpu = args.gpu[rank % len(args.gpu)] gpuinfo = ' using {} - {}, {} RAM '.format( args.gpu.deviceID, args.gpu.name, human_size(args.gpu.totalmem, a_kilobyte_is_1024_bytes=True)) # Ask for work until stop sentinel for work in iter(lambda: comm.sendrecv(0, dest=0), StopIteration): currentmodelrun = work['currentmodelrun'] # If Taguchi optimisation, add specific value for each parameter to # optimise for each experiment to user-accessible namespace if 'optparams' in work: tmp = {} tmp.update((key, value[currentmodelrun - 1]) for key, value in work['optparams'].items()) modelusernamespace = usernamespace.copy() modelusernamespace.update({'optparams': tmp}) else: modelusernamespace = usernamespace # Run the model print( 'Starting MPI spawned worker (parent: {}, rank: {}) on {} with model {}/{}{}\n' .format(work['mpicommname'], rank, hostname, currentmodelrun, numbermodelruns, gpuinfo)) tsolve = run_model(args, currentmodelrun, modelend - 1, numbermodelruns, inputfile, modelusernamespace) print( 'Completed MPI spawned worker (parent: {}, rank: {}) on {} with model {}/{}{} in [HH:MM:SS]: {}\n' .format(work['mpicommname'], rank, hostname, currentmodelrun, numbermodelruns, gpuinfo, datetime.timedelta(seconds=tsolve))) # Shutdown comm.Disconnect()
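# When several GPUs are given, the spawned-worker version above assigns one per
# worker by rank, wrapping around with a modulo when there are more ranks than
# GPUs (e.g. across nodes where CUDA_VISIBLE_DEVICES repeats). A small
# restatement of that selection (illustrative; plain strings stand in for
# gprMax's GPU objects).
def _example_select_gpu(gpus, rank):
    try:
        return gpus[rank]               # one GPU per worker where possible
    except IndexError:
        return gpus[rank % len(gpus)]   # wrap around when ranks exceed GPUs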
def run_opt_sim(args, inputfile, usernamespace): """Run a simulation using Taguchi's optmisation process. Args: args (dict): Namespace with command line arguments inputfile (object): File object for the input file. usernamespace (dict): Namespace that can be accessed by user in any Python code blocks in input file. """ tsimstart = perf_counter() if args.n > 1: raise CmdInputError( 'When a Taguchi optimisation is being carried out the number of model runs argument is not required' ) inputfileparts = os.path.splitext(inputfile.name) # Default maximum number of iterations of optimisation to perform (used # if the stopping criterion is not achieved) maxiterations = 20 # Process Taguchi code blocks in the input file; pass in ordered # dictionary to hold parameters to optimise tmp = usernamespace.copy() tmp.update({'optparams': OrderedDict()}) taguchinamespace = taguchi_code_blocks(inputfile, tmp) # Extract dictionaries and variables containing initialisation parameters optparams = taguchinamespace['optparams'] fitness = taguchinamespace['fitness'] if 'maxiterations' in taguchinamespace: maxiterations = taguchinamespace['maxiterations'] # Store initial parameter ranges optparamsinit = list(optparams.items()) # Dictionary to hold history of optmised values of parameters optparamshist = OrderedDict((key, list()) for key in optparams) # Import specified fitness function fitness_metric = getattr( import_module('user_libs.optimisation_taguchi.fitness_functions'), fitness['name']) # Select OA OA, N, cols, k, s, t = construct_OA(optparams) taguchistr = '\n--- Taguchi optimisation' print('{} {}\n'.format(taguchistr, '-' * (get_terminal_width() - 1 - len(taguchistr)))) print( 'Orthogonal array: {:g} experiments per iteration, {:g} parameters ({:g} will be used), {:g} levels, and strength {:g}' .format(N, cols, k, s, t)) tmp = [(k, v) for k, v in optparams.items()] print('Parameters to optimise with ranges: {}'.format( str(tmp).strip('[]'))) print('Output name(s) from model: {}'.format(fitness['args']['outputs'])) print('Fitness function "{}" with stopping criterion {:g}'.format( fitness['name'], fitness['stop'])) print('Maximum iterations: {:g}'.format(maxiterations)) # Initialise arrays and lists to store parameters required throughout optimisation # Lower, central, and upper values for each parameter levels = np.zeros((s, k), dtype=floattype) # Optimal lower, central, or upper value for each parameter levelsopt = np.zeros(k, dtype=np.uint8) # Difference used to set values for levels levelsdiff = np.zeros(k, dtype=floattype) # History of fitness values from each confirmation experiment fitnessvalueshist = [] iteration = 0 while iteration < maxiterations: # Reset number of model runs to number of experiments args.n = N usernamespace['number_model_runs'] = N # Fitness values for each experiment fitnessvalues = [] # Set parameter ranges and define experiments optparams, levels, levelsdiff = calculate_ranges_experiments( optparams, optparamsinit, levels, levelsopt, levelsdiff, OA, N, k, s, iteration) # Run model for each experiment # Mixed mode MPI with OpenMP or CUDA - MPI task farm for models with # each model parallelised with OpenMP (CPU) or CUDA (GPU) if args.mpi: run_mpi_sim(args, inputfile, usernamespace, optparams) # Standard behaviour - models run serially with each model parallelised # with OpenMP (CPU) or CUDA (GPU) else: run_std_sim(args, inputfile, usernamespace, optparams) # Calculate fitness value for each experiment for experiment in range(1, N + 1): outputfile = inputfileparts[0] + 
str(experiment) + '.out' fitnessvalues.append(fitness_metric(outputfile, fitness['args'])) os.remove(outputfile) taguchistr = '\n--- Taguchi optimisation, iteration {}: {} initial experiments with fitness values {}.'.format( iteration + 1, N, fitnessvalues) print('{} {}\n'.format( taguchistr, '-' * (get_terminal_width() - 1 - len(taguchistr)))) # Calculate optimal levels from fitness values by building a response # table; update dictionary of parameters with optimal values optparams, levelsopt = calculate_optimal_levels( optparams, levels, levelsopt, fitnessvalues, OA, N, k) # Update dictionary with history of parameters with optimal values for key, value in optparams.items(): optparamshist[key].append(value[0]) # Run a confirmation experiment with optimal values args.n = 1 usernamespace['number_model_runs'] = 1 # Mixed mode MPI with OpenMP or CUDA - MPI task farm for models with # each model parallelised with OpenMP (CPU) or CUDA (GPU) if args.mpi: run_mpi_sim(args, inputfile, usernamespace, optparams) # Standard behaviour - models run serially with each model parallelised # with OpenMP (CPU) or CUDA (GPU) else: run_std_sim(args, inputfile, usernamespace, optparams) # Calculate fitness value for confirmation experiment outputfile = inputfileparts[0] + '.out' fitnessvalueshist.append(fitness_metric(outputfile, fitness['args'])) # Rename confirmation experiment output file so that it is retained for each iteraction os.rename( outputfile, os.path.splitext(outputfile)[0] + '_final' + str(iteration + 1) + '.out') taguchistr = '\n--- Taguchi optimisation, iteration {} completed. History of optimal parameter values {} and of fitness values {}'.format( iteration + 1, dict(optparamshist), fitnessvalueshist) print('{} {}\n'.format( taguchistr, '-' * (get_terminal_width() - 1 - len(taguchistr)))) iteration += 1 # Stop optimisation if stopping criterion has been reached if fitnessvalueshist[iteration - 1] > fitness['stop']: taguchistr = '\n--- Taguchi optimisation stopped as fitness criteria reached: {:g} > {:g}'.format( fitnessvalueshist[iteration - 1], fitness['stop']) print('{} {}\n'.format( taguchistr, '-' * (get_terminal_width() - 1 - len(taguchistr)))) break # Stop optimisation if successive fitness values are within a percentage threshold fitnessvaluesthres = 0.1 if iteration > 2: fitnessvaluesclose = (np.abs(fitnessvalueshist[iteration - 2] - fitnessvalueshist[iteration - 1]) / fitnessvalueshist[iteration - 1]) * 100 if fitnessvaluesclose < fitnessvaluesthres: taguchistr = '\n--- Taguchi optimisation stopped as successive fitness values within {}%'.format( fitnessvaluesthres) print('{} {}\n'.format( taguchistr, '-' * (get_terminal_width() - 1 - len(taguchistr)))) break tsimend = perf_counter() # Save optimisation parameters history and fitness values history to file opthistfile = inputfileparts[0] + '_hist.pickle' with open(opthistfile, 'wb') as f: pickle.dump(optparamshist, f) pickle.dump(fitnessvalueshist, f) pickle.dump(optparamsinit, f) taguchistr = '\n=== Taguchi optimisation completed in [HH:MM:SS]: {} after {} iteration(s)'.format( datetime.timedelta(seconds=int(tsimend - tsimstart)), iteration) print('{} {}\n'.format(taguchistr, '=' * (get_terminal_width() - 1 - len(taguchistr)))) print('History of optimal parameter values {} and of fitness values {}\n'. format(dict(optparamshist), fitnessvalueshist))
def solve_cpu(currentmodelrun, modelend, G): """ Solving using FDTD method on CPU. Parallelised using Cython (OpenMP) for electric and magnetic field updates, and PML updates. Args: currentmodelrun (int): Current model run number. modelend (int): Number of last model to run. G (class): Grid class instance - holds essential parameters describing the model. Returns: tsolve (float): Time taken to execute solving """ tsolvestart = timer() for iteration in tqdm(range(G.iterations), desc='Running simulation, model ' + str(currentmodelrun) + '/' + str(modelend), ncols=get_terminal_width() - 1, file=sys.stdout, disable=not G.progressbars): # Store field component values for every receiver and transmission line store_outputs(iteration, G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz, G) # Store any snapshots for snap in G.snapshots: if snap.time == iteration + 1: snap.store(G) # Update magnetic field components update_magnetic(G.nx, G.ny, G.nz, G.nthreads, G.updatecoeffsH, G.ID, G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz) # Update magnetic field components with the PML correction for pml in G.pmls: pml.update_magnetic(G) # Update magnetic field components from sources for source in G.transmissionlines + G.magneticdipoles: source.update_magnetic(iteration, G.updatecoeffsH, G.ID, G.Hx, G.Hy, G.Hz, G) # Update electric field components # All materials are non-dispersive so do standard update if Material.maxpoles == 0: update_electric(G.nx, G.ny, G.nz, G.nthreads, G.updatecoeffsE, G.ID, G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz) # If there are any dispersive materials do 1st part of dispersive update # (it is split into two parts as it requires present and updated electric field values). elif Material.maxpoles == 1: update_electric_dispersive_1pole_A(G.nx, G.ny, G.nz, G.nthreads, G.updatecoeffsE, G.updatecoeffsdispersive, G.ID, G.Tx, G.Ty, G.Tz, G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz) elif Material.maxpoles > 1: update_electric_dispersive_multipole_A( G.nx, G.ny, G.nz, G.nthreads, Material.maxpoles, G.updatecoeffsE, G.updatecoeffsdispersive, G.ID, G.Tx, G.Ty, G.Tz, G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz) # Update electric field components with the PML correction for pml in G.pmls: pml.update_electric(G) # Update electric field components from sources (update any Hertzian dipole sources last) for source in G.voltagesources + G.transmissionlines + G.hertziandipoles: source.update_electric(iteration, G.updatecoeffsE, G.ID, G.Ex, G.Ey, G.Ez, G) # If there are any dispersive materials do 2nd part of dispersive update # (it is split into two parts as it requires present and updated electric # field values). Therefore it can only be completely updated after the # electric field has been updated by the PML and source updates. if Material.maxpoles == 1: update_electric_dispersive_1pole_B(G.nx, G.ny, G.nz, G.nthreads, G.updatecoeffsdispersive, G.ID, G.Tx, G.Ty, G.Tz, G.Ex, G.Ey, G.Ez) elif Material.maxpoles > 1: update_electric_dispersive_multipole_B(G.nx, G.ny, G.nz, G.nthreads, Material.maxpoles, G.updatecoeffsdispersive, G.ID, G.Tx, G.Ty, G.Tz, G.Ex, G.Ey, G.Ez) tsolve = timer() - tsolvestart return tsolve