def memory_check(self, snapsmemsize=0):
    """Check if the required amount of memory (RAM) is available on the host
        and GPU if specified.

    Args:
        snapsmemsize (int): amount of memory (bytes) required to store all
            requested snapshots.
    """

    # Check if model can be built and/or run on host
    if self.memoryusage > self.hostinfo['ram']:
        raise GeneralError('Memory (RAM) required ~{} exceeds {} detected!\n'.format(
            human_size(self.memoryusage),
            human_size(self.hostinfo['ram'], a_kilobyte_is_1024_bytes=True)))

    # Check if model can be run on specified GPU if required
    if self.gpu is not None:
        if self.memoryusage - snapsmemsize > self.gpu.totalmem:
            raise GeneralError('Memory (RAM) required ~{} exceeds {} detected on specified {} - {} GPU!\n'.format(
                human_size(self.memoryusage),
                human_size(self.gpu.totalmem, a_kilobyte_is_1024_bytes=True),
                self.gpu.deviceID, self.gpu.name))

        # If the required memory without the snapshots will fit on the GPU
        # then transfer and store snapshots on host
        if snapsmemsize != 0 and self.memoryusage - snapsmemsize < self.gpu.totalmem:
            self.snapsgpu2cpu = True
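# Illustrative sketch (not part of gprMax): a standalone version of the same
# host/GPU memory test with made-up numbers, to show how the snapshot memory
# term interacts with the GPU check above. All names here (check_fit, host_ram,
# gpu_totalmem) are hypothetical.
def check_fit(memoryusage, host_ram, gpu_totalmem=None, snapsmemsize=0):
    """Return (fits_on_host, fits_on_gpu, snaps_on_host) for the given sizes in bytes."""
    fits_on_host = memoryusage <= host_ram
    fits_on_gpu = None
    snaps_on_host = False
    if gpu_totalmem is not None:
        # Snapshots can be streamed back to the host, so only the remainder
        # of the model has to fit in GPU memory
        fits_on_gpu = (memoryusage - snapsmemsize) <= gpu_totalmem
        snaps_on_host = snapsmemsize != 0 and (memoryusage - snapsmemsize) < gpu_totalmem
    return fits_on_host, fits_on_gpu, snaps_on_host


# e.g. a 6 GB model with 2 GB of snapshots on a 16 GB host / 5 GB GPU:
# check_fit(6e9, 16e9, gpu_totalmem=5e9, snapsmemsize=2e9) -> (True, True, True)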
def run_mpi_no_spawn_sim(args, inputfile, usernamespace, optparams=None):
    """
    Alternate MPI implementation that avoids using the MPI spawn mechanism.
    This implementation is designed to be used as
    e.g. 'mpirun -n 5 python -m gprMax user_models/mymodel.in -n 10 --mpi-no-spawn'

    Run mixed mode MPI/OpenMP simulation - MPI task farm for models with
    each model parallelised using either OpenMP (CPU) or CUDA (GPU)

    Args:
        args (dict): Namespace with command line arguments
        inputfile (object): File object for the input file.
        usernamespace (dict): Namespace that can be accessed by user in any
                Python code blocks in input file.
        optparams (dict): Optional argument. For Taguchi optimisation it
                provides the parameters to optimise and their values.
    """

    from mpi4py import MPI

    # Define MPI message tags
    tags = Enum('tags', {'READY': 0, 'DONE': 1, 'EXIT': 2, 'START': 3})

    # Initializations and preliminaries
    comm = MPI.COMM_WORLD
    size = comm.Get_size()  # total number of processes
    rank = comm.Get_rank()  # rank of this process
    status = MPI.Status()  # get MPI status object
    hostname = MPI.Get_processor_name()  # get name of processor/host

    # Set range for number of models to run
    modelstart = args.restart if args.restart else 1
    modelend = modelstart + args.n
    numbermodelruns = args.n
    currentmodelrun = modelstart  # can use -task argument to start numbering from something other than 1
    numworkers = size - 1

    ##################
    # Master process #
    ##################
    if rank == 0:
        tsimstart = perf_counter()
        mpimasterstr = '=== MPI master ({}, rank: {}) on {} using {} workers...\n'.format(comm.name, comm.Get_rank(), hostname, numworkers)
        print('{} {}\n'.format(mpimasterstr, '=' * (get_terminal_width() - 1 - len(mpimasterstr))))

        closedworkers = 0
        while closedworkers < numworkers:
            comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)
            source = status.Get_source()
            tag = status.Get_tag()

            # Worker is ready, so send it a task
            if tag == tags.READY.value:
                if currentmodelrun < modelend:
                    comm.send(currentmodelrun, dest=source, tag=tags.START.value)
                    currentmodelrun += 1
                else:
                    comm.send(None, dest=source, tag=tags.EXIT.value)

            # Worker has completed a task
            elif tag == tags.DONE.value:
                pass

            # Worker has completed all tasks
            elif tag == tags.EXIT.value:
                closedworkers += 1

        tsimend = perf_counter()
        simcompletestr = '\n=== MPI master ({}, rank: {}) on {} completed simulation in [HH:MM:SS]: {}'.format(comm.name, comm.Get_rank(), hostname, datetime.timedelta(seconds=tsimend - tsimstart))
        print('{} {}\n'.format(simcompletestr, '=' * (get_terminal_width() - 1 - len(simcompletestr))))

    ##################
    # Worker process #
    ##################
    else:
        # Get info and setup device ID for GPU(s)
        gpuinfo = ''
        if args.gpu is not None:
            # Set device ID based on rank from list of GPUs
            deviceID = (rank - 1) % len(args.gpu)
            args.gpu = next(gpu for gpu in args.gpu if gpu.deviceID == deviceID)
            gpuinfo = ' using {} - {}, {}'.format(args.gpu.deviceID, args.gpu.name, human_size(args.gpu.totalmem, a_kilobyte_is_1024_bytes=True))

        while True:
            comm.send(None, dest=0, tag=tags.READY.value)
            # Receive a model number to run from the master
            currentmodelrun = comm.recv(source=0, tag=MPI.ANY_TAG, status=status)
            tag = status.Get_tag()

            # Run a model
            if tag == tags.START.value:

                # If Taguchi optimisation, add specific value for each parameter
                # to optimise for each experiment to user accessible namespace
                if optparams:
                    tmp = {}
                    tmp.update((key, value[currentmodelrun - 1]) for key, value in optparams.items())
                    modelusernamespace = usernamespace.copy()
                    modelusernamespace.update({'optparams': tmp})
                else:
                    modelusernamespace = usernamespace

                # Run the model
                print('MPI worker (parent: {}, rank: {}) on {} starting model {}/{}{}\n'.format(comm.name, rank, hostname, currentmodelrun, numbermodelruns, gpuinfo))
                run_model(args, currentmodelrun, modelend - 1, numbermodelruns, inputfile, modelusernamespace)
                comm.send(None, dest=0, tag=tags.DONE.value)

            # Break out of loop when worker receives exit message
            elif tag == tags.EXIT.value:
                break

        comm.send(None, dest=0, tag=tags.EXIT.value)
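# Illustrative sketch (not gprMax code): the same no-spawn MPI task-farm
# pattern reduced to its core, with the model run replaced by a print. The
# function name and task payload are hypothetical; run under e.g.
# 'mpirun -n 4 python <this_file>.py'.
from enum import Enum

from mpi4py import MPI


def taskfarm(tasks):
    """Master (rank 0) hands out items from tasks; all other ranks process them."""
    tags = Enum('tags', {'READY': 0, 'DONE': 1, 'EXIT': 2, 'START': 3})
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    status = MPI.Status()

    if rank == 0:
        nextidx = 0
        closedworkers = 0
        while closedworkers < comm.Get_size() - 1:
            comm.recv(source=MPI.ANY_SOURCE, tag=MPI.ANY_TAG, status=status)
            source, tag = status.Get_source(), status.Get_tag()
            if tag == tags.READY.value:
                if nextidx < len(tasks):
                    comm.send(tasks[nextidx], dest=source, tag=tags.START.value)
                    nextidx += 1
                else:
                    comm.send(None, dest=source, tag=tags.EXIT.value)
            elif tag == tags.EXIT.value:
                closedworkers += 1
    else:
        while True:
            comm.send(None, dest=0, tag=tags.READY.value)
            task = comm.recv(source=0, tag=MPI.ANY_TAG, status=status)
            if status.Get_tag() == tags.EXIT.value:
                break
            print('rank {} processing task {}'.format(rank, task))  # stand-in for run_model()
            comm.send(None, dest=0, tag=tags.DONE.value)
        comm.send(None, dest=0, tag=tags.EXIT.value)


if __name__ == '__main__':
    taskfarm(list(range(1, 11)))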
def run_mpi_sim(args, inputfile, usernamespace, optparams=None):
    """
    Run mixed mode MPI/OpenMP simulation - MPI task farm for models with
    each model parallelised using either OpenMP (CPU) or CUDA (GPU)

    Args:
        args (dict): Namespace with command line arguments
        inputfile (object): File object for the input file.
        usernamespace (dict): Namespace that can be accessed by user in any
                Python code blocks in input file.
        optparams (dict): Optional argument. For Taguchi optimisation it
                provides the parameters to optimise and their values.
    """

    from mpi4py import MPI

    status = MPI.Status()
    hostname = MPI.Get_processor_name()

    # Set range for number of models to run
    modelstart = args.restart if args.restart else 1
    modelend = modelstart + args.n
    numbermodelruns = args.n

    # Command line flag used to indicate a spawned worker instance
    workerflag = '--mpi-worker'
    numworkers = args.mpi - 1

    ##################
    # Master process #
    ##################
    if workerflag not in sys.argv:
        # N.B. Spawned worker flag (--mpi-worker) applied to sys.argv when MPI.Spawn is called

        # See if the MPI communicator object is being passed as an argument (likely from a MPI.Split)
        if hasattr(args, 'mpicomm'):
            comm = args.mpicomm
        else:
            comm = MPI.COMM_WORLD
        tsimstart = perf_counter()
        mpimasterstr = '=== MPI master ({}, rank: {}) on {} spawning {} workers...'.format(comm.name, comm.Get_rank(), hostname, numworkers)
        print('{} {}\n'.format(mpimasterstr, '=' * (get_terminal_width() - 1 - len(mpimasterstr))))

        # Assemble a sys.argv replacement to pass to spawned worker
        # N.B. This is required as sys.argv not available when gprMax is called via api()
        # Ignore mpicomm object if it exists as only strings can be passed via spawn
        myargv = []
        for key, value in vars(args).items():
            if value:
                # Input file name always comes first
                if 'inputfile' in key:
                    myargv.append(value)
                elif 'gpu' in key:
                    myargv.append('-' + key)
                    # Add GPU device ID(s) from GPU objects
                    for gpu in args.gpu:
                        myargv.append(str(gpu.deviceID))
                elif 'mpicomm' in key:
                    pass
                elif '_' in key:
                    key = key.replace('_', '-')
                    myargv.append('--' + key)
                else:
                    myargv.append('-' + key)
                    if value is not True:
                        myargv.append(str(value))

        # Create a list of work
        worklist = []
        for model in range(modelstart, modelend):
            workobj = dict()
            workobj['currentmodelrun'] = model
            workobj['mpicommname'] = comm.name
            if optparams:
                workobj['optparams'] = optparams
            worklist.append(workobj)
        # Add stop sentinels
        worklist += ([StopIteration] * numworkers)

        # Spawn workers
        newcomm = comm.Spawn(sys.executable, args=['-m', 'gprMax'] + myargv + [workerflag], maxprocs=numworkers)

        # Reply to whoever asks until done
        for work in worklist:
            newcomm.recv(source=MPI.ANY_SOURCE, status=status)
            newcomm.send(obj=work, dest=status.Get_source())

        # Shutdown communicators
        newcomm.Disconnect()

        tsimend = perf_counter()
        simcompletestr = '\n=== MPI master ({}, rank: {}) on {} completed simulation in [HH:MM:SS]: {}'.format(comm.name, comm.Get_rank(), hostname, datetime.timedelta(seconds=tsimend - tsimstart))
        print('{} {}\n'.format(simcompletestr, '=' * (get_terminal_width() - 1 - len(simcompletestr))))

    ##################
    # Worker process #
    ##################
    elif workerflag in sys.argv:
        # Connect to parent to get communicator
        try:
            comm = MPI.Comm.Get_parent()
            rank = comm.Get_rank()
        except ValueError:
            raise ValueError('MPI worker could not connect to parent')

        # Select GPU and get info
        gpuinfo = ''
        if args.gpu is not None:
            # Set device ID based on rank from list of GPUs
            args.gpu = args.gpu[rank]
            gpuinfo = ' using {} - {}, {} RAM '.format(args.gpu.deviceID, args.gpu.name, human_size(args.gpu.totalmem, a_kilobyte_is_1024_bytes=True))

        # Ask for work until stop sentinel
        for work in iter(lambda: comm.sendrecv(0, dest=0), StopIteration):
            currentmodelrun = work['currentmodelrun']

            # If Taguchi optimisation, add specific value for each parameter to
            # optimise for each experiment to user accessible namespace
            if 'optparams' in work:
                tmp = {}
                tmp.update((key, value[currentmodelrun - 1]) for key, value in work['optparams'].items())
                modelusernamespace = usernamespace.copy()
                modelusernamespace.update({'optparams': tmp})
            else:
                modelusernamespace = usernamespace

            # Run the model
            print('MPI spawned worker (parent: {}, rank: {}) on {} starting model {}/{}{}\n'.format(work['mpicommname'], rank, hostname, currentmodelrun, numbermodelruns, gpuinfo))
            run_model(args, currentmodelrun, modelend - 1, numbermodelruns, inputfile, modelusernamespace)

        # Shutdown
        comm.Disconnect()
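# Illustrative sketch (names hypothetical): rebuilding a flat list of command
# line tokens from an argparse Namespace, as the master does above before
# spawning workers, because only strings can be passed through MPI.Spawn.
import argparse


def namespace_to_argv(args):
    """Turn an argparse.Namespace into a flat list of CLI tokens."""
    argv = []
    for key, value in vars(args).items():
        if not value:
            continue  # skip False/None/0/'' entries entirely
        if key == 'inputfile':
            argv.insert(0, value)  # positional argument goes first
        elif value is True:
            argv.append('--' + key.replace('_', '-'))  # boolean flag
        else:
            argv.append('-' + key)
            argv.append(str(value))  # flag that carries a value
    return argv


# namespace_to_argv(argparse.Namespace(inputfile='model.in', n=10, mpi_no_spawn=True))
# -> ['model.in', '-n', '10', '--mpi-no-spawn']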
def run_main(args):
    """
    Top-level function that controls what mode of simulation
    (standard/optimisation/benchmark etc...) is run.

    Args:
        args (dict): Namespace with input arguments from command line or api.
    """

    # Print gprMax logo, version, and licencing/copyright information
    logo(__version__ + ' (' + codename + ')')

    with open_path_file(args.inputfile) as inputfile:

        # Get information about host machine
        hostinfo = get_host_info()
        hyperthreading = ', {} cores with Hyper-Threading'.format(hostinfo['logicalcores']) if hostinfo['hyperthreading'] else ''
        print('\nHost: {} | {} | {} x {} ({} cores{}) | {} RAM | {}'.format(hostinfo['hostname'], hostinfo['machineID'], hostinfo['sockets'], hostinfo['cpuID'], hostinfo['physicalcores'], hyperthreading, human_size(hostinfo['ram'], a_kilobyte_is_1024_bytes=True), hostinfo['osversion']))

        # Get information/setup any Nvidia GPU(s)
        if args.gpu is not None:
            # Flatten a list of lists
            if any(isinstance(element, list) for element in args.gpu):
                args.gpu = [val for sublist in args.gpu for val in sublist]
            gpus, allgpustext = detect_check_gpus(args.gpu)
            print('GPU(s) detected: {}'.format(' | '.join(allgpustext)))

            # If in MPI mode or benchmarking provide list of GPU objects,
            # otherwise provide single GPU object
            if args.mpi or args.mpi_no_spawn or args.benchmark:
                args.gpu = gpus
            else:
                args.gpu = gpus[0]

        # Create a separate namespace that users can access in any Python code
        # blocks in the input file
        usernamespace = {'c': c, 'e0': e0, 'm0': m0, 'z0': z0,
                         'number_model_runs': args.n,
                         'inputfile': os.path.abspath(inputfile.name)}

        #######################################
        # Process for benchmarking simulation #
        #######################################
        if args.benchmark:
            if args.mpi or args.opt_taguchi or args.task or args.n > 1:
                raise GeneralError('Benchmarking mode cannot be combined with MPI, job array, or Taguchi optimisation modes, or multiple model runs.')
            run_benchmark_sim(args, inputfile, usernamespace)

        ####################################################
        # Process for simulation with Taguchi optimisation #
        ####################################################
        elif args.opt_taguchi:
            if args.mpi_worker:  # Special case for MPI spawned workers - they do not need to enter the Taguchi optimisation mode
                run_mpi_sim(args, inputfile, usernamespace)
            else:
                from gprMax.optimisation_taguchi import run_opt_sim
                run_opt_sim(args, inputfile, usernamespace)

        ################################################
        # Process for standard simulation (CPU or GPU) #
        ################################################
        else:
            # Mixed mode MPI with OpenMP or CUDA - MPI task farm for models
            # with each model parallelised with OpenMP (CPU) or CUDA (GPU)
            if args.mpi:
                if args.n == 1:
                    raise GeneralError('MPI is not beneficial when there is only one model to run')
                if args.task:
                    raise GeneralError('MPI cannot be combined with job array mode')
                run_mpi_sim(args, inputfile, usernamespace)

            # Alternate MPI configuration that does not use MPI spawn mechanism
            elif args.mpi_no_spawn:
                if args.n == 1:
                    raise GeneralError('MPI is not beneficial when there is only one model to run')
                if args.task:
                    raise GeneralError('MPI cannot be combined with job array mode')
                run_mpi_no_spawn_sim(args, inputfile, usernamespace)

            # Standard behaviour - models run serially with each model
            # parallelised with OpenMP (CPU) or CUDA (GPU)
            else:
                if args.task and args.restart:
                    raise GeneralError('Job array and restart modes cannot be used together')
                run_std_sim(args, inputfile, usernamespace)
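# Illustrative sketch (not the gprMax api): assembling an argparse.Namespace
# with the fields run_main() reads above, e.g. when driving it from a script
# instead of the command line. The helper name and default values are
# assumptions.
import argparse


def make_args(inputfile, n=1, gpu=None, mpi=False, task=False, restart=False):
    """Build a minimal Namespace resembling gprMax's parsed CLI arguments."""
    return argparse.Namespace(
        inputfile=inputfile, n=n, gpu=gpu,
        mpi=mpi, mpi_no_spawn=False, mpi_worker=False,
        benchmark=False, opt_taguchi=False,
        task=task, restart=restart,
        geometry_only=False, geometry_fixed=False, write_processed=False)


# run_main(make_args('user_models/mymodel.in', n=1))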
def run_model(args, currentmodelrun, modelend, numbermodelruns, inputfile, usernamespace):
    """Runs a model - processes the input file; builds the Yee cells;
        calculates update coefficients; runs main FDTD loop.

    Args:
        args (dict): Namespace with command line arguments
        currentmodelrun (int): Current model run number.
        modelend (int): Number of last model to run.
        numbermodelruns (int): Total number of model runs.
        inputfile (object): File object for the input file.
        usernamespace (dict): Namespace that can be accessed by user in any
                Python code blocks in input file.

    Returns:
        tsolve (int): Length of time (seconds) of main FDTD calculations
    """

    # Monitor memory usage
    p = psutil.Process()

    # Declare variable to hold FDTDGrid class
    global G

    # Used for naming geometry and output files
    appendmodelnumber = '' if numbermodelruns == 1 and not args.task and not args.restart else '_' + str(currentmodelrun)
    appendmodelnumberGeometry = '' if numbermodelruns == 1 and not args.task and not args.restart or args.geometry_fixed else '_' + str(currentmodelrun)

    # Normal model reading/building process; bypassed if geometry information to be reused
    if 'G' not in globals():

        # Initialise an instance of the FDTDGrid class
        G = FDTDGrid()

        # Get information about host machine
        # (need to save this info to FDTDGrid instance after it has been created)
        G.hostinfo = get_host_info()

        # Single GPU object
        if args.gpu:
            G.gpu = args.gpu

        G.inputfilename = os.path.split(inputfile.name)[1]
        G.inputdirectory = os.path.dirname(os.path.abspath(inputfile.name))
        inputfilestr = '\n--- Model {}/{}, input file: {}'.format(currentmodelrun, modelend, inputfile.name)
        if G.messages:
            print(Fore.GREEN + '{} {}\n'.format(inputfilestr, '-' * (get_terminal_width() - 1 - len(inputfilestr))) + Style.RESET_ALL)

        # Add the current model run to namespace that can be accessed by
        # user in any Python code blocks in input file
        usernamespace['current_model_run'] = currentmodelrun

        # Read input file and process any Python and include file commands
        processedlines = process_python_include_code(inputfile, usernamespace)

        # Print constants/variables in user-accessible namespace
        uservars = ''
        for key, value in sorted(usernamespace.items()):
            if key != '__builtins__':
                uservars += '{}: {}, '.format(key, value)
        if G.messages:
            print('Constants/variables used/available for Python scripting: {{{}}}\n'.format(uservars[:-2]))

        # Write a file containing the input commands after Python or include file commands have been processed
        if args.write_processed:
            write_processed_file(processedlines, appendmodelnumber, G)

        # Check validity of command names and that essential commands are present
        singlecmds, multicmds, geometry = check_cmd_names(processedlines)

        # Create built-in materials
        m = Material(0, 'pec')
        m.se = float('inf')
        m.type = 'builtin'
        m.averagable = False
        G.materials.append(m)
        m = Material(1, 'free_space')
        m.type = 'builtin'
        G.materials.append(m)

        # Process parameters for commands that can only occur once in the model
        process_singlecmds(singlecmds, G)

        # Process parameters for commands that can occur multiple times in the model
        if G.messages:
            print()
        process_multicmds(multicmds, G)

        # Estimate and check memory (RAM) usage
        G.memory_estimate_basic()
        # G.memory_check()
        # if G.messages:
        #     if G.gpu is None:
        #         print('\nMemory (RAM) required: ~{}\n'.format(human_size(G.memoryusage)))
        #     else:
        #         print('\nMemory (RAM) required: ~{} host + ~{} GPU\n'.format(human_size(G.memoryusage), human_size(G.memoryusage)))

        # Initialise an array for volumetric material IDs (solid), boolean
        # arrays for specifying materials not to be averaged (rigid),
        # an array for cell edge IDs (ID)
        G.initialise_geometry_arrays()

        # Initialise arrays for the field components
        if G.gpu is None:
            G.initialise_field_arrays()

        # Process geometry commands in the order they were given
        process_geometrycmds(geometry, G)

        # Build the PMLs and calculate initial coefficients
        if G.messages:
            print()
        if all(value == 0 for value in G.pmlthickness.values()):
            if G.messages:
                print('PML: switched off')
            pass  # If all the PMLs are switched off don't need to build anything
        else:
            # Set default CFS parameters for PML if not given
            if not G.cfs:
                G.cfs = [CFS()]
            if G.messages:
                if all(value == G.pmlthickness['x0'] for value in G.pmlthickness.values()):
                    pmlinfo = str(G.pmlthickness['x0'])
                else:
                    pmlinfo = ''
                    for key, value in G.pmlthickness.items():
                        pmlinfo += '{}: {}, '.format(key, value)
                    pmlinfo = pmlinfo[:-2] + ' cells'
                print('PML: formulation: {}, order: {}, thickness: {}'.format(G.pmlformulation, len(G.cfs), pmlinfo))
            pbar = tqdm(total=sum(1 for value in G.pmlthickness.values() if value > 0), desc='Building PML boundaries', ncols=get_terminal_width() - 1, file=sys.stdout, disable=not G.progressbars)
            build_pmls(G, pbar)
            pbar.close()

        # Build the model, i.e. set the material properties (ID) for every edge
        # of every Yee cell
        if G.messages:
            print()
        pbar = tqdm(total=2, desc='Building main grid', ncols=get_terminal_width() - 1, file=sys.stdout, disable=not G.progressbars)
        build_electric_components(G.solid, G.rigidE, G.ID, G)
        pbar.update()
        build_magnetic_components(G.solid, G.rigidH, G.ID, G)
        pbar.update()
        pbar.close()

        # Add PEC boundaries to invariant direction in 2D modes
        # N.B. 2D modes are a single cell slice of 3D grid
        if '2D TMx' in G.mode:
            # Ey & Ez components
            G.ID[1, 0, :, :] = 0
            G.ID[1, 1, :, :] = 0
            G.ID[2, 0, :, :] = 0
            G.ID[2, 1, :, :] = 0
        elif '2D TMy' in G.mode:
            # Ex & Ez components
            G.ID[0, :, 0, :] = 0
            G.ID[0, :, 1, :] = 0
            G.ID[2, :, 0, :] = 0
            G.ID[2, :, 1, :] = 0
        elif '2D TMz' in G.mode:
            # Ex & Ey components
            G.ID[0, :, :, 0] = 0
            G.ID[0, :, :, 1] = 0
            G.ID[1, :, :, 0] = 0
            G.ID[1, :, :, 1] = 0

        # Process any voltage sources (that have resistance) to create a new
        # material at the source location
        for voltagesource in G.voltagesources:
            voltagesource.create_material(G)

        # Initialise arrays of update coefficients to pass to update functions
        G.initialise_std_update_coeff_arrays()

        # Initialise arrays of update coefficients and temporary values if
        # there are any dispersive materials
        if Material.maxpoles != 0:
            # Update estimated memory (RAM) usage
            G.memoryusage += int(3 * Material.maxpoles * (G.nx + 1) * (G.ny + 1) * (G.nz + 1) * np.dtype(complextype).itemsize)
            G.memory_check()
            if G.messages:
                print('\nMemory (RAM) required - updated (dispersive): ~{}\n'.format(human_size(G.memoryusage)))
            G.initialise_dispersive_arrays()

        # Check there is sufficient memory to store any snapshots
        if G.snapshots:
            snapsmemsize = 0
            for snap in G.snapshots:
                # 2 x required to account for electric and magnetic fields
                snapsmemsize += (2 * snap.datasizefield)
            G.memoryusage += int(snapsmemsize)
            G.memory_check(snapsmemsize=int(snapsmemsize))
            if G.messages:
                print('\nMemory (RAM) required - updated (snapshots): ~{}\n'.format(human_size(G.memoryusage)))

        # Process complete list of materials - calculate update coefficients,
        # store in arrays, and build text list of materials/properties
        materialsdata = process_materials(G)
        if G.messages:
            print('\nMaterials:')
            materialstable = AsciiTable(materialsdata)
            materialstable.outer_border = False
            materialstable.justify_columns[0] = 'right'
            print(materialstable.table)

        # Check to see if numerical dispersion might be a problem
        results = dispersion_analysis(G)
        if results['error'] and G.messages:
            print(Fore.RED + "\nWARNING: Numerical dispersion analysis not carried out as {}".format(results['error']) + Style.RESET_ALL)
        elif results['N'] < G.mingridsampling:
            raise GeneralError("Non-physical wave propagation: Material '{}' has wavelength sampled by {} cells, less than required minimum for physical wave propagation. Maximum significant frequency estimated as {:g}Hz".format(results['material'].ID, results['N'], results['maxfreq']))
        elif results['deltavp'] and np.abs(results['deltavp']) > G.maxnumericaldisp and G.messages:
            print(Fore.RED + "\nWARNING: Potentially significant numerical dispersion. Estimated largest physical phase-velocity error is {:.2f}% in material '{}' whose wavelength is sampled by {} cells. Maximum significant frequency estimated as {:g}Hz".format(results['deltavp'], results['material'].ID, results['N'], results['maxfreq']) + Style.RESET_ALL)
        elif results['deltavp'] and G.messages:
            print("\nNumerical dispersion analysis: estimated largest physical phase-velocity error is {:.2f}% in material '{}' whose wavelength is sampled by {} cells. Maximum significant frequency estimated as {:g}Hz".format(results['deltavp'], results['material'].ID, results['N'], results['maxfreq']))

    # If geometry information to be reused between model runs
    else:
        inputfilestr = '\n--- Model {}/{}, input file (not re-processed, i.e. geometry fixed): {}'.format(currentmodelrun, modelend, inputfile.name)
        if G.messages:
            print(Fore.GREEN + '{} {}\n'.format(inputfilestr, '-' * (get_terminal_width() - 1 - len(inputfilestr))) + Style.RESET_ALL)

        if G.gpu is None:
            # Clear arrays for field components
            G.initialise_field_arrays()
            # Clear arrays for fields in PML
            for pml in G.pmls:
                pml.initialise_field_arrays()

    # Adjust position of simple sources and receivers if required
    if G.srcsteps[0] != 0 or G.srcsteps[1] != 0 or G.srcsteps[2] != 0:
        for source in itertools.chain(G.hertziandipoles, G.magneticdipoles):
            if currentmodelrun == 1:
                if source.xcoord + G.srcsteps[0] * modelend < 0 or source.xcoord + G.srcsteps[0] * modelend > G.nx or source.ycoord + G.srcsteps[1] * modelend < 0 or source.ycoord + G.srcsteps[1] * modelend > G.ny or source.zcoord + G.srcsteps[2] * modelend < 0 or source.zcoord + G.srcsteps[2] * modelend > G.nz:
                    raise GeneralError('Source(s) will be stepped to a position outside the domain.')
            source.xcoord = source.xcoordorigin + (currentmodelrun - 1) * G.srcsteps[0]
            source.ycoord = source.ycoordorigin + (currentmodelrun - 1) * G.srcsteps[1]
            source.zcoord = source.zcoordorigin + (currentmodelrun - 1) * G.srcsteps[2]
    if G.rxsteps[0] != 0 or G.rxsteps[1] != 0 or G.rxsteps[2] != 0:
        for receiver in G.rxs:
            if currentmodelrun == 1:
                if receiver.xcoord + G.rxsteps[0] * modelend < 0 or receiver.xcoord + G.rxsteps[0] * modelend > G.nx or receiver.ycoord + G.rxsteps[1] * modelend < 0 or receiver.ycoord + G.rxsteps[1] * modelend > G.ny or receiver.zcoord + G.rxsteps[2] * modelend < 0 or receiver.zcoord + G.rxsteps[2] * modelend > G.nz:
                    raise GeneralError('Receiver(s) will be stepped to a position outside the domain.')
            receiver.xcoord = receiver.xcoordorigin + (currentmodelrun - 1) * G.rxsteps[0]
            receiver.ycoord = receiver.ycoordorigin + (currentmodelrun - 1) * G.rxsteps[1]
            receiver.zcoord = receiver.zcoordorigin + (currentmodelrun - 1) * G.rxsteps[2]

    # Write files for any geometry views and geometry object outputs
    if not (G.geometryviews or G.geometryobjectswrite) and args.geometry_only and G.messages:
        print(Fore.RED + '\nWARNING: No geometry views or geometry objects to output found.' + Style.RESET_ALL)
    if G.geometryviews and (not args.geometry_fixed or currentmodelrun == 1):
        if G.messages:
            print()
        for i, geometryview in enumerate(G.geometryviews):
            geometryview.set_filename(appendmodelnumberGeometry, G)
            pbar = tqdm(total=geometryview.datawritesize, unit='byte', unit_scale=True, desc='Writing geometry view file {}/{}, {}'.format(i + 1, len(G.geometryviews), os.path.split(geometryview.filename)[1]), ncols=get_terminal_width() - 1, file=sys.stdout, disable=not G.progressbars)
            geometryview.write_vtk(G, pbar)
            pbar.close()
    if G.geometryobjectswrite:
        for i, geometryobject in enumerate(G.geometryobjectswrite):
            pbar = tqdm(total=geometryobject.datawritesize, unit='byte', unit_scale=True, desc='Writing geometry object file {}/{}, {}'.format(i + 1, len(G.geometryobjectswrite), os.path.split(geometryobject.filename)[1]), ncols=get_terminal_width() - 1, file=sys.stdout, disable=not G.progressbars)
            geometryobject.write_hdf5(G, pbar)
            pbar.close()

    # If only writing geometry information
    if args.geometry_only:
        tsolve = 0

    # Run simulation
    else:
        # Output filename
        inputdirectory, inputfilename = os.path.split(os.path.join(G.inputdirectory, G.inputfilename))
        if G.outputdirectory is None:
            outputdir = inputdirectory
        else:
            outputdir = G.outputdirectory

        # Save current directory
        curdir = os.getcwd()
        os.chdir(inputdirectory)
        outputdir = os.path.abspath(outputdir)
        if not os.path.isdir(outputdir):
            os.mkdir(outputdir)
            if G.messages:
                print('\nCreated output directory: {}'.format(outputdir))
        # Restore current directory
        os.chdir(curdir)

        basename, ext = os.path.splitext(inputfilename)
        outputfile = os.path.join(outputdir, basename + appendmodelnumber + '.out')
        if G.messages:
            print('\nOutput file: {}\n'.format(outputfile))

        # Main FDTD solving functions for either CPU or GPU
        if G.gpu is None:
            tsolve = solve_cpu(currentmodelrun, modelend, G)
        else:
            tsolve, memsolve = solve_gpu(currentmodelrun, modelend, G)

        # Write an output file in HDF5 format
        write_hdf5_outputfile(outputfile, G)

        # Write any snapshots to file
        if G.snapshots:
            # Create directory and construct filename from user-supplied name and model run number
            snapshotdir = os.path.join(G.inputdirectory, os.path.splitext(G.inputfilename)[0] + '_snaps' + appendmodelnumber)
            if not os.path.exists(snapshotdir):
                os.mkdir(snapshotdir)

            if G.messages:
                print()
            for i, snap in enumerate(G.snapshots):
                snap.filename = os.path.abspath(os.path.join(snapshotdir, snap.basefilename + '.vti'))
                pbar = tqdm(total=snap.vtkdatawritesize, leave=True, unit='byte', unit_scale=True, desc='Writing snapshot file {} of {}, {}'.format(i + 1, len(G.snapshots), os.path.split(snap.filename)[1]), ncols=get_terminal_width() - 1, file=sys.stdout, disable=not G.progressbars)
                snap.write_vtk_imagedata(pbar, G)
                pbar.close()
            if G.messages:
                print()

        if G.messages:
            if G.gpu is None:
                print('Memory (RAM) used: ~{}'.format(human_size(p.memory_info().rss)))
            else:
                print('Memory (RAM) used: ~{} host + ~{} GPU'.format(human_size(p.memory_info().rss), human_size(memsolve)))
            print('Solving time [HH:MM:SS]: {}'.format(datetime.timedelta(seconds=tsolve)))

    # If geometry information to be reused between model runs then FDTDGrid
    # class instance must be global so that it persists
    if not args.geometry_fixed or currentmodelrun == modelend:
        del G

    return tsolve
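# Illustrative sketch (standalone, names hypothetical): how the
# appendmodelnumber suffix used above maps model runs to output file names.
import os


def output_filename(inputfile, currentmodelrun, numbermodelruns, task=False, restart=False):
    """Mirror the suffix logic above: single plain runs get no suffix."""
    suffix = '' if numbermodelruns == 1 and not task and not restart else '_' + str(currentmodelrun)
    basename, _ = os.path.splitext(os.path.basename(inputfile))
    return basename + suffix + '.out'


# output_filename('cylinder.in', 1, 1)   -> 'cylinder.out'
# output_filename('cylinder.in', 3, 10)  -> 'cylinder_3.out'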
def run_model(args, modelrun, numbermodelruns, inputfile, usernamespace): """Runs a model - processes the input file; builds the Yee cells; calculates update coefficients; runs main FDTD loop. Args: args (dict): Namespace with command line arguments modelrun (int): Current model run number. numbermodelruns (int): Total number of model runs. inputfile (str): Name of the input file to open. usernamespace (dict): Namespace that can be accessed by user in any Python code blocks in input file. """ # Monitor memory usage p = psutil.Process() print('\n{}\n\nModel input file: {}\n'.format(68 * '*', inputfile)) # Add the current model run to namespace that can be accessed by user in any Python code blocks in input file usernamespace['current_model_run'] = modelrun print('Constants/variables available for Python scripting: {}\n'.format( usernamespace)) # Process any user input Python commands processedlines = python_code_blocks(inputfile, usernamespace) # Write a file containing the input commands after Python blocks have been processed if args.write_python: write_python_processed(inputfile, modelrun, numbermodelruns, processedlines) # Check validity of command names & that essential commands are present singlecmds, multicmds, geometry = check_cmd_names(processedlines) # Initialise an instance of the FDTDGrid class G = FDTDGrid() G.inputdirectory = usernamespace['inputdirectory'] # Process parameters for commands that can only occur once in the model process_singlecmds(singlecmds, multicmds, G) # Process parameters for commands that can occur multiple times in the model process_multicmds(multicmds, G) # Initialise an array for volumetric material IDs (solid), boolean arrays for specifying materials not to be averaged (rigid), # an array for cell edge IDs (ID), and arrays for the field components. G.initialise_std_arrays() # Process the geometry commands in the order they were given tinputprocstart = perf_counter() process_geometrycmds(geometry, G) tinputprocend = perf_counter() print('\nInput file processed in [HH:MM:SS]: {}'.format( datetime.timedelta(seconds=int(tinputprocend - tinputprocstart)))) # Build the PML and calculate initial coefficients build_pml(G) # Build the model, i.e. set the material properties (ID) for every edge of every Yee cell tbuildstart = perf_counter() build_electric_components(G.solid, G.rigidE, G.ID, G) build_magnetic_components(G.solid, G.rigidH, G.ID, G) tbuildend = perf_counter() print('\nModel built in [HH:MM:SS]: {}'.format( datetime.timedelta(seconds=int(tbuildend - tbuildstart)))) # Process any voltage sources (that have resistance) to create a new material at the source location for voltagesource in G.voltagesources: voltagesource.create_material(G) # Initialise arrays of update coefficients to pass to update functions G.initialise_std_updatecoeff_arrays(len(G.materials)) # Initialise arrays of update coefficients and temporary values if there are any dispersive materials if Material.maxpoles != 0: G.initialise_dispersive_arrays(len(G.materials)) # Calculate update coefficients, store in arrays, and list materials in model if G.messages: print('\nMaterials:\n') print('ID\tName\t\tProperties') print('{}'.format('-' * 50)) for material in G.materials: # Calculate update coefficients for material material.calculate_update_coeffsE(G) material.calculate_update_coeffsH(G) # Store all update coefficients together G.updatecoeffsE[ material. numID, :] = material.CA, material.CBx, material.CBy, material.CBz, material.srce G.updatecoeffsH[ material. 
numID, :] = material.DA, material.DBx, material.DBy, material.DBz, material.srcm # Store coefficients for any dispersive materials if Material.maxpoles != 0: z = 0 for pole in range(Material.maxpoles): G.updatecoeffsdispersive[ material.numID, z:z + 3] = e0 * material.eqt2[pole], material.eqt[ pole], material.zt[pole] z += 3 if G.messages: if material.deltaer and material.tau: tmp = 'delta_epsr={}, tau={} secs; '.format( ', '.join('{:g}'.format(deltaer) for deltaer in material.deltaer), ', '.join('{:g}'.format(tau) for tau in material.tau)) else: tmp = '' if material.average: dielectricsmoothing = 'dielectric smoothing permitted.' else: dielectricsmoothing = 'dielectric smoothing not permitted.' print( '{:3}\t{:12}\tepsr={:g}, sig={:g} S/m; mur={:g}, sig*={:g} S/m; ' .format(material.numID, material.ID, material.er, material.se, material.mr, material.sm) + tmp + dielectricsmoothing) # Check to see if numerical dispersion might be a problem if dispersion_check(G.waveforms, G.materials, G.dx, G.dy, G.dz): print( '\nWARNING: Potential numerical dispersion in the simulation. Check the spatial discretisation against the smallest wavelength present.' ) # Write files for any geometry views if not G.geometryviews and args.geometry_only: raise GeneralError('No geometry views found.') elif G.geometryviews: tgeostart = perf_counter() for geometryview in G.geometryviews: geometryview.write_file(modelrun, numbermodelruns, G) tgeoend = perf_counter() print('\nGeometry file(s) written in [HH:MM:SS]: {}'.format( datetime.timedelta(seconds=int(tgeoend - tgeostart)))) # Run simulation if not doing only geometry if not args.geometry_only: # Prepare any snapshot files for snapshot in G.snapshots: snapshot.prepare_file(modelrun, numbermodelruns, G) # Prepare output file inputfileparts = os.path.splitext(inputfile) if numbermodelruns == 1: outputfile = inputfileparts[0] + '.out' else: outputfile = inputfileparts[0] + str(modelrun) + '.out' sys.stdout.write('\nOutput to file: {}\n'.format(outputfile)) sys.stdout.flush() f = prepare_output_file(outputfile, G) # Adjust position of sources and receivers if required if G.srcstepx > 0 or G.srcstepy > 0 or G.srcstepz > 0: for source in itertools.chain(G.hertziandipoles, G.magneticdipoles, G.voltagesources, G.transmissionlines): source.xcoord += (modelrun - 1) * G.srcstepx source.ycoord += (modelrun - 1) * G.srcstepy source.zcoord += (modelrun - 1) * G.srcstepz if G.rxstepx > 0 or G.rxstepy > 0 or G.rxstepz > 0: for receiver in G.rxs: receiver.xcoord += (modelrun - 1) * G.rxstepx receiver.ycoord += (modelrun - 1) * G.rxstepy receiver.zcoord += (modelrun - 1) * G.rxstepz ################################## # Main FDTD calculation loop # ################################## tsolvestart = perf_counter() # Absolute time abstime = 0 for timestep in range(G.iterations): if timestep == 0: tstepstart = perf_counter() # Write field outputs to file write_output(f, timestep, G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz, G) # Write any snapshots to file for snapshot in G.snapshots: if snapshot.time == timestep + 1: snapshot.write_snapshot(G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz, G) # Update electric field components if Material.maxpoles == 0: # All materials are non-dispersive so do standard update update_electric(G.nx, G.ny, G.nz, G.nthreads, G.updatecoeffsE, G.ID, G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz) elif Material.maxpoles == 1: # If there are any dispersive materials do 1st part of dispersive update (it is split into two parts as it requires present and updated electric field values). 
update_electric_dispersive_1pole_A(G.nx, G.ny, G.nz, G.nthreads, G.updatecoeffsE, G.updatecoeffsdispersive, G.ID, G.Tx, G.Ty, G.Tz, G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz) elif Material.maxpoles > 1: update_electric_dispersive_multipole_A( G.nx, G.ny, G.nz, G.nthreads, Material.maxpoles, G.updatecoeffsE, G.updatecoeffsdispersive, G.ID, G.Tx, G.Ty, G.Tz, G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz) # Update electric field components with the PML correction update_electric_pml(G) # Update electric field components from sources for voltagesource in G.voltagesources: voltagesource.update_electric(abstime, G.updatecoeffsE, G.ID, G.Ex, G.Ey, G.Ez, G) for transmissionline in G.transmissionlines: transmissionline.update_electric(abstime, G.Ex, G.Ey, G.Ez, G) for hertziandipole in G.hertziandipoles: # Update any Hertzian dipole sources last hertziandipole.update_electric(abstime, G.updatecoeffsE, G.ID, G.Ex, G.Ey, G.Ez, G) # If there are any dispersive materials do 2nd part of dispersive update (it is split into two parts as it requires present and updated electric field values). Therefore it can only be completely updated after the electric field has been updated by the PML and source updates. if Material.maxpoles == 1: update_electric_dispersive_1pole_B(G.nx, G.ny, G.nz, G.nthreads, G.updatecoeffsdispersive, G.ID, G.Tx, G.Ty, G.Tz, G.Ex, G.Ey, G.Ez) elif Material.maxpoles > 1: update_electric_dispersive_multipole_B( G.nx, G.ny, G.nz, G.nthreads, Material.maxpoles, G.updatecoeffsdispersive, G.ID, G.Tx, G.Ty, G.Tz, G.Ex, G.Ey, G.Ez) # Increment absolute time value abstime += 0.5 * G.dt # Update magnetic field components update_magnetic(G.nx, G.ny, G.nz, G.nthreads, G.updatecoeffsH, G.ID, G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz) # Update magnetic field components with the PML correction update_magnetic_pml(G) # Update magnetic field components from sources for transmissionline in G.transmissionlines: transmissionline.update_magnetic(abstime, G.Hx, G.Hy, G.Hz, G) for magneticdipole in G.magneticdipoles: magneticdipole.update_magnetic(abstime, G.updatecoeffsH, G.ID, G.Hx, G.Hy, G.Hz, G) # Increment absolute time value abstime += 0.5 * G.dt # Calculate time for two iterations, used to estimate overall runtime if timestep == 1: tstepend = perf_counter() runtime = datetime.timedelta( seconds=int((tstepend - tstepstart) / 2 * G.iterations)) sys.stdout.write( 'Estimated runtime [HH:MM:SS]: {}\n'.format(runtime)) sys.stdout.write('Solving for model run {} of {}...\n'.format( modelrun, numbermodelruns)) sys.stdout.flush() elif timestep > 1: update_progress((timestep + 1) / G.iterations) # Close output file f.close() tsolveend = perf_counter() print('\n\nSolving took [HH:MM:SS]: {}'.format( datetime.timedelta(seconds=int(tsolveend - tsolvestart)))) print('Peak memory (approx) used: {}'.format( human_size(p.memory_info().rss)))
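# Illustrative sketch (not gprMax code): a self-contained 1D FDTD loop in
# normalised units showing the same leapfrog pattern as the main loop above -
# electric field update, source injection, then magnetic field update, each
# advancing time by half a time step. Grid size, source, and spatial step are
# arbitrary toy values.
import numpy as np


def fdtd_1d(nx=200, iterations=400):
    """Free-space 1D FDTD (normalised fields) with a soft source in the middle; returns Ez."""
    c = 299792458.0
    dx = 1e-3
    dt = dx / (2 * c)  # well inside the 1D CFL limit
    ez = np.zeros(nx)
    hy = np.zeros(nx)
    for n in range(iterations):
        # Update electric field from spatial derivative of magnetic field
        ez[1:] += (c * dt / dx) * (hy[1:] - hy[:-1])
        # Inject a Gaussian pulse source
        ez[nx // 2] += np.exp(-((n - 60) / 20.0) ** 2)
        # Update magnetic field from spatial derivative of electric field
        hy[:-1] += (c * dt / dx) * (ez[1:] - ez[:-1])
    return ez


# ez = fdtd_1d(); ez.max() gives the peak field after 400 iterations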
def run_mpi_sim(args, inputfile, usernamespace, optparams=None):
    """
    Run mixed mode MPI/OpenMP simulation - MPI task farm for models with
    each model parallelised using either OpenMP (CPU) or CUDA (GPU)

    Args:
        args (dict): Namespace with command line arguments
        inputfile (object): File object for the input file.
        usernamespace (dict): Namespace that can be accessed by user in any
                Python code blocks in input file.
        optparams (dict): Optional argument. For Taguchi optimisation it
                provides the parameters to optimise and their values.
    """

    from mpi4py import MPI

    # Get name of processor/host
    name = MPI.Get_processor_name()

    # Set range for number of models to run
    modelstart = args.restart if args.restart else 1
    modelend = modelstart + args.n
    numbermodelruns = args.n

    # Number of workers and command line flag to indicate a spawned worker
    worker = '--mpi-worker'
    numberworkers = args.mpi - 1

    # Master process
    if worker not in sys.argv:
        tsimstart = perf_counter()
        print('MPI master rank (PID {}) on {} using {} workers'.format(os.getpid(), name, numberworkers))

        # Create a list of work
        worklist = []
        for model in range(modelstart, modelend):
            workobj = dict()
            workobj['currentmodelrun'] = model
            if optparams:
                workobj['optparams'] = optparams
            worklist.append(workobj)
        # Add stop sentinels
        worklist += ([StopIteration] * numberworkers)

        # Spawn workers
        comm = MPI.COMM_WORLD.Spawn(sys.executable, args=['-m', 'gprMax', '-n', str(args.n)] + sys.argv[1::] + [worker], maxprocs=numberworkers)

        # Reply to whoever asks until done
        status = MPI.Status()
        for work in worklist:
            comm.recv(source=MPI.ANY_SOURCE, status=status)
            comm.send(obj=work, dest=status.Get_source())

        # Shutdown
        comm.Disconnect()

        tsimend = perf_counter()
        simcompletestr = '\n=== Simulation completed in [HH:MM:SS]: {}'.format(datetime.timedelta(seconds=tsimend - tsimstart))
        print('{} {}\n'.format(simcompletestr, '=' * (get_terminal_width() - 1 - len(simcompletestr))))

    # Worker process
    elif worker in sys.argv:
        # Connect to parent
        try:
            comm = MPI.Comm.Get_parent()  # get MPI communicator object
            rank = comm.Get_rank()  # rank of this process
        except ValueError:
            raise ValueError('Could not connect to parent')

        # Ask for work until stop sentinel
        for work in iter(lambda: comm.sendrecv(0, dest=0), StopIteration):
            currentmodelrun = work['currentmodelrun']

            # Get info and setup device ID for GPU(s)
            gpuinfo = ''
            if args.gpu is not None:
                # Set device ID for multiple GPUs
                if isinstance(args.gpu, list):
                    deviceID = (rank - 1) % len(args.gpu)
                    args.gpu = next(gpu for gpu in args.gpu if gpu.deviceID == deviceID)
                gpuinfo = ' using {} - {}, {} RAM '.format(args.gpu.deviceID, args.gpu.name, human_size(args.gpu.totalmem, a_kilobyte_is_1024_bytes=True))

            print('MPI worker rank {} (PID {}) starting model {}/{}{} on {}'.format(rank, os.getpid(), currentmodelrun, numbermodelruns, gpuinfo, name))

            # If Taguchi optimisation, add specific value for each parameter to
            # optimise for each experiment to user accessible namespace
            if 'optparams' in work:
                tmp = {}
                tmp.update((key, value[currentmodelrun - 1]) for key, value in work['optparams'].items())
                modelusernamespace = usernamespace.copy()
                modelusernamespace.update({'optparams': tmp})
            else:
                modelusernamespace = usernamespace

            # Run the model
            run_model(args, currentmodelrun, modelend - 1, numbermodelruns, inputfile, modelusernamespace)

        # Shutdown
        comm.Disconnect()
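# Illustrative sketch (hypothetical names): the round-robin assignment of MPI
# worker ranks to GPU device IDs used by the workers above when more workers
# than GPUs are requested.
def assign_gpu(rank, gpu_device_ids):
    """Map a worker rank (workers numbered from 1) onto the available device IDs."""
    return gpu_device_ids[(rank - 1) % len(gpu_device_ids)]


# With 2 GPUs (IDs 0 and 1) and 4 workers:
# [assign_gpu(r, [0, 1]) for r in (1, 2, 3, 4)] -> [0, 1, 0, 1]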
def run_main(args): """ Top-level function that controls what mode of simulation (standard/optimsation/benchmark etc...) is run. Args: args (dict): Namespace with input arguments from command line or api. """ with open_path_file(args.inputfile) as inputfile: # Get information about host machine hostinfo = get_host_info() hyperthreading = ', {} cores with Hyper-Threading'.format(hostinfo['logicalcores']) if hostinfo['hyperthreading'] else '' print('\nHost: {}; {} x {} ({} cores{}); {} RAM; {}'.format(hostinfo['machineID'], hostinfo['sockets'], hostinfo['cpuID'], hostinfo['physicalcores'], hyperthreading, human_size(hostinfo['ram'], a_kilobyte_is_1024_bytes=True), hostinfo['osversion'])) # Get information/setup Nvidia GPU(s) if args.gpu is not None: # Extract first item of list, either True to automatically determine device ID, # or an integer to manually specify device ID args.gpu = args.gpu[0] gpus = detect_gpus() # If a device ID is specified check it is valid if not isinstance(args.gpu, bool): if args.gpu > len(gpus) - 1: raise GeneralError('GPU with device ID {} does not exist'.format(args.gpu)) # Set args.gpu to GPU object to access elsewhere args.gpu = next(gpu for gpu in gpus if gpu.deviceID == args.gpu) # If no device ID is specified else: # If in MPI mode then set args.gpu to list of available GPUs if args.mpi: if args.mpi - 1 > len(gpus): raise GeneralError('Too many MPI tasks requested ({}). The number of MPI tasks requested can only be a maximum of the number of GPU(s) detected plus one, i.e. {} GPU worker tasks + 1 CPU master task'.format(args.mpi, len(gpus))) args.gpu = gpus # If benchmarking mode then set args.gpu to list of available GPUs elif args.benchmark: args.gpu = gpus # Otherwise set args.gpu to GPU object with default device ID (0) to access elsewhere else: args.gpu = next(gpu for gpu in gpus if gpu.deviceID == 0) # Create a separate namespace that users can access in any Python code blocks in the input file usernamespace = {'c': c, 'e0': e0, 'm0': m0, 'z0': z0, 'number_model_runs': args.n, 'inputfile': os.path.abspath(inputfile.name)} ####################################### # Process for benchmarking simulation # ####################################### if args.benchmark: if args.mpi or args.opt_taguchi or args.task or args.n > 1: raise GeneralError('Benchmarking mode cannot be combined with MPI, job array, or Taguchi optimisation modes, or multiple model runs.') run_benchmark_sim(args, inputfile, usernamespace) #################################################### # Process for simulation with Taguchi optimisation # #################################################### elif args.opt_taguchi: if args.mpi_worker: # Special case for MPI spawned workers - they do not need to enter the Taguchi optimisation mode run_mpi_sim(args, inputfile, usernamespace) else: from gprMax.optimisation_taguchi import run_opt_sim run_opt_sim(args, inputfile, usernamespace) ################################################ # Process for standard simulation (CPU or GPU) # ################################################ else: # Mixed mode MPI with OpenMP or CUDA - MPI task farm for models with each model parallelised with OpenMP (CPU) or CUDA (GPU) if args.mpi: if args.n == 1: raise GeneralError('MPI is not beneficial when there is only one model to run') if args.task: raise GeneralError('MPI cannot be combined with job array mode') run_mpi_sim(args, inputfile, usernamespace) # Standard behaviour - models run serially with each model parallelised with OpenMP (CPU) or CUDA (GPU) else: if 
args.task and args.restart: raise GeneralError('Job array and restart modes cannot be used together') run_std_sim(args, inputfile, usernamespace)
def process_singlecmds(singlecmds, G): """Checks the validity of command parameters and creates instances of classes of parameters. Args: singlecmds (dict): Commands that can only occur once in the model. G (class): Grid class instance - holds essential parameters describing the model. """ # Check validity of command parameters in order needed # messages cmd = '#messages' if singlecmds[cmd] != 'None': tmp = singlecmds[cmd].split() if len(tmp) != 1: raise CmdInputError(cmd + ' requires exactly one parameter') if singlecmds[cmd].lower() == 'y': G.messages = True elif singlecmds[cmd].lower() == 'n': G.messages = False else: raise CmdInputError(cmd + ' requires input values of either y or n') # Title cmd = '#title' if singlecmds[cmd] != 'None': G.title = singlecmds[cmd] if G.messages: print('Model title: {}'.format(G.title)) # Number of processors to run on (OpenMP) cmd = '#num_threads' os.environ['OMP_WAIT_POLICY'] = 'active' os.environ['OMP_DYNAMIC'] = 'false' os.environ['OMP_PROC_BIND'] = 'true' if singlecmds[cmd] != 'None': tmp = tuple(int(x) for x in singlecmds[cmd].split()) if len(tmp) != 1: raise CmdInputError(cmd + ' requires exactly one parameter to specify the number of threads to use') if tmp[0] < 1: raise CmdInputError(cmd + ' requires the value to be an integer not less than one') G.nthreads = tmp[0] os.environ['OMP_NUM_THREADS'] = str(G.nthreads) elif os.environ.get('OMP_NUM_THREADS'): G.nthreads = int(os.environ.get('OMP_NUM_THREADS')) else: # Set number of threads to number of physical CPU cores, i.e. avoid hyperthreading with OpenMP G.nthreads = psutil.cpu_count(logical=False) os.environ['OMP_NUM_THREADS'] = str(G.nthreads) if G.messages: print('Number of threads: {}'.format(G.nthreads)) if G.nthreads > psutil.cpu_count(logical=False): print('\nWARNING: You have specified more threads ({}) than available physical CPU cores ({}). 
This may lead to degraded performance.'.format(G.nthreads, psutil.cpu_count(logical=False))) # Spatial discretisation cmd = '#dx_dy_dz' tmp = [float(x) for x in singlecmds[cmd].split()] if len(tmp) != 3: raise CmdInputError(cmd + ' requires exactly three parameters') if tmp[0] <= 0: raise CmdInputError(cmd + ' requires the x-direction spatial step to be greater than zero') if tmp[1] <= 0: raise CmdInputError(cmd + ' requires the y-direction spatial step to be greater than zero') if tmp[2] <= 0: raise CmdInputError(cmd + ' requires the z-direction spatial step to be greater than zero') G.dx = tmp[0] G.dy = tmp[1] G.dz = tmp[2] if G.messages: print('Spatial discretisation: {:g} x {:g} x {:g}m'.format(G.dx, G.dy, G.dz)) # Domain cmd = '#domain' tmp = [float(x) for x in singlecmds[cmd].split()] if len(tmp) != 3: raise CmdInputError(cmd + ' requires exactly three parameters') G.nx = round_value(tmp[0]/G.dx) G.ny = round_value(tmp[1]/G.dy) G.nz = round_value(tmp[2]/G.dz) if G.nx == 0 or G.ny == 0 or G.nz == 0: raise CmdInputError(cmd + ' requires at least one cell in every dimension') if G.messages: print('Domain size: {:g} x {:g} x {:g}m ({:d} x {:d} x {:d} = {:g} cells)'.format(tmp[0], tmp[1], tmp[2], G.nx, G.ny, G.nz, (G.nx * G.ny * G.nz))) # Guesstimate at memory usage mem = (((G.nx + 1) * (G.ny + 1) * (G.nz + 1) * 13 * np.dtype(floattype).itemsize + (G.nx + 1) * (G.ny + 1) * (G.nz + 1) * 18) * 1.1) + 30e6 print('Memory (RAM) usage: ~{} required, {} available'.format(human_size(mem), human_size(psutil.virtual_memory().total))) # Time step CFL limit (use either 2D or 3D) and default PML thickness if G.nx == 1: G.dt = 1 / (c * np.sqrt((1 / G.dy) * (1 / G.dy) + (1 / G.dz) * (1 / G.dz))) G.dtlimit = '2D' G.pmlthickness = (0, G.pmlthickness, G.pmlthickness, 0, G.pmlthickness, G.pmlthickness) elif G.ny == 1: G.dt = 1 / (c * np.sqrt((1 / G.dx) * (1 / G.dx) + (1 / G.dz) * (1 / G.dz))) G.dtlimit = '2D' G.pmlthickness = (G.pmlthickness, 0, G.pmlthickness, G.pmlthickness, 0, G.pmlthickness) elif G.nz == 1: G.dt = 1 / (c * np.sqrt((1 / G.dx) * (1 / G.dx) + (1 / G.dy) * (1 / G.dy))) G.dtlimit = '2D' G.pmlthickness = (G.pmlthickness, G.pmlthickness, 0, G.pmlthickness, G.pmlthickness, 0) else: G.dt = 1 / (c * np.sqrt((1 / G.dx) * (1 / G.dx) + (1 / G.dy) * (1 / G.dy) + (1 / G.dz) * (1 / G.dz))) G.dtlimit = '3D' G.pmlthickness = (G.pmlthickness, G.pmlthickness, G.pmlthickness, G.pmlthickness, G.pmlthickness, G.pmlthickness) # Round down time step to nearest float with precision one less than hardware maximum. Avoids inadvertently exceeding the CFL due to binary representation of floating point number. G.dt = round_value(G.dt, decimalplaces=d.getcontext().prec - 1) if G.messages: print('Time step (at {} CFL limit): {:g} secs'.format(G.dtlimit, G.dt)) # Time step stability factor cmd = '#time_step_stability_factor' if singlecmds[cmd] != 'None': tmp = tuple(float(x) for x in singlecmds[cmd].split()) if len(tmp) != 1: raise CmdInputError(cmd + ' requires exactly one parameter') if tmp[0] <= 0 or tmp[0] > 1: raise CmdInputError(cmd + ' requires the value of the time step stability factor to be between zero and one') G.dt = G.dt * tmp[0] if G.messages: print('Time step (modified): {:g} secs'.format(G.dt)) # Time window cmd = '#time_window' tmp = singlecmds[cmd].split() if len(tmp) != 1: raise CmdInputError(cmd + ' requires exactly one parameter to specify the time window. 
Either in seconds or number of iterations.') tmp = tmp[0].lower() # If number of iterations given try: tmp = int(tmp) G.timewindow = (tmp - 1) * G.dt G.iterations = tmp # If real floating point value given except: tmp = float(tmp) if tmp > 0: G.timewindow = tmp G.iterations = round_value((tmp / G.dt)) + 1 else: raise CmdInputError(cmd + ' must have a value greater than zero') if G.messages: print('Time window: {:g} secs ({} iterations)'.format(G.timewindow, G.iterations)) # PML cmd = '#pml_cells' if singlecmds[cmd] != 'None': tmp = singlecmds[cmd].split() if len(tmp) != 1 and len(tmp) != 6: raise CmdInputError(cmd + ' requires either one or six parameters') if len(tmp) == 1: G.pmlthickness = (int(tmp[0]), int(tmp[0]), int(tmp[0]), int(tmp[0]), int(tmp[0]), int(tmp[0])) else: G.pmlthickness = (int(tmp[0]), int(tmp[1]), int(tmp[2]), int(tmp[3]), int(tmp[4]), int(tmp[5])) if 2*G.pmlthickness[0] >= G.nx or 2*G.pmlthickness[1] >= G.ny or 2*G.pmlthickness[2] >= G.nz or 2*G.pmlthickness[3] >= G.nx or 2*G.pmlthickness[4] >= G.ny or 2*G.pmlthickness[5] >= G.nz: raise CmdInputError(cmd + ' has too many cells for the domain size') # src_steps cmd = '#src_steps' if singlecmds[cmd] != 'None': tmp = singlecmds[cmd].split() if len(tmp) != 3: raise CmdInputError(cmd + ' requires exactly three parameters') G.srcstepx = round_value(float(tmp[0])/G.dx) G.srcstepy = round_value(float(tmp[1])/G.dy) G.srcstepz = round_value(float(tmp[2])/G.dz) if G.messages: print('Simple sources will step {:g}m, {:g}m, {:g}m for each model run.'.format(G.srcstepx * G.dx, G.srcstepy * G.dy, G.srcstepz * G.dz)) # rx_steps cmd = '#rx_steps' if singlecmds[cmd] != 'None': tmp = singlecmds[cmd].split() if len(tmp) != 3: raise CmdInputError(cmd + ' requires exactly three parameters') G.rxstepx = round_value(float(tmp[0])/G.dx) G.rxstepy = round_value(float(tmp[1])/G.dy) G.rxstepz = round_value(float(tmp[2])/G.dz) if G.messages: print('All receivers will step {:g}m, {:g}m, {:g}m for each model run.'.format(G.rxstepx * G.dx, G.rxstepy * G.dy, G.rxstepz * G.dz)) # Excitation file for user-defined source waveforms cmd = '#excitation_file' if singlecmds[cmd] != 'None': tmp = singlecmds[cmd].split() if len(tmp) != 1: raise CmdInputError(cmd + ' requires exactly one parameter') excitationfile = tmp[0] # See if file exists at specified path and if not try input file directory if not os.path.isfile(excitationfile): excitationfile = os.path.abspath(os.path.join(G.inputdirectory, excitationfile)) # Get waveform names with open(excitationfile, 'r') as f: waveformIDs = f.readline().split() # Read all waveform values into an array waveformvalues = np.loadtxt(excitationfile, skiprows=1, dtype=floattype) for waveform in range(len(waveformIDs)): if any(x.ID == waveformIDs[waveform] for x in G.waveforms): raise CmdInputError('Waveform with ID {} already exists'.format(waveformIDs[waveform])) w = Waveform() w.ID = waveformIDs[waveform] w.type = 'user' if len(waveformvalues.shape) == 1: w.uservalues = waveformvalues[:] else: w.uservalues = waveformvalues[:,waveform] if G.messages: print('User waveform {} created.'.format(w.ID)) G.waveforms.append(w)
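# Illustrative sketch (standalone): the CFL time-step limit applied above once
# #dx_dy_dz and #domain have been read, for both the 3D case and the 2D
# (single cell slice) case.
import numpy as np

C = 299792458.0  # speed of light in vacuum (m/s)


def cfl_dt(dx, dy, dz, dimension='3D'):
    """Return the maximum stable FDTD time step for the given spatial steps."""
    if dimension == '3D':
        return 1 / (C * np.sqrt(1 / dx**2 + 1 / dy**2 + 1 / dz**2))
    # 2D: the invariant direction drops out of the limit, e.g. a model one
    # cell thick in z only uses dx and dy
    return 1 / (C * np.sqrt(1 / dx**2 + 1 / dy**2))


# cfl_dt(0.002, 0.002, 0.002) -> ~3.85e-12 s for a 2 mm cubic cell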
args = parser.parse_args()

# Load base result
baseresult = dict(np.load(args.baseresult))

# Get machine/CPU/OS details
hostinfo = get_host_info()
try:
    machineIDlong = str(baseresult['machineID'])
    # machineIDlong = 'Dell PowerEdge R630; Intel(R) Xeon(R) CPU E5-2630 v3 @ 2.40GHz; Linux (3.10.0-327.18.2.el7.x86_64)'  # Use to manually describe machine
    machineID = machineIDlong.split(';')[0]
    cpuID = machineIDlong.split(';')[1]
    cpuID = cpuID.split('GHz')[0].split('x')[1][1::] + 'GHz'
except KeyError:
    hyperthreading = ', {} cores with Hyper-Threading'.format(hostinfo['logicalcores']) if hostinfo['hyperthreading'] else ''
    machineIDlong = '{}; {} x {} ({} cores{}); {} RAM; {}'.format(hostinfo['machineID'], hostinfo['sockets'], hostinfo['cpuID'], hostinfo['physicalcores'], hyperthreading, human_size(hostinfo['ram'], a_kilobyte_is_1024_bytes=True), hostinfo['osversion'])
print('Host: {}'.format(machineIDlong))

# Base result - general info
print('Model: {}'.format(args.baseresult))
cells = np.array([baseresult['numcells'][0]])  # Length of cubic model side for cells per second metric
baseplotlabel = os.path.splitext(os.path.split(args.baseresult)[1])[0] + '.in'

# Base result - CPU threads and times info from Numpy archive
if baseresult['cputhreads'].size != 0:
    for i in range(len(baseresult['cputhreads'])):
        print('{} CPU (OpenMP) thread(s): {:g} s'.format(baseresult['cputhreads'][i], baseresult['cputimes'][i]))
    cpucellspersec = np.array([(baseresult['numcells'][0] * baseresult['numcells'][1] * baseresult['numcells'][2] * baseresult['iterations']) / baseresult['cputimes'][0]])

# Base result - GPU time info
gpuIDs = baseresult['gpuIDs'].tolist()
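# Illustrative sketch (hypothetical helper): the cells-per-second throughput
# metric computed above, as a standalone function.
def cells_per_second(numcells, iterations, solvetime):
    """Throughput metric: total Yee cell updates divided by solving time (s)."""
    nx, ny, nz = numcells
    return (nx * ny * nz * iterations) / solvetime


# e.g. a 100 x 100 x 100 cell model run for 1000 iterations in 50 s:
# cells_per_second((100, 100, 100), 1000, 50.0) -> 2.0e7 cells/s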
def run_main(args): """Top-level function that controls what mode of simulation (standard/optimsation/benchmark etc...) is run. Args: args (dict): Namespace with input arguments from command line or api. """ with open_path_file(args.inputfile) as inputfile: # Get information about host machine hostinfo = get_host_info() hyperthreading = ', {} cores with Hyper-Threading'.format(hostinfo['logicalcores']) if hostinfo['hyperthreading'] else '' print('\nHost: {}; {} x {} ({} cores{}); {} RAM; {}'.format(hostinfo['machineID'], hostinfo['sockets'], hostinfo['cpuID'], hostinfo['physicalcores'], hyperthreading, human_size(hostinfo['ram'], a_kilobyte_is_1024_bytes=True), hostinfo['osversion'])) # Create a separate namespace that users can access in any Python code blocks in the input file usernamespace = {'c': c, 'e0': e0, 'm0': m0, 'z0': z0, 'number_model_runs': args.n, 'inputfile': os.path.abspath(inputfile.name)} ####################################### # Process for benchmarking simulation # ####################################### if args.benchmark: if args.mpi or args.opt_taguchi or args.task or args.n > 1: raise GeneralError('Benchmarking mode cannot be combined with MPI, job array, or Taguchi optimisation modes, or multiple model runs.') run_benchmark_sim(args, inputfile, usernamespace) #################################################### # Process for simulation with Taguchi optimisation # #################################################### elif args.opt_taguchi: if args.mpi_worker: # Special case for MPI spawned workers - they do not need to enter the Taguchi optimisation mode run_mpi_sim(args, inputfile, usernamespace) else: from gprMax.optimisation_taguchi import run_opt_sim run_opt_sim(args, inputfile, usernamespace) ################################################ # Process for standard simulation (CPU or GPU) # ################################################ else: # Mixed mode MPI with OpenMP or CUDA - MPI task farm for models with each model parallelised with OpenMP (CPU) or CUDA (GPU) if args.mpi: if args.n == 1: raise GeneralError('MPI is not beneficial when there is only one model to run') if args.task: raise GeneralError('MPI cannot be combined with job array mode') run_mpi_sim(args, inputfile, usernamespace) # Standard behaviour - models run serially with each model parallelised with OpenMP (CPU) or CUDA (GPU) else: if args.task and args.restart: raise GeneralError('Job array and restart modes cannot be used together') run_std_sim(args, inputfile, usernamespace)
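# Illustrative sketch: the usernamespace built in run_main() above (c, e0, m0,
# z0, number_model_runs, inputfile, and later current_model_run) is what an
# input file's Python block can read. A minimal example input-file snippet is
# shown here as a reference string; the #python:/#end_python: delimiters are
# gprMax's input-file convention for Python blocks.
EXAMPLE_PYTHON_BLOCK = """
#python:
# Report the run number and the vacuum impedance made available by gprMax
print('Run {} of {}'.format(current_model_run, number_model_runs))
print('z0 = {:.2f} ohms'.format(z0))
#end_python:
"""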
def process_singlecmds(singlecmds, G): """Checks the validity of command parameters and creates instances of classes of parameters. Args: singlecmds (dict): Commands that can only occur once in the model. G (class): Grid class instance - holds essential parameters describing the model. """ # Check validity of command parameters in order needed # messages cmd = '#messages' if singlecmds[cmd] != 'None': tmp = singlecmds[cmd].split() if len(tmp) != 1: raise CmdInputError(cmd + ' requires exactly one parameter') if singlecmds[cmd].lower() == 'y': G.messages = True elif singlecmds[cmd].lower() == 'n': G.messages = False else: raise CmdInputError(cmd + ' requires input values of either y or n') # Title cmd = '#title' if singlecmds[cmd] != 'None': G.title = singlecmds[cmd] if G.messages: print('Model title: {}'.format(G.title)) # Number of threads (OpenMP) to use cmd = '#num_threads' if sys.platform == 'darwin': os.environ['OMP_WAIT_POLICY'] = 'ACTIVE' # What to do with threads when they are waiting; can drastically effect performance os.environ['OMP_DYNAMIC'] = 'FALSE' os.environ['OMP_PROC_BIND'] = 'TRUE' # Bind threads to physical cores if singlecmds[cmd] != 'None': tmp = tuple(int(x) for x in singlecmds[cmd].split()) if len(tmp) != 1: raise CmdInputError(cmd + ' requires exactly one parameter to specify the number of threads to use') if tmp[0] < 1: raise CmdInputError(cmd + ' requires the value to be an integer not less than one') G.nthreads = tmp[0] os.environ['OMP_NUM_THREADS'] = str(G.nthreads) elif os.environ.get('OMP_NUM_THREADS'): G.nthreads = int(os.environ.get('OMP_NUM_THREADS')) else: # Set number of threads to number of physical CPU cores, i.e. avoid hyperthreading with OpenMP G.nthreads = psutil.cpu_count(logical=False) os.environ['OMP_NUM_THREADS'] = str(G.nthreads) if G.messages: machineID, cpuID, osversion = get_machine_cpu_os() print('Number of threads: {} ({})'.format(G.nthreads, cpuID)) if G.nthreads > psutil.cpu_count(logical=False): print(Fore.RED + 'WARNING: You have specified more threads ({}) than available physical CPU cores ({}). 
This may lead to degraded performance.'.format(G.nthreads, psutil.cpu_count(logical=False)) + Style.RESET_ALL) # Spatial discretisation cmd = '#dx_dy_dz' tmp = [float(x) for x in singlecmds[cmd].split()] if len(tmp) != 3: raise CmdInputError(cmd + ' requires exactly three parameters') if tmp[0] <= 0: raise CmdInputError(cmd + ' requires the x-direction spatial step to be greater than zero') if tmp[1] <= 0: raise CmdInputError(cmd + ' requires the y-direction spatial step to be greater than zero') if tmp[2] <= 0: raise CmdInputError(cmd + ' requires the z-direction spatial step to be greater than zero') G.dx = tmp[0] G.dy = tmp[1] G.dz = tmp[2] if G.messages: print('Spatial discretisation: {:g} x {:g} x {:g}m'.format(G.dx, G.dy, G.dz)) # Domain cmd = '#domain' tmp = [float(x) for x in singlecmds[cmd].split()] if len(tmp) != 3: raise CmdInputError(cmd + ' requires exactly three parameters') G.nx = round_value(tmp[0] / G.dx) G.ny = round_value(tmp[1] / G.dy) G.nz = round_value(tmp[2] / G.dz) if G.nx == 0 or G.ny == 0 or G.nz == 0: raise CmdInputError(cmd + ' requires at least one cell in every dimension') if G.messages: print('Domain size: {:g} x {:g} x {:g}m ({:d} x {:d} x {:d} = {:g} cells)'.format(tmp[0], tmp[1], tmp[2], G.nx, G.ny, G.nz, (G.nx * G.ny * G.nz))) # Estimate memory (RAM) usage stdoverhead = 70e6 floatarrays = (6 + 6 + 1) * (G.nx + 1) * (G.ny + 1) * (G.nz + 1) * np.dtype(floattype).itemsize # 6 x field arrays + 6 x ID arrays + 1 x solid array rigidarray = (12 + 6) * (G.nx + 1) * (G.ny + 1) * (G.nz + 1) * np.dtype(np.int8).itemsize memestimate = stdoverhead + floatarrays + rigidarray if memestimate > psutil.virtual_memory().total: print(Fore.RED + 'WARNING: Estimated memory (RAM) required ~{} exceeds {} detected!\n'.format(human_size(memestimate), human_size(psutil.virtual_memory().total, a_kilobyte_is_1024_bytes=True)) + Style.RESET_ALL) if G.messages: print('Estimated memory (RAM) required: ~{} ({} detected)'.format(human_size(memestimate), human_size(psutil.virtual_memory().total, a_kilobyte_is_1024_bytes=True))) # Time step CFL limit (use either 2D or 3D) and default PML thickness if G.nx == 1: G.dt = 1 / (c * np.sqrt((1 / G.dy) * (1 / G.dy) + (1 / G.dz) * (1 / G.dz))) G.dimension = '2D' G.pmlthickness['xminus'] = 0 G.pmlthickness['xplus'] = 0 elif G.ny == 1: G.dt = 1 / (c * np.sqrt((1 / G.dx) * (1 / G.dx) + (1 / G.dz) * (1 / G.dz))) G.dimension = '2D' G.pmlthickness['yminus'] = 0 G.pmlthickness['yplus'] = 0 elif G.nz == 1: G.dt = 1 / (c * np.sqrt((1 / G.dx) * (1 / G.dx) + (1 / G.dy) * (1 / G.dy))) G.dimension = '2D' G.pmlthickness['zminus'] = 0 G.pmlthickness['zplus'] = 0 else: G.dt = 1 / (c * np.sqrt((1 / G.dx) * (1 / G.dx) + (1 / G.dy) * (1 / G.dy) + (1 / G.dz) * (1 / G.dz))) G.dimension = '3D' # Round down time step to nearest float with precision one less than hardware maximum. Avoids inadvertently exceeding the CFL due to binary representation of floating point number. 
G.dt = round_value(G.dt, decimalplaces=d.getcontext().prec - 1) if G.messages: print('Time step (at {} CFL limit): {:g} secs'.format(G.dimension, G.dt)) # Time step stability factor cmd = '#time_step_stability_factor' if singlecmds[cmd] != 'None': tmp = tuple(float(x) for x in singlecmds[cmd].split()) if len(tmp) != 1: raise CmdInputError(cmd + ' requires exactly one parameter') if tmp[0] <= 0 or tmp[0] > 1: raise CmdInputError(cmd + ' requires the value of the time step stability factor to be between zero and one') G.dt = G.dt * tmp[0] if G.messages: print('Time step (modified): {:g} secs'.format(G.dt)) # Time window cmd = '#time_window' tmp = singlecmds[cmd].split() if len(tmp) != 1: raise CmdInputError(cmd + ' requires exactly one parameter to specify the time window. Either in seconds or number of iterations.') tmp = tmp[0].lower() # If number of iterations given try: tmp = int(tmp) G.timewindow = (tmp - 1) * G.dt G.iterations = tmp # If real floating point value given except: tmp = float(tmp) if tmp > 0: G.timewindow = tmp G.iterations = round_value((tmp / G.dt)) + 1 else: raise CmdInputError(cmd + ' must have a value greater than zero') if G.messages: print('Time window: {:g} secs ({} iterations)'.format(G.timewindow, G.iterations)) # PML cmd = '#pml_cells' if singlecmds[cmd] != 'None': tmp = singlecmds[cmd].split() if len(tmp) != 1 and len(tmp) != 6: raise CmdInputError(cmd + ' requires either one or six parameters') if len(tmp) == 1: for key in G.pmlthickness.keys(): G.pmlthickness[key] = int(tmp[0]) else: G.pmlthickness['xminus'] = int(tmp[0]) G.pmlthickness['yminus'] = int(tmp[1]) G.pmlthickness['zminus'] = int(tmp[2]) G.pmlthickness['xplus'] = int(tmp[3]) G.pmlthickness['yplus'] = int(tmp[4]) G.pmlthickness['zplus'] = int(tmp[5]) if 2 * G.pmlthickness['xminus'] >= G.nx or 2 * G.pmlthickness['yminus'] >= G.ny or 2 * G.pmlthickness['zminus'] >= G.nz or 2 * G.pmlthickness['xplus'] >= G.nx or 2 * G.pmlthickness['yplus'] >= G.ny or 2 * G.pmlthickness['zplus'] >= G.nz: raise CmdInputError(cmd + ' has too many cells for the domain size') # src_steps cmd = '#src_steps' if singlecmds[cmd] != 'None': tmp = singlecmds[cmd].split() if len(tmp) != 3: raise CmdInputError(cmd + ' requires exactly three parameters') G.srcsteps[0] = round_value(float(tmp[0]) / G.dx) G.srcsteps[1] = round_value(float(tmp[1]) / G.dy) G.srcsteps[2] = round_value(float(tmp[2]) / G.dz) if G.messages: print('Simple sources will step {:g}m, {:g}m, {:g}m for each model run.'.format(G.srcsteps[0] * G.dx, G.srcsteps[1] * G.dy, G.srcsteps[2] * G.dz)) # rx_steps cmd = '#rx_steps' if singlecmds[cmd] != 'None': tmp = singlecmds[cmd].split() if len(tmp) != 3: raise CmdInputError(cmd + ' requires exactly three parameters') G.rxsteps[0] = round_value(float(tmp[0]) / G.dx) G.rxsteps[1] = round_value(float(tmp[1]) / G.dy) G.rxsteps[2] = round_value(float(tmp[2]) / G.dz) if G.messages: print('All receivers will step {:g}m, {:g}m, {:g}m for each model run.'.format(G.rxsteps[0] * G.dx, G.rxsteps[1] * G.dy, G.rxsteps[2] * G.dz)) # Excitation file for user-defined source waveforms cmd = '#excitation_file' if singlecmds[cmd] != 'None': tmp = singlecmds[cmd].split() if len(tmp) != 1: raise CmdInputError(cmd + ' requires exactly one parameter') excitationfile = tmp[0] # See if file exists at specified path and if not try input file directory if not os.path.isfile(excitationfile): excitationfile = os.path.abspath(os.path.join(G.inputdirectory, excitationfile)) # Get waveform names with open(excitationfile, 'r') as f: waveformIDs = 
f.readline().split() # Read all waveform values into an array waveformvalues = np.loadtxt(excitationfile, skiprows=1, dtype=floattype) for waveform in range(len(waveformIDs)): if any(x.ID == waveformIDs[waveform] for x in G.waveforms): raise CmdInputError('Waveform with ID {} already exists'.format(waveformIDs[waveform])) w = Waveform() w.ID = waveformIDs[waveform] w.type = 'user' if len(waveformvalues.shape) == 1: w.uservalues = waveformvalues[:] else: w.uservalues = waveformvalues[:, waveform] if G.messages: print('User waveform {} created.'.format(w.ID)) G.waveforms.append(w)
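# Illustrative sketch (not from the gprMax source): the '#excitation_file' parsing
# above expects a header line of waveform names followed by one column of amplitude
# values per waveform, read with np.loadtxt(..., skiprows=1). The file name and
# waveform names below are invented purely for this example.
import numpy as np

t = np.linspace(0, 2e-9, 200)
values = np.column_stack((np.exp(-((t - 1e-9) / 2e-10) ** 2),   # Gaussian-like pulse
                          t / t[-1]))                            # linear ramp
np.savetxt('my_waveforms.txt', values, header='my_pulse my_ramp', comments='')

with open('my_waveforms.txt', 'r') as f:
    waveformIDs = f.readline().split()                  # ['my_pulse', 'my_ramp']
waveformvalues = np.loadtxt('my_waveforms.txt', skiprows=1)
print(waveformIDs, waveformvalues.shape)                # (200, 2)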
def run_model(args, currentmodelrun, numbermodelruns, inputfile, usernamespace): """Runs a model - processes the input file; builds the Yee cells; calculates update coefficients; runs main FDTD loop. Args: args (dict): Namespace with command line arguments currentmodelrun (int): Current model run number. numbermodelruns (int): Total number of model runs. inputfile (object): File object for the input file. usernamespace (dict): Namespace that can be accessed by user in any Python code blocks in input file. Returns: tsolve (int): Length of time (seconds) of main FDTD calculations """ # Monitor memory usage p = psutil.Process() # Declare variable to hold FDTDGrid class global G # Normal model reading/building process; bypassed if geometry information to be reused if 'G' not in globals(): # Initialise an instance of the FDTDGrid class G = FDTDGrid() G.inputfilename = os.path.split(inputfile.name)[1] G.inputdirectory = os.path.dirname(os.path.abspath(inputfile.name)) inputfilestr = '\n--- Model {}/{}, input file: {}'.format( currentmodelrun, numbermodelruns, inputfile.name) print(Fore.GREEN + '{} {}\n'.format( inputfilestr, '-' * (get_terminal_width() - 1 - len(inputfilestr))) + Style.RESET_ALL) # Add the current model run to namespace that can be accessed by user in any Python code blocks in input file usernamespace['current_model_run'] = currentmodelrun # Read input file and process any Python or include commands processedlines = process_python_include_code(inputfile, usernamespace) # Print constants/variables in user-accessable namespace uservars = '' for key, value in sorted(usernamespace.items()): if key != '__builtins__': uservars += '{}: {}, '.format(key, value) print( 'Constants/variables used/available for Python scripting: {{{}}}\n' .format(uservars[:-2])) # Write a file containing the input commands after Python or include commands have been processed if args.write_processed: write_processed_file( os.path.join(G.inputdirectory, G.inputfilename), currentmodelrun, numbermodelruns, processedlines) # Check validity of command names and that essential commands are present singlecmds, multicmds, geometry = check_cmd_names(processedlines) # Create built-in materials m = Material(0, 'pec') m.se = float('inf') m.type = 'builtin' m.averagable = False G.materials.append(m) m = Material(1, 'free_space') m.type = 'builtin' G.materials.append(m) # Process parameters for commands that can only occur once in the model process_singlecmds(singlecmds, G) # Process parameters for commands that can occur multiple times in the model print() process_multicmds(multicmds, G) # Initialise an array for volumetric material IDs (solid), boolean arrays for specifying materials not to be averaged (rigid), # an array for cell edge IDs (ID) G.initialise_geometry_arrays() # Initialise arrays for the field components G.initialise_field_arrays() # Process geometry commands in the order they were given process_geometrycmds(geometry, G) # Build the PMLs and calculate initial coefficients print() if all(value == 0 for value in G.pmlthickness.values()): if G.messages: print('PML boundaries: switched off') pass # If all the PMLs are switched off don't need to build anything else: if G.messages: if all(value == G.pmlthickness['x0'] for value in G.pmlthickness.values()): pmlinfo = str(G.pmlthickness['x0']) + ' cells' else: pmlinfo = '' for key, value in G.pmlthickness.items(): pmlinfo += '{}: {} cells, '.format(key, value) pmlinfo = pmlinfo[:-2] print('PML boundaries: {}'.format(pmlinfo)) pbar = tqdm(total=sum(1 for value in 
G.pmlthickness.values() if value > 0), desc='Building PML boundaries', ncols=get_terminal_width() - 1, file=sys.stdout, disable=G.tqdmdisable) build_pmls(G, pbar) pbar.close() # Build the model, i.e. set the material properties (ID) for every edge of every Yee cell print() pbar = tqdm(total=2, desc='Building main grid', ncols=get_terminal_width() - 1, file=sys.stdout, disable=G.tqdmdisable) build_electric_components(G.solid, G.rigidE, G.ID, G) pbar.update() build_magnetic_components(G.solid, G.rigidH, G.ID, G) pbar.update() pbar.close() # Process any voltage sources (that have resistance) to create a new material at the source location for voltagesource in G.voltagesources: voltagesource.create_material(G) # Initialise arrays of update coefficients to pass to update functions G.initialise_std_update_coeff_arrays() # Initialise arrays of update coefficients and temporary values if there are any dispersive materials if Material.maxpoles != 0: G.initialise_dispersive_arrays() # Process complete list of materials - calculate update coefficients, store in arrays, and build text list of materials/properties materialsdata = process_materials(G) if G.messages: materialstable = AsciiTable(materialsdata) materialstable.outer_border = False materialstable.justify_columns[0] = 'right' print(materialstable.table) # Check to see if numerical dispersion might be a problem results = dispersion_analysis(G) if not results['waveform']: print( Fore.RED + "\nWARNING: Numerical dispersion analysis not carried out as either no waveform detected or waveform does not fit within specified time window and is therefore being truncated." + Style.RESET_ALL) elif results['N'] < G.mingridsampling: raise GeneralError( "Non-physical wave propagation: Material '{}' has wavelength sampled by {} cells, less than required minimum for physical wave propagation. Maximum significant frequency estimated as {:g}Hz" .format(results['material'].ID, results['N'], results['maxfreq'])) elif results['deltavp'] and np.abs( results['deltavp']) > G.maxnumericaldisp: print( Fore.RED + "\nWARNING: Potentially significant numerical dispersion. Estimated largest physical phase-velocity error is {:.2f}% in material '{}' whose wavelength sampled by {} cells. Maximum significant frequency estimated as {:g}Hz" .format(results['deltavp'], results['material'].ID, results['N'], results['maxfreq']) + Style.RESET_ALL) elif results['deltavp'] and G.messages: print( "\nNumerical dispersion analysis: estimated largest physical phase-velocity error is {:.2f}% in material '{}' whose wavelength sampled by {} cells. Maximum significant frequency estimated as {:g}Hz" .format(results['deltavp'], results['material'].ID, results['N'], results['maxfreq'])) # If geometry information to be reused between model runs else: inputfilestr = '\n--- Model {}/{}, input file (not re-processed, i.e. 
geometry fixed): {}'.format( currentmodelrun, numbermodelruns, inputfile.name) print(Fore.GREEN + '{} {}\n'.format( inputfilestr, '-' * (get_terminal_width() - 1 - len(inputfilestr))) + Style.RESET_ALL) # Clear arrays for field components G.initialise_field_arrays() # Clear arrays for fields in PML for pml in G.pmls: pml.initialise_field_arrays() # Adjust position of simple sources and receivers if required if G.srcsteps[0] != 0 or G.srcsteps[1] != 0 or G.srcsteps[2] != 0: for source in itertools.chain(G.hertziandipoles, G.magneticdipoles): if currentmodelrun == 1: if source.xcoord + G.srcsteps[0] * ( numbermodelruns - 1) < 0 or source.xcoord + G.srcsteps[0] * ( numbermodelruns - 1) > G.nx or source.ycoord + G.srcsteps[1] * ( numbermodelruns - 1) < 0 or source.ycoord + G.srcsteps[1] * ( numbermodelruns - 1 ) > G.ny or source.zcoord + G.srcsteps[2] * ( numbermodelruns - 1) < 0 or source.zcoord + G.srcsteps[2] * ( numbermodelruns - 1) > G.nz: raise GeneralError( 'Source(s) will be stepped to a position outside the domain.' ) source.xcoord = source.xcoordorigin + (currentmodelrun - 1) * G.srcsteps[0] source.ycoord = source.ycoordorigin + (currentmodelrun - 1) * G.srcsteps[1] source.zcoord = source.zcoordorigin + (currentmodelrun - 1) * G.srcsteps[2] if G.rxsteps[0] != 0 or G.rxsteps[1] != 0 or G.rxsteps[2] != 0: for receiver in G.rxs: if currentmodelrun == 1: if receiver.xcoord + G.rxsteps[0] * ( numbermodelruns - 1) < 0 or receiver.xcoord + G.rxsteps[0] * ( numbermodelruns - 1) > G.nx or receiver.ycoord + G.rxsteps[1] * ( numbermodelruns - 1) < 0 or receiver.ycoord + G.rxsteps[1] * ( numbermodelruns - 1 ) > G.ny or receiver.zcoord + G.rxsteps[2] * ( numbermodelruns - 1 ) < 0 or receiver.zcoord + G.rxsteps[2] * ( numbermodelruns - 1) > G.nz: raise GeneralError( 'Receiver(s) will be stepped to a position outside the domain.' ) receiver.xcoord = receiver.xcoordorigin + (currentmodelrun - 1) * G.rxsteps[0] receiver.ycoord = receiver.ycoordorigin + (currentmodelrun - 1) * G.rxsteps[1] receiver.zcoord = receiver.zcoordorigin + (currentmodelrun - 1) * G.rxsteps[2] # Write files for any geometry views and geometry object outputs if not (G.geometryviews or G.geometryobjectswrite) and args.geometry_only: print( Fore.RED + '\nWARNING: No geometry views or geometry objects to output found.' 
+ Style.RESET_ALL) if G.geometryviews: print() for i, geometryview in enumerate(G.geometryviews): geometryview.set_filename(currentmodelrun, numbermodelruns, G) pbar = tqdm(total=geometryview.datawritesize, unit='byte', unit_scale=True, desc='Writing geometry view file {}/{}, {}'.format( i + 1, len(G.geometryviews), os.path.split(geometryview.filename)[1]), ncols=get_terminal_width() - 1, file=sys.stdout, disable=G.tqdmdisable) geometryview.write_vtk(currentmodelrun, numbermodelruns, G, pbar) pbar.close() if G.geometryobjectswrite: for i, geometryobject in enumerate(G.geometryobjectswrite): pbar = tqdm(total=geometryobject.datawritesize, unit='byte', unit_scale=True, desc='Writing geometry object file {}/{}, {}'.format( i + 1, len(G.geometryobjectswrite), os.path.split(geometryobject.filename)[1]), ncols=get_terminal_width() - 1, file=sys.stdout, disable=G.tqdmdisable) geometryobject.write_hdf5(G, pbar) pbar.close() # Run simulation (if not only looking at geometry information) if not args.geometry_only: # Prepare any snapshot files for snapshot in G.snapshots: snapshot.prepare_vtk_imagedata(currentmodelrun, numbermodelruns, G) # Output filename inputfileparts = os.path.splitext( os.path.join(G.inputdirectory, G.inputfilename)) if numbermodelruns == 1: outputfile = inputfileparts[0] + '.out' else: outputfile = inputfileparts[0] + str(currentmodelrun) + '.out' print('\nOutput file: {}\n'.format(outputfile)) # Main FDTD solving functions for either CPU or GPU tsolve = solve_cpu(currentmodelrun, numbermodelruns, G) # Write an output file in HDF5 format write_hdf5_outputfile(outputfile, G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz, G) if G.messages: print('Memory (RAM) used: ~{}'.format( human_size(p.memory_info().rss))) print('Solving time [HH:MM:SS]: {}'.format( datetime.timedelta(seconds=tsolve))) return tsolve # If geometry information to be reused between model runs then FDTDGrid class instance must be global so that it persists if not args.geometry_fixed: del G
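# Small helper (illustrative only) reproducing the output-file naming used above:
# a single run writes <input>.out, while multiple runs append the run number.
import os

def output_filename(inputfilename, currentmodelrun, numbermodelruns):
    base = os.path.splitext(inputfilename)[0]
    if numbermodelruns == 1:
        return base + '.out'
    return base + str(currentmodelrun) + '.out'

print(output_filename('mymodel.in', 1, 1))    # mymodel.out
print(output_filename('mymodel.in', 3, 60))   # mymodel3.out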
def process_singlecmds(singlecmds, multicmds, G): """Checks the validity of command parameters and creates instances of classes of parameters. Args: singlecmds (dict): Commands that can only occur once in the model. multicmds (dict): Commands that can have multiple instances in the model (required to pass to process_materials_file function). G (class): Grid class instance - holds essential parameters describing the model. """ # Check validity of command parameters in order needed # messages cmd = "#messages" if singlecmds[cmd] != "None": tmp = singlecmds[cmd].split() if len(tmp) != 1: raise CmdInputError(cmd + " requires exactly one parameter") if singlecmds[cmd].lower() == "y": G.messages = True elif singlecmds[cmd].lower() == "n": G.messages = False else: raise CmdInputError(cmd + " requires input values of either y or n") # Title cmd = "#title" if singlecmds[cmd] != "None": G.title = singlecmds[cmd] if G.messages: print("Model title: {}".format(G.title)) # Number of processors to run on (OpenMP) cmd = "#num_threads" ompthreads = os.environ.get("OMP_NUM_THREADS") if singlecmds[cmd] != "None": tmp = tuple(int(x) for x in singlecmds[cmd].split()) if len(tmp) != 1: raise CmdInputError(cmd + " requires exactly one parameter to specify the number of threads to use") if tmp[0] < 1: raise CmdInputError(cmd + " requires the value to be an integer not less than one") G.nthreads = tmp[0] elif ompthreads: G.nthreads = int(ompthreads) else: # Set number of threads to number of physical CPU cores, i.e. avoid hyperthreading with OpenMP G.nthreads = psutil.cpu_count(logical=False) if G.messages: print("Number of threads: {}".format(G.nthreads)) # Spatial discretisation cmd = "#dx_dy_dz" tmp = [float(x) for x in singlecmds[cmd].split()] if len(tmp) != 3: raise CmdInputError(cmd + " requires exactly three parameters") if tmp[0] <= 0: raise CmdInputError(cmd + " requires the x-direction spatial step to be greater than zero") if tmp[1] <= 0: raise CmdInputError(cmd + " requires the y-direction spatial step to be greater than zero") if tmp[2] <= 0: raise CmdInputError(cmd + " requires the z-direction spatial step to be greater than zero") G.dx = tmp[0] G.dy = tmp[1] G.dz = tmp[2] if G.messages: print("Spatial discretisation: {:g} x {:g} x {:g}m".format(G.dx, G.dy, G.dz)) # Domain cmd = "#domain" tmp = [float(x) for x in singlecmds[cmd].split()] if len(tmp) != 3: raise CmdInputError(cmd + " requires exactly three parameters") G.nx = round_value(tmp[0] / G.dx) G.ny = round_value(tmp[1] / G.dy) G.nz = round_value(tmp[2] / G.dz) if G.messages: print( "Model domain: {:g} x {:g} x {:g}m ({:d} x {:d} x {:d} = {:g} cells)".format( tmp[0], tmp[1], tmp[2], G.nx, G.ny, G.nz, (G.nx * G.ny * G.nz) ) ) mem = ( ( (G.nx + 1) * (G.ny + 1) * (G.nz + 1) * 13 * np.dtype(floattype).itemsize + (G.nx + 1) * (G.ny + 1) * (G.nz + 1) * 18 ) * 1.1 ) + 30e6 print( "Memory (RAM) usage: ~{} required, {} available".format( human_size(mem), human_size(psutil.virtual_memory().total) ) ) # Time step CFL limit - use either 2D or 3D (default) cmd = "#time_step_limit_type" if singlecmds[cmd] != "None": tmp = singlecmds[cmd].split() if len(tmp) != 1: raise CmdInputError(cmd + " requires exactly one parameter") if singlecmds[cmd].lower() == "2d": if G.nx == 1: G.dt = 1 / (c * np.sqrt((1 / G.dy) * (1 / G.dy) + (1 / G.dz) * (1 / G.dz))) elif G.ny == 1: G.dt = 1 / (c * np.sqrt((1 / G.dx) * (1 / G.dx) + (1 / G.dz) * (1 / G.dz))) elif G.nz == 1: G.dt = 1 / (c * np.sqrt((1 / G.dx) * (1 / G.dx) + (1 / G.dy) * (1 / G.dy))) else: raise CmdInputError(cmd 
+ " 2D CFL limit can only be used when one dimension of the domain is one cell") elif singlecmds[cmd].lower() == "3d": G.dt = 1 / (c * np.sqrt((1 / G.dx) * (1 / G.dx) + (1 / G.dy) * (1 / G.dy) + (1 / G.dz) * (1 / G.dz))) else: raise CmdInputError(cmd + " requires input values of either 2D or 3D") else: G.dt = 1 / (c * np.sqrt((1 / G.dx) * (1 / G.dx) + (1 / G.dy) * (1 / G.dy) + (1 / G.dz) * (1 / G.dz))) # Round down time step to nearest float with precision one less than hardware maximum. Avoids inadvertently exceeding the CFL due to binary representation of floating point number. G.dt = round_value(G.dt, decimalplaces=d.getcontext().prec - 1) if G.messages: print("Time step: {:g} secs".format(G.dt)) # Time step stability factor cmd = "#time_step_stability_factor" if singlecmds[cmd] != "None": tmp = tuple(float(x) for x in singlecmds[cmd].split()) if len(tmp) != 1: raise CmdInputError(cmd + " requires exactly one parameter") if tmp[0] <= 0 or tmp[0] > 1: raise CmdInputError( cmd + " requires the value of the time step stability factor to be between zero and one" ) G.dt = G.dt * tmp[0] if G.messages: print("Time step (modified): {:g} secs".format(G.dt)) # Time window cmd = "#time_window" tmp = singlecmds[cmd].split() if len(tmp) != 1: raise CmdInputError( cmd + " requires exactly one parameter to specify the time window. Either in seconds or number of iterations." ) tmp = tmp[0].lower() # If real floating point value given if "." in tmp or "e" in tmp: if float(tmp) > 0: G.timewindow = float(tmp) G.iterations = round_value((float(tmp) / G.dt)) + 1 else: raise CmdInputError(cmd + " must have a value greater than zero") # If number of iterations given else: G.timewindow = (int(tmp) - 1) * G.dt G.iterations = int(tmp) if G.messages: print("Time window: {:g} secs ({} iterations)".format(G.timewindow, G.iterations)) # PML cmd = "#pml_cells" if singlecmds[cmd] != "None": tmp = singlecmds[cmd].split() if len(tmp) != 1 and len(tmp) != 6: raise CmdInputError(cmd + " requires either one or six parameters") if len(tmp) == 1: G.pmlthickness = (int(tmp[0]), int(tmp[0]), int(tmp[0]), int(tmp[0]), int(tmp[0]), int(tmp[0])) else: G.pmlthickness = (int(tmp[0]), int(tmp[1]), int(tmp[2]), int(tmp[3]), int(tmp[4]), int(tmp[5])) if ( 2 * G.pmlthickness[0] >= G.nx or 2 * G.pmlthickness[1] >= G.ny or 2 * G.pmlthickness[2] >= G.nz or 2 * G.pmlthickness[3] >= G.nx or 2 * G.pmlthickness[4] >= G.ny or 2 * G.pmlthickness[5] >= G.nz ): raise CmdInputError(cmd + " has too many cells for the domain size") # src_steps cmd = "#src_steps" if singlecmds[cmd] != "None": tmp = singlecmds[cmd].split() if len(tmp) != 3: raise CmdInputError(cmd + " requires exactly three parameters") G.srcstepx = round_value(float(tmp[0]) / G.dx) G.srcstepy = round_value(float(tmp[1]) / G.dy) G.srcstepz = round_value(float(tmp[2]) / G.dz) if G.messages: print( "All sources will step {:g}m, {:g}m, {:g}m for each model run.".format( G.srcstepx * G.dx, G.srcstepy * G.dy, G.srcstepz * G.dz ) ) # rx_steps cmd = "#rx_steps" if singlecmds[cmd] != "None": tmp = singlecmds[cmd].split() if len(tmp) != 3: raise CmdInputError(cmd + " requires exactly three parameters") G.rxstepx = round_value(float(tmp[0]) / G.dx) G.rxstepy = round_value(float(tmp[1]) / G.dy) G.rxstepz = round_value(float(tmp[2]) / G.dz) if G.messages: print( "All receivers will step {:g}m, {:g}m, {:g}m for each model run.".format( G.rxstepx * G.dx, G.rxstepy * G.dy, G.rxstepz * G.dz ) ) # Excitation file for user-defined source waveforms cmd = "#excitation_file" if singlecmds[cmd] != "None": 
tmp = singlecmds[cmd].split() if len(tmp) != 1: raise CmdInputError(cmd + " requires exactly one parameter") excitationfile = tmp[0] # Open file and get waveform names with open(excitationfile, "r") as f: waveformIDs = f.readline().split() # Read all waveform values into an array waveformvalues = np.loadtxt(excitationfile, skiprows=1, dtype=floattype) for waveform in range(len(waveformIDs)): if any(x.ID == waveformIDs[waveform] for x in G.waveforms): raise CmdInputError("Waveform with ID {} already exists".format(waveformIDs[waveform])) w = Waveform() w.ID = waveformIDs[waveform] w.type = "user" if len(waveformvalues.shape) == 1: w.uservalues = waveformvalues[:] else: w.uservalues = waveformvalues[:, waveform] if G.messages: print("User waveform {} created.".format(w.ID)) G.waveforms.append(w)
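# Worked example (illustrative) of the rough memory estimate printed above:
# 13 per-cell arrays of floattype plus 18 bytes per cell of int8 data, a 10%
# allowance, and ~30 MB of fixed overhead. floattype is assumed here to be
# 4-byte float32; use an itemsize of 8 for a double-precision build.
import numpy as np

def estimate_memory(nx, ny, nz, itemsize=np.dtype(np.float32).itemsize):
    cells = (nx + 1) * (ny + 1) * (nz + 1)
    return (cells * 13 * itemsize + cells * 18) * 1.1 + 30e6

# e.g. a 200 x 200 x 200 cell domain needs roughly 0.6 GB
print('{:.2f} GB'.format(estimate_memory(200, 200, 200) / 1024**3))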
def process_singlecmds(singlecmds, G): """Checks the validity of command parameters and creates instances of classes of parameters. Args: singlecmds (dict): Commands that can only occur once in the model. G (class): Grid class instance - holds essential parameters describing the model. """ # Check validity of command parameters in order needed # messages cmd = '#messages' if singlecmds[cmd] is not None: tmp = singlecmds[cmd].split() if len(tmp) != 1: raise CmdInputError(cmd + ' requires exactly one parameter') if singlecmds[cmd].lower() == 'y': G.messages = True elif singlecmds[cmd].lower() == 'n': G.messages = False else: raise CmdInputError(cmd + ' requires input values of either y or n') # Title cmd = '#title' if singlecmds[cmd] is not None: G.title = singlecmds[cmd] if G.messages: print('Model title: {}'.format(G.title)) # Get information about host machine hostinfo = get_host_info() # Number of threads (OpenMP) to use cmd = '#num_threads' if sys.platform == 'darwin': os.environ[ 'OMP_WAIT_POLICY'] = 'ACTIVE' # Should waiting threads consume CPU power (can drastically effect performance) os.environ[ 'OMP_DYNAMIC'] = 'FALSE' # Number of threads may be adjusted by the run time environment to best utilize system resources os.environ[ 'OMP_PLACES'] = 'cores' # Each place corresponds to a single core (having one or more hardware threads) os.environ['OMP_PROC_BIND'] = 'TRUE' # Bind threads to physical cores # os.environ['OMP_DISPLAY_ENV'] = 'TRUE' # Prints OMP version and environment variables (useful for debug) # Catch bug with Windows Subsystem for Linux (https://github.com/Microsoft/BashOnWindows/issues/785) if 'Microsoft' in hostinfo['osversion']: os.environ['KMP_AFFINITY'] = 'disabled' del os.environ['OMP_PLACES'] del os.environ['OMP_PROC_BIND'] if singlecmds[cmd] is not None: tmp = tuple(int(x) for x in singlecmds[cmd].split()) if len(tmp) != 1: raise CmdInputError( cmd + ' requires exactly one parameter to specify the number of threads to use' ) if tmp[0] < 1: raise CmdInputError( cmd + ' requires the value to be an integer not less than one') G.nthreads = tmp[0] os.environ['OMP_NUM_THREADS'] = str(G.nthreads) elif os.environ.get('OMP_NUM_THREADS'): G.nthreads = int(os.environ.get('OMP_NUM_THREADS')) else: # Set number of threads to number of physical CPU cores G.nthreads = hostinfo['physicalcores'] os.environ['OMP_NUM_THREADS'] = str(G.nthreads) if G.messages: print('Number of CPU (OpenMP) threads: {}'.format(G.nthreads)) if G.nthreads > hostinfo['physicalcores']: print( Fore.RED + 'WARNING: You have specified more threads ({}) than available physical CPU cores ({}). This may lead to degraded performance.' 
.format(G.nthreads, hostinfo['physicalcores']) + Style.RESET_ALL) # Spatial discretisation cmd = '#dx_dy_dz' tmp = [float(x) for x in singlecmds[cmd].split()] if len(tmp) != 3: raise CmdInputError(cmd + ' requires exactly three parameters') if tmp[0] <= 0: raise CmdInputError( cmd + ' requires the x-direction spatial step to be greater than zero') if tmp[1] <= 0: raise CmdInputError( cmd + ' requires the y-direction spatial step to be greater than zero') if tmp[2] <= 0: raise CmdInputError( cmd + ' requires the z-direction spatial step to be greater than zero') G.dx = tmp[0] G.dy = tmp[1] G.dz = tmp[2] if G.messages: print('Spatial discretisation: {:g} x {:g} x {:g}m'.format( G.dx, G.dy, G.dz)) # Domain cmd = '#domain' tmp = [float(x) for x in singlecmds[cmd].split()] if len(tmp) != 3: raise CmdInputError(cmd + ' requires exactly three parameters') G.nx = round_value(tmp[0] / G.dx) G.ny = round_value(tmp[1] / G.dy) G.nz = round_value(tmp[2] / G.dz) if G.nx == 0 or G.ny == 0 or G.nz == 0: raise CmdInputError(cmd + ' requires at least one cell in every dimension') if G.messages: print( 'Domain size: {:g} x {:g} x {:g}m ({:d} x {:d} x {:d} = {:g} cells)' .format(tmp[0], tmp[1], tmp[2], G.nx, G.ny, G.nz, (G.nx * G.ny * G.nz))) # Estimate memory (RAM) usage memestimate = memory_usage(G) # Check if model can be built and/or run on host if memestimate > hostinfo['ram']: raise GeneralError( 'Estimated memory (RAM) required ~{} exceeds {} detected!\n'. format(human_size(memestimate), human_size(hostinfo['ram'], a_kilobyte_is_1024_bytes=True))) if G.messages: print('Estimated memory (RAM) required: ~{}'.format( human_size(memestimate))) # Time step CFL limit (use either 2D or 3D) and default PML thickness if G.nx == 1: G.dt = 1 / (c * np.sqrt((1 / G.dy) * (1 / G.dy) + (1 / G.dz) * (1 / G.dz))) G.dimension = '2D' G.pmlthickness['x0'] = 0 G.pmlthickness['xmax'] = 0 elif G.ny == 1: G.dt = 1 / (c * np.sqrt((1 / G.dx) * (1 / G.dx) + (1 / G.dz) * (1 / G.dz))) G.dimension = '2D' G.pmlthickness['y0'] = 0 G.pmlthickness['ymax'] = 0 elif G.nz == 1: G.dt = 1 / (c * np.sqrt((1 / G.dx) * (1 / G.dx) + (1 / G.dy) * (1 / G.dy))) G.dimension = '2D' G.pmlthickness['z0'] = 0 G.pmlthickness['zmax'] = 0 else: G.dt = 1 / (c * np.sqrt((1 / G.dx) * (1 / G.dx) + (1 / G.dy) * (1 / G.dy) + (1 / G.dz) * (1 / G.dz))) G.dimension = '3D' # Round down time step to nearest float with precision one less than hardware maximum. Avoids inadvertently exceeding the CFL due to binary representation of floating point number. G.dt = round_value(G.dt, decimalplaces=d.getcontext().prec - 1) if G.messages: print('Time step (at {} CFL limit): {:g} secs'.format( G.dimension, G.dt)) # Time step stability factor cmd = '#time_step_stability_factor' if singlecmds[cmd] is not None: tmp = tuple(float(x) for x in singlecmds[cmd].split()) if len(tmp) != 1: raise CmdInputError(cmd + ' requires exactly one parameter') if tmp[0] <= 0 or tmp[0] > 1: raise CmdInputError( cmd + ' requires the value of the time step stability factor to be between zero and one' ) G.dt = G.dt * tmp[0] if G.messages: print('Time step (modified): {:g} secs'.format(G.dt)) # Time window cmd = '#time_window' tmp = singlecmds[cmd].split() if len(tmp) != 1: raise CmdInputError( cmd + ' requires exactly one parameter to specify the time window. Either in seconds or number of iterations.' 
) tmp = tmp[0].lower() # If number of iterations given try: tmp = int(tmp) G.timewindow = (tmp - 1) * G.dt G.iterations = tmp # If real floating point value given except: tmp = float(tmp) if tmp > 0: G.timewindow = tmp G.iterations = round_value((tmp / G.dt)) + 1 else: raise CmdInputError(cmd + ' must have a value greater than zero') if G.messages: print('Time window: {:g} secs ({} iterations)'.format( G.timewindow, G.iterations)) # PML cmd = '#pml_cells' if singlecmds[cmd] is not None: tmp = singlecmds[cmd].split() if len(tmp) != 1 and len(tmp) != 6: raise CmdInputError(cmd + ' requires either one or six parameters') if len(tmp) == 1: for key in G.pmlthickness.keys(): G.pmlthickness[key] = int(tmp[0]) else: G.pmlthickness['x0'] = int(tmp[0]) G.pmlthickness['y0'] = int(tmp[1]) G.pmlthickness['z0'] = int(tmp[2]) G.pmlthickness['xmax'] = int(tmp[3]) G.pmlthickness['ymax'] = int(tmp[4]) G.pmlthickness['zmax'] = int(tmp[5]) if 2 * G.pmlthickness['x0'] >= G.nx or 2 * G.pmlthickness[ 'y0'] >= G.ny or 2 * G.pmlthickness[ 'z0'] >= G.nz or 2 * G.pmlthickness[ 'xmax'] >= G.nx or 2 * G.pmlthickness[ 'ymax'] >= G.ny or 2 * G.pmlthickness['zmax'] >= G.nz: raise CmdInputError(cmd + ' has too many cells for the domain size') # src_steps cmd = '#src_steps' if singlecmds[cmd] is not None: tmp = singlecmds[cmd].split() if len(tmp) != 3: raise CmdInputError(cmd + ' requires exactly three parameters') G.srcsteps[0] = round_value(float(tmp[0]) / G.dx) G.srcsteps[1] = round_value(float(tmp[1]) / G.dy) G.srcsteps[2] = round_value(float(tmp[2]) / G.dz) if G.messages: print( 'Simple sources will step {:g}m, {:g}m, {:g}m for each model run.' .format(G.srcsteps[0] * G.dx, G.srcsteps[1] * G.dy, G.srcsteps[2] * G.dz)) # rx_steps cmd = '#rx_steps' if singlecmds[cmd] is not None: tmp = singlecmds[cmd].split() if len(tmp) != 3: raise CmdInputError(cmd + ' requires exactly three parameters') G.rxsteps[0] = round_value(float(tmp[0]) / G.dx) G.rxsteps[1] = round_value(float(tmp[1]) / G.dy) G.rxsteps[2] = round_value(float(tmp[2]) / G.dz) if G.messages: print( 'All receivers will step {:g}m, {:g}m, {:g}m for each model run.' .format(G.rxsteps[0] * G.dx, G.rxsteps[1] * G.dy, G.rxsteps[2] * G.dz)) # Excitation file for user-defined source waveforms cmd = '#excitation_file' if singlecmds[cmd] is not None: tmp = singlecmds[cmd].split() if len(tmp) != 1: raise CmdInputError(cmd + ' requires exactly one parameter') excitationfile = tmp[0] # See if file exists at specified path and if not try input file directory if not os.path.isfile(excitationfile): excitationfile = os.path.abspath( os.path.join(G.inputdirectory, excitationfile)) # Get waveform names with open(excitationfile, 'r') as f: waveformIDs = f.readline().split() # Read all waveform values into an array waveformvalues = np.loadtxt(excitationfile, skiprows=1, dtype=floattype) for waveform in range(len(waveformIDs)): if any(x.ID == waveformIDs[waveform] for x in G.waveforms): raise CmdInputError( 'Waveform with ID {} already exists'.format( waveformIDs[waveform])) w = Waveform() w.ID = waveformIDs[waveform] w.type = 'user' if len(waveformvalues.shape) == 1: w.uservalues = waveformvalues[:] else: w.uservalues = waveformvalues[:, waveform] if G.messages: print('User waveform {} created.'.format(w.ID)) G.waveforms.append(w)
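# Worked example (illustrative) of the CFL-limited time step set above:
# dt = 1 / (c * sqrt(1/dx^2 + 1/dy^2 + 1/dz^2)) in 3D, with the singleton
# dimension dropped for a 2D model, exactly as in the branches above.
import math

c = 299792458.0   # speed of light (m/s)

def cfl_dt(dx, dy, dz=None):
    terms = 1 / dx ** 2 + 1 / dy ** 2 + (1 / dz ** 2 if dz else 0)
    return 1 / (c * math.sqrt(terms))

print(cfl_dt(0.002, 0.002, 0.002))   # ~3.85e-12 s for a 2 mm cubic cell (3D)
print(cfl_dt(0.002, 0.002))          # ~4.72e-12 s for the corresponding 2D case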
def run_model(args, modelrun, numbermodelruns, inputfile, usernamespace): """Runs a model - processes the input file; builds the Yee cells; calculates update coefficients; runs main FDTD loop. Args: args (dict): Namespace with command line arguments modelrun (int): Current model run number. numbermodelruns (int): Total number of model runs. inputfile (str): Name of the input file to open. usernamespace (dict): Namespace that can be accessed by user in any Python code blocks in input file. Returns: tsolve (int): Length of time (seconds) of main FDTD calculations """ # Monitor memory usage p = psutil.Process() print('\n{}\n\nModel input file: {}\n'.format(68*'*', inputfile)) # Add the current model run to namespace that can be accessed by user in any Python code blocks in input file usernamespace['current_model_run'] = modelrun print('Constants/variables available for Python scripting: {}\n'.format(usernamespace)) # Process any user input Python commands processedlines = python_code_blocks(inputfile, usernamespace) # Write a file containing the input commands after Python blocks have been processed if args.write_python: write_python_processed(inputfile, modelrun, numbermodelruns, processedlines) # Check validity of command names & that essential commands are present singlecmds, multicmds, geometry = check_cmd_names(processedlines) # Initialise an instance of the FDTDGrid class G = FDTDGrid() G.inputdirectory = usernamespace['inputdirectory'] # Process parameters for commands that can only occur once in the model process_singlecmds(singlecmds, multicmds, G) # Process parameters for commands that can occur multiple times in the model process_multicmds(multicmds, G) # Initialise an array for volumetric material IDs (solid), boolean arrays for specifying materials not to be averaged (rigid), # an array for cell edge IDs (ID), and arrays for the field components. G.initialise_std_arrays() # Process the geometry commands in the order they were given tinputprocstart = perf_counter() process_geometrycmds(geometry, G) tinputprocend = perf_counter() print('\nInput file processed in [HH:MM:SS]: {}'.format(datetime.timedelta(seconds=int(tinputprocend - tinputprocstart)))) # Build the PML and calculate initial coefficients build_pmls(G) # Build the model, i.e. 
set the material properties (ID) for every edge of every Yee cell tbuildstart = perf_counter() build_electric_components(G.solid, G.rigidE, G.ID, G) build_magnetic_components(G.solid, G.rigidH, G.ID, G) tbuildend = perf_counter() print('\nModel built in [HH:MM:SS]: {}'.format(datetime.timedelta(seconds=int(tbuildend - tbuildstart)))) # Process any voltage sources (that have resistance) to create a new material at the source location for voltagesource in G.voltagesources: voltagesource.create_material(G) # Initialise arrays of update coefficients to pass to update functions G.initialise_std_updatecoeff_arrays() # Initialise arrays of update coefficients and temporary values if there are any dispersive materials if Material.maxpoles != 0: G.initialise_dispersive_arrays() # Calculate update coefficients, store in arrays, and list materials in model if G.messages: print('\nMaterials:\n') print('ID\tName\t\tProperties') print('{}'.format('-'*50)) for material in G.materials: # Calculate update coefficients for material material.calculate_update_coeffsE(G) material.calculate_update_coeffsH(G) # Store all update coefficients together G.updatecoeffsE[material.numID, :] = material.CA, material.CBx, material.CBy, material.CBz, material.srce G.updatecoeffsH[material.numID, :] = material.DA, material.DBx, material.DBy, material.DBz, material.srcm # Store coefficients for any dispersive materials if Material.maxpoles != 0: z = 0 for pole in range(Material.maxpoles): G.updatecoeffsdispersive[material.numID, z:z+3] = e0 * material.eqt2[pole], material.eqt[pole], material.zt[pole] z += 3 if G.messages: if material.deltaer and material.tau: tmp = 'delta_epsr={}, tau={} secs; '.format(', '.join('{:g}'.format(deltaer) for deltaer in material.deltaer), ', '.join('{:g}'.format(tau) for tau in material.tau)) else: tmp = '' if material.average: dielectricsmoothing = 'dielectric smoothing permitted.' else: dielectricsmoothing = 'dielectric smoothing not permitted.' print('{:3}\t{:12}\tepsr={:g}, sig={:g} S/m; mur={:g}, sig*={:g} S/m; '.format(material.numID, material.ID, material.er, material.se, material.mr, material.sm) + tmp + dielectricsmoothing) # Check to see if numerical dispersion might be a problem if dispersion_check(G.waveforms, G.materials, G.dx, G.dy, G.dz): print('\nWARNING: Potential numerical dispersion in the simulation. 
Check the spatial discretisation against the smallest wavelength present.') # Write files for any geometry views if not G.geometryviews and args.geometry_only: raise GeneralError('No geometry views found.') elif G.geometryviews: tgeostart = perf_counter() for geometryview in G.geometryviews: geometryview.write_file(modelrun, numbermodelruns, G) tgeoend = perf_counter() print('\nGeometry file(s) written in [HH:MM:SS]: {}'.format(datetime.timedelta(seconds=int(tgeoend - tgeostart)))) # Run simulation if not doing only geometry if not args.geometry_only: # Prepare any snapshot files for snapshot in G.snapshots: snapshot.prepare_file(modelrun, numbermodelruns, G) # Prepare output file inputfileparts = os.path.splitext(inputfile) if numbermodelruns == 1: outputfile = inputfileparts[0] + '.out' else: outputfile = inputfileparts[0] + str(modelrun) + '.out' sys.stdout.write('\nOutput to file: {}\n'.format(outputfile)) sys.stdout.flush() f = prepare_output_file(outputfile, G) # Adjust position of sources and receivers if required if G.srcstepx > 0 or G.srcstepy > 0 or G.srcstepz > 0: for source in itertools.chain(G.hertziandipoles, G.magneticdipoles, G.voltagesources, G.transmissionlines): source.xcoord += (modelrun - 1) * G.srcstepx source.ycoord += (modelrun - 1) * G.srcstepy source.zcoord += (modelrun - 1) * G.srcstepz if G.rxstepx > 0 or G.rxstepy > 0 or G.rxstepz > 0: for receiver in G.rxs: receiver.xcoord += (modelrun - 1) * G.rxstepx receiver.ycoord += (modelrun - 1) * G.rxstepy receiver.zcoord += (modelrun - 1) * G.rxstepz ################################## # Main FDTD calculation loop # ################################## tsolvestart = perf_counter() # Absolute time abstime = 0 for timestep in range(G.iterations): if timestep == 0: tstepstart = perf_counter() # Write field outputs to file write_output(f, timestep, G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz, G) # Write any snapshots to file for snapshot in G.snapshots: if snapshot.time == timestep + 1: snapshot.write_snapshot(G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz, G) # Update electric field components if Material.maxpoles == 0: # All materials are non-dispersive so do standard update update_electric(G.nx, G.ny, G.nz, G.nthreads, G.updatecoeffsE, G.ID, G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz) elif Material.maxpoles == 1: # If there are any dispersive materials do 1st part of dispersive update (it is split into two parts as it requires present and updated electric field values). update_electric_dispersive_1pole_A(G.nx, G.ny, G.nz, G.nthreads, G.updatecoeffsE, G.updatecoeffsdispersive, G.ID, G.Tx, G.Ty, G.Tz, G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz) elif Material.maxpoles > 1: update_electric_dispersive_multipole_A(G.nx, G.ny, G.nz, G.nthreads, Material.maxpoles, G.updatecoeffsE, G.updatecoeffsdispersive, G.ID, G.Tx, G.Ty, G.Tz, G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz) # Update electric field components with the PML correction update_electric_pml(G) # Update electric field components from sources for voltagesource in G.voltagesources: voltagesource.update_electric(abstime, G.updatecoeffsE, G.ID, G.Ex, G.Ey, G.Ez, G) for transmissionline in G.transmissionlines: transmissionline.update_electric(abstime, G.Ex, G.Ey, G.Ez, G) for hertziandipole in G.hertziandipoles: # Update any Hertzian dipole sources last hertziandipole.update_electric(abstime, G.updatecoeffsE, G.ID, G.Ex, G.Ey, G.Ez, G) # If there are any dispersive materials do 2nd part of dispersive update (it is split into two parts as it requires present and updated electric field values). 
Therefore it can only be completely updated after the electric field has been updated by the PML and source updates. if Material.maxpoles == 1: update_electric_dispersive_1pole_B(G.nx, G.ny, G.nz, G.nthreads, G.updatecoeffsdispersive, G.ID, G.Tx, G.Ty, G.Tz, G.Ex, G.Ey, G.Ez) elif Material.maxpoles > 1: update_electric_dispersive_multipole_B(G.nx, G.ny, G.nz, G.nthreads, Material.maxpoles, G.updatecoeffsdispersive, G.ID, G.Tx, G.Ty, G.Tz, G.Ex, G.Ey, G.Ez) # Increment absolute time value abstime += 0.5 * G.dt # Update magnetic field components update_magnetic(G.nx, G.ny, G.nz, G.nthreads, G.updatecoeffsH, G.ID, G.Ex, G.Ey, G.Ez, G.Hx, G.Hy, G.Hz) # Update magnetic field components with the PML correction update_magnetic_pml(G) # Update magnetic field components from sources for transmissionline in G.transmissionlines: transmissionline.update_magnetic(abstime, G.Hx, G.Hy, G.Hz, G) for magneticdipole in G.magneticdipoles: magneticdipole.update_magnetic(abstime, G.updatecoeffsH, G.ID, G.Hx, G.Hy, G.Hz, G) # Increment absolute time value abstime += 0.5 * G.dt # Calculate time for two iterations, used to estimate overall runtime if timestep == 1: tstepend = perf_counter() runtime = datetime.timedelta(seconds=int((tstepend - tstepstart) / 2 * G.iterations)) sys.stdout.write('Estimated runtime [HH:MM:SS]: {}\n'.format(runtime)) sys.stdout.write('Solving for model run {} of {}...\n'.format(modelrun, numbermodelruns)) sys.stdout.flush() elif timestep > 1: update_progress((timestep + 1) / G.iterations) # Close output file f.close() tsolveend = perf_counter() print('\n\nSolving took [HH:MM:SS]: {}'.format(datetime.timedelta(seconds=int(tsolveend - tsolvestart)))) print('Peak memory (approx) used: {}'.format(human_size(p.memory_info().rss))) return int(tsolveend - tsolvestart)
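# Minimal, self-contained 1D Yee-grid sketch (not gprMax code) illustrating the
# leapfrog ordering the loop above follows in 3D: update E (then PML/source
# corrections), advance time by dt/2, update H, advance time by the other dt/2.
# Fields are in normalised units (Hy scaled by the free-space impedance); the
# grid size and soft Gaussian source are arbitrary.
import numpy as np

nx, iterations = 200, 400
c = 299792458.0
dx = 1e-3
dt = dx / (2 * c)          # comfortably inside the 1D CFL limit (dx / c)
Ez = np.zeros(nx)
Hy = np.zeros(nx)
abstime = 0.0
for n in range(iterations):
    Ez[1:] += (c * dt / dx) * (Hy[1:] - Hy[:-1])     # electric field update
    Ez[nx // 2] += np.exp(-((n - 60) / 20.0) ** 2)   # soft source injection
    abstime += 0.5 * dt
    Hy[:-1] += (c * dt / dx) * (Ez[1:] - Ez[:-1])    # magnetic field update
    abstime += 0.5 * dt
print('peak |Ez| after {} iterations: {:.3f}'.format(iterations, np.abs(Ez).max()))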
def run_main(args): """Top-level function that controls what mode of simulation (standard/optimisation/benchmark etc...) is run. Args: args (dict): Namespace with input arguments from command line or api. """ numbermodelruns = args.n with open_path_file(args.inputfile) as inputfile: # Get information about host machine hostinfo = get_host_info() print('\nHost: {}; {} ({} cores); {} RAM; {}'.format( hostinfo['machineID'], hostinfo['cpuID'], hostinfo['cpucores'], human_size(hostinfo['ram'], a_kilobyte_is_1024_bytes=True), hostinfo['osversion'])) # Create a separate namespace that users can access in any Python code blocks in the input file usernamespace = { 'c': c, 'e0': e0, 'm0': m0, 'z0': z0, 'number_model_runs': numbermodelruns, 'input_directory': os.path.dirname(os.path.abspath(inputfile.name)) } ####################################### # Process for benchmarking simulation # ####################################### if args.benchmark: run_benchmark_sim(args, inputfile, usernamespace) #################################################### # Process for simulation with Taguchi optimisation # #################################################### elif args.opt_taguchi: if args.benchmark: raise GeneralError( 'Taguchi optimisation should not be used with benchmarking mode' ) from gprMax.optimisation_taguchi import run_opt_sim run_opt_sim(args, numbermodelruns, inputfile, usernamespace) ################################################ # Process for standard simulation (CPU or GPU) # ################################################ else: # Mixed mode MPI with OpenMP or CUDA - MPI task farm for models with each model parallelised with OpenMP (CPU) or CUDA (GPU) if args.mpi: if args.benchmark: raise GeneralError( 'MPI should not be used with benchmarking mode') if numbermodelruns == 1: raise GeneralError( 'MPI is not beneficial when there is only one model to run' ) run_mpi_sim(args, numbermodelruns, inputfile, usernamespace) # Standard behaviour - part of a job array on Open Grid Scheduler/Grid Engine with each model parallelised with OpenMP (CPU) or CUDA (GPU) elif args.task: if args.benchmark: raise GeneralError( 'A job array should not be used with benchmarking mode' ) run_job_array_sim(args, numbermodelruns, inputfile, usernamespace) # Standard behaviour - models run serially with each model parallelised with OpenMP (CPU) or CUDA (GPU) else: run_std_sim(args, numbermodelruns, inputfile, usernamespace)
def run_benchmark_sim(args, inputfile, usernamespace): """ Run standard simulation in benchmarking mode - models are run one after another and each model is parallelised using either OpenMP (CPU) or CUDA (GPU) Args: args (dict): Namespace with command line arguments inputfile (object): File object for the input file. usernamespace (dict): Namespace that can be accessed by user in any Python code blocks in input file. """ # Get information about host machine hostinfo = get_host_info() hyperthreading = ', {} cores with Hyper-Threading'.format(hostinfo['logicalcores']) if hostinfo['hyperthreading'] else '' machineIDlong = '{}; {} x {} ({} cores{}); {} RAM; {}'.format(hostinfo['machineID'], hostinfo['sockets'], hostinfo['cpuID'], hostinfo['physicalcores'], hyperthreading, human_size(hostinfo['ram'], a_kilobyte_is_1024_bytes=True), hostinfo['osversion']) # Initialise arrays to hold CPU thread info and times, and GPU info and times cputhreads = np.array([], dtype=np.int32) cputimes = np.array([]) gpuIDs = [] gputimes = np.array([]) # CPU only benchmarking if args.gpu is None: # Number of CPU threads to benchmark - start from single thread and double threads until maximum number of physical cores threads = 1 maxthreads = hostinfo['physicalcores'] maxthreadspersocket = hostinfo['physicalcores'] / hostinfo['sockets'] while threads < maxthreadspersocket: cputhreads = np.append(cputhreads, int(threads)) threads *= 2 # Check for system with only single thread if cputhreads.size == 0: cputhreads = np.append(cputhreads, threads) # Add maxthreadspersocket and maxthreads if necessary if cputhreads[-1] != maxthreadspersocket: cputhreads = np.append(cputhreads, int(maxthreadspersocket)) if cputhreads[-1] != maxthreads: cputhreads = np.append(cputhreads, int(maxthreads)) cputhreads = cputhreads[::-1] cputimes = np.zeros(len(cputhreads)) numbermodelruns = len(cputhreads) # GPU only benchmarking else: # Set size of array to store GPU runtimes and number of runs of model required if isinstance(args.gpu, list): for gpu in args.gpu: gpuIDs.append(gpu.name) gputimes = np.zeros(len(args.gpu)) numbermodelruns = len(args.gpu) else: gpuIDs.append(args.gpu.name) gputimes = np.zeros(1) numbermodelruns = 1 # Store GPU information in a temp variable gpus = args.gpu usernamespace['number_model_runs'] = numbermodelruns modelend = numbermodelruns + 1 for currentmodelrun in range(1, modelend): # Run CPU benchmark if args.gpu is None: os.environ['OMP_NUM_THREADS'] = str(cputhreads[currentmodelrun - 1]) cputimes[currentmodelrun - 1] = run_model(args, currentmodelrun, modelend - 1, numbermodelruns, inputfile, usernamespace) # Run GPU benchmark else: if isinstance(gpus, list): args.gpu = gpus[(currentmodelrun - 1)] else: args.gpu = gpus os.environ['OMP_NUM_THREADS'] = str(hostinfo['physicalcores']) gputimes[(currentmodelrun - 1)] = run_model(args, currentmodelrun, modelend - 1, numbermodelruns, inputfile, usernamespace) # Get model size (in cells) and number of iterations if currentmodelrun == 1: if numbermodelruns == 1: outputfile = os.path.splitext(args.inputfile)[0] + '.out' else: outputfile = os.path.splitext(args.inputfile)[0] + str(currentmodelrun) + '.out' f = h5py.File(outputfile, 'r') iterations = f.attrs['Iterations'] numcells = f.attrs['nx, ny, nz'] # Save number of threads and benchmarking times to NumPy archive np.savez(os.path.splitext(inputfile.name)[0], machineID=machineIDlong, gpuIDs=gpuIDs, cputhreads=cputhreads, cputimes=cputimes, gputimes=gputimes, iterations=iterations, numcells=numcells, version=__version__) 
simcompletestr = '\n=== Simulation completed' print('{} {}\n'.format(simcompletestr, '=' * (get_terminal_width() - 1 - len(simcompletestr))))
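# The benchmark archive written above with np.savez can be inspected afterwards;
# a short sketch, assuming the input file was mymodel.in so the archive is
# mymodel.npz (the keys match the np.savez call above).
import numpy as np

results = np.load('mymodel.npz')
print('Machine: {}'.format(results['machineID']))
print('Iterations: {}, cells (nx, ny, nz): {}'.format(results['iterations'], results['numcells']))
for threads, t in zip(results['cputhreads'], results['cputimes']):
    print('{:>3} CPU thread(s): {:.3f} s'.format(int(threads), t))
for gpu, t in zip(results['gpuIDs'], results['gputimes']):
    print('{}: {:.3f} s'.format(gpu, t))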
def solve_gpu(currentmodelrun, modelend, G):
    """Solves the model using the FDTD method on a GPU (implemented with NVIDIA CUDA).

    Args:
        currentmodelrun (int): Current model run number.
        modelend (int): Number of last model to run.
        G (class): Grid class instance - holds essential parameters describing the model.

    Returns:
        tsolve (float): Time taken to execute solving.
        memsolve (int): Memory usage on final iteration in bytes.
    """

    import pycuda.driver as drv
    from pycuda.compiler import SourceModule
    drv.init()

    # Suppress nvcc warnings on Windows
    if sys.platform == 'win32':
        compiler_opts = ['-w']
    else:
        compiler_opts = None

    # Create device handle and context on specific GPU device (and make it the current context)
    dev = drv.Device(G.gpu.deviceID)
    ctx = dev.make_context()

    # Electric and magnetic field updates - prepare kernels, and get kernel functions
    if Material.maxpoles > 0:
        kernels_fields = SourceModule(kernels_template_fields.substitute(
            REAL=cudafloattype, COMPLEX=cudacomplextype,
            N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size,
            NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=G.updatecoeffsdispersive.shape[1],
            NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1,
            NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3],
            NX_T=G.Tx.shape[1], NY_T=G.Tx.shape[2], NZ_T=G.Tx.shape[3]), options=compiler_opts)
    else:
        # Set to one any substitutions for dispersive materials
        kernels_fields = SourceModule(kernels_template_fields.substitute(
            REAL=cudafloattype, COMPLEX=cudacomplextype,
            N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size,
            NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_MATDISPCOEFFS=1,
            NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1,
            NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3],
            NX_T=1, NY_T=1, NZ_T=1), options=compiler_opts)
    update_e_gpu = kernels_fields.get_function("update_e")
    update_h_gpu = kernels_fields.get_function("update_h")

    # Copy material coefficient arrays to constant memory of GPU (must be <64KB) for fields kernels
    updatecoeffsE = kernels_fields.get_global('updatecoeffsE')[0]
    updatecoeffsH = kernels_fields.get_global('updatecoeffsH')[0]
    if G.updatecoeffsE.nbytes + G.updatecoeffsH.nbytes > G.gpu.constmem:
        raise GeneralError('Too many materials in the model to fit onto constant memory of size {} on {} - {} GPU'.format(
            human_size(G.gpu.constmem), G.gpu.deviceID, G.gpu.name))
    else:
        drv.memcpy_htod(updatecoeffsE, G.updatecoeffsE)
        drv.memcpy_htod(updatecoeffsH, G.updatecoeffsH)

    # Electric and magnetic field updates - dispersive materials - get kernel functions and initialise array on GPU.
    # Only needed if there are any dispersive materials (updates are split into two parts
    # as they require present and updated electric field values).
    if Material.maxpoles > 0:
        update_e_dispersive_A_gpu = kernels_fields.get_function("update_e_dispersive_A")
        update_e_dispersive_B_gpu = kernels_fields.get_function("update_e_dispersive_B")
        G.gpu_initialise_dispersive_arrays()

    # Electric and magnetic field updates - set blocks per grid and initialise field arrays on GPU
    G.gpu_set_blocks_per_grid()
    G.gpu_initialise_arrays()

    # PML updates
    if G.pmls:
        # Prepare kernels
        pmlmodulelectric = 'gprMax.pml_updates.pml_updates_electric_' + G.pmlformulation + '_gpu'
        kernelelectricfunc = getattr(import_module(pmlmodulelectric),
                                     'kernels_template_pml_electric_' + G.pmlformulation)
        pmlmodulemagnetic = 'gprMax.pml_updates.pml_updates_magnetic_' + G.pmlformulation + '_gpu'
        kernelmagneticfunc = getattr(import_module(pmlmodulemagnetic),
                                     'kernels_template_pml_magnetic_' + G.pmlformulation)
        kernels_pml_electric = SourceModule(kernelelectricfunc.substitute(
            REAL=cudafloattype, N_updatecoeffsE=G.updatecoeffsE.size, NY_MATCOEFFS=G.updatecoeffsE.shape[1],
            NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1,
            NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
        kernels_pml_magnetic = SourceModule(kernelmagneticfunc.substitute(
            REAL=cudafloattype, N_updatecoeffsH=G.updatecoeffsH.size, NY_MATCOEFFS=G.updatecoeffsH.shape[1],
            NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1,
            NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
        # Copy material coefficient arrays to constant memory of GPU (must be <64KB) for PML kernels
        updatecoeffsE = kernels_pml_electric.get_global('updatecoeffsE')[0]
        updatecoeffsH = kernels_pml_magnetic.get_global('updatecoeffsH')[0]
        drv.memcpy_htod(updatecoeffsE, G.updatecoeffsE)
        drv.memcpy_htod(updatecoeffsH, G.updatecoeffsH)
        # Set blocks per grid, initialise arrays on GPU, and get kernel functions
        for pml in G.pmls:
            pml.gpu_initialise_arrays()
            pml.gpu_get_update_funcs(kernels_pml_electric, kernels_pml_magnetic)
            pml.gpu_set_blocks_per_grid(G)

    # Receivers
    if G.rxs:
        # Initialise arrays on GPU
        rxcoords_gpu, rxs_gpu = gpu_initialise_rx_arrays(G)
        # Prepare kernel and get kernel function
        kernel_store_outputs = SourceModule(kernel_template_store_outputs.substitute(
            REAL=cudafloattype, NY_RXCOORDS=3, NX_RXS=6, NY_RXS=G.iterations, NZ_RXS=len(G.rxs),
            NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), options=compiler_opts)
        store_outputs_gpu = kernel_store_outputs.get_function("store_outputs")

    # Sources - initialise arrays on GPU, prepare kernel and get kernel functions
    if G.voltagesources + G.hertziandipoles + G.magneticdipoles:
        kernels_sources = SourceModule(kernels_template_sources.substitute(
            REAL=cudafloattype, N_updatecoeffsE=G.updatecoeffsE.size, N_updatecoeffsH=G.updatecoeffsH.size,
            NY_MATCOEFFS=G.updatecoeffsE.shape[1], NY_SRCINFO=4, NY_SRCWAVES=G.iterations,
            NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1,
            NX_ID=G.ID.shape[1], NY_ID=G.ID.shape[2], NZ_ID=G.ID.shape[3]), options=compiler_opts)
        # Copy material coefficient arrays to constant memory of GPU (must be <64KB) for source kernels
        updatecoeffsE = kernels_sources.get_global('updatecoeffsE')[0]
        updatecoeffsH = kernels_sources.get_global('updatecoeffsH')[0]
        drv.memcpy_htod(updatecoeffsE, G.updatecoeffsE)
        drv.memcpy_htod(updatecoeffsH, G.updatecoeffsH)
        if G.hertziandipoles:
            srcinfo1_hertzian_gpu, srcinfo2_hertzian_gpu, srcwaves_hertzian_gpu = gpu_initialise_src_arrays(G.hertziandipoles, G)
            update_hertzian_dipole_gpu = kernels_sources.get_function("update_hertzian_dipole")
        if G.magneticdipoles:
            srcinfo1_magnetic_gpu, srcinfo2_magnetic_gpu, srcwaves_magnetic_gpu = gpu_initialise_src_arrays(G.magneticdipoles, G)
            update_magnetic_dipole_gpu = kernels_sources.get_function("update_magnetic_dipole")
        if G.voltagesources:
            srcinfo1_voltage_gpu, srcinfo2_voltage_gpu, srcwaves_voltage_gpu = gpu_initialise_src_arrays(G.voltagesources, G)
            update_voltage_source_gpu = kernels_sources.get_function("update_voltage_source")

    # Snapshots - initialise arrays on GPU, prepare kernel and get kernel function
    if G.snapshots:
        # Initialise arrays on GPU
        snapEx_gpu, snapEy_gpu, snapEz_gpu, snapHx_gpu, snapHy_gpu, snapHz_gpu = gpu_initialise_snapshot_array(G)
        # Prepare kernel and get kernel function
        kernel_store_snapshot = SourceModule(kernel_template_store_snapshot.substitute(
            REAL=cudafloattype, NX_SNAPS=Snapshot.nx_max, NY_SNAPS=Snapshot.ny_max, NZ_SNAPS=Snapshot.nz_max,
            NX_FIELDS=G.nx + 1, NY_FIELDS=G.ny + 1, NZ_FIELDS=G.nz + 1), options=compiler_opts)
        store_snapshot_gpu = kernel_store_snapshot.get_function("store_snapshot")

    # Iteration loop timer
    iterstart = drv.Event()
    iterend = drv.Event()
    iterstart.record()

    for iteration in tqdm(range(G.iterations),
                          desc='Running simulation, model ' + str(currentmodelrun) + '/' + str(modelend),
                          ncols=get_terminal_width() - 1, file=sys.stdout, disable=not G.progressbars):

        # Get GPU memory usage on final iteration
        if iteration == G.iterations - 1:
            memsolve = drv.mem_get_info()[1] - drv.mem_get_info()[0]

        # Store field component values for every receiver
        if G.rxs:
            store_outputs_gpu(np.int32(len(G.rxs)), np.int32(iteration),
                              rxcoords_gpu.gpudata, rxs_gpu.gpudata,
                              G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
                              G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
                              block=(1, 1, 1), grid=(round32(len(G.rxs)), 1, 1))

        # Store any snapshots
        for i, snap in enumerate(G.snapshots):
            if snap.time == iteration + 1:
                if not G.snapsgpu2cpu:
                    store_snapshot_gpu(np.int32(i), np.int32(snap.xs), np.int32(snap.xf),
                                       np.int32(snap.ys), np.int32(snap.yf), np.int32(snap.zs), np.int32(snap.zf),
                                       np.int32(snap.dx), np.int32(snap.dy), np.int32(snap.dz),
                                       G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
                                       G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
                                       snapEx_gpu.gpudata, snapEy_gpu.gpudata, snapEz_gpu.gpudata,
                                       snapHx_gpu.gpudata, snapHy_gpu.gpudata, snapHz_gpu.gpudata,
                                       block=Snapshot.tpb, grid=Snapshot.bpg)
                else:
                    store_snapshot_gpu(np.int32(0), np.int32(snap.xs), np.int32(snap.xf),
                                       np.int32(snap.ys), np.int32(snap.yf), np.int32(snap.zs), np.int32(snap.zf),
                                       np.int32(snap.dx), np.int32(snap.dy), np.int32(snap.dz),
                                       G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
                                       G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
                                       snapEx_gpu.gpudata, snapEy_gpu.gpudata, snapEz_gpu.gpudata,
                                       snapHx_gpu.gpudata, snapHy_gpu.gpudata, snapHz_gpu.gpudata,
                                       block=Snapshot.tpb, grid=Snapshot.bpg)
                    gpu_get_snapshot_array(snapEx_gpu.get(), snapEy_gpu.get(), snapEz_gpu.get(),
                                           snapHx_gpu.get(), snapHy_gpu.get(), snapHz_gpu.get(), 0, snap)

        # Update magnetic field components
        update_h_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz), G.ID_gpu.gpudata,
                     G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
                     G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, block=G.tpb, grid=G.bpg)

        # Update magnetic field components with the PML correction
        for pml in G.pmls:
            pml.gpu_update_magnetic(G)

        # Update magnetic field components for magnetic dipole sources
        if G.magneticdipoles:
            update_magnetic_dipole_gpu(np.int32(len(G.magneticdipoles)), np.int32(iteration),
                                       floattype(G.dx), floattype(G.dy), floattype(G.dz),
                                       srcinfo1_magnetic_gpu.gpudata, srcinfo2_magnetic_gpu.gpudata,
                                       srcwaves_magnetic_gpu.gpudata, G.ID_gpu.gpudata,
                                       G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata,
                                       block=(1, 1, 1), grid=(round32(len(G.magneticdipoles)), 1, 1))

        # Update electric field components.
        # If all materials are non-dispersive do the standard update.
        if Material.maxpoles == 0:
            update_e_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz), G.ID_gpu.gpudata,
                         G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
                         G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, block=G.tpb, grid=G.bpg)
        # If there are any dispersive materials do the 1st part of the dispersive update
        # (it is split into two parts as it requires present and updated electric field values).
        else:
            update_e_dispersive_A_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz), np.int32(Material.maxpoles),
                                      G.updatecoeffsdispersive_gpu.gpudata,
                                      G.Tx_gpu.gpudata, G.Ty_gpu.gpudata, G.Tz_gpu.gpudata, G.ID_gpu.gpudata,
                                      G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
                                      G.Hx_gpu.gpudata, G.Hy_gpu.gpudata, G.Hz_gpu.gpudata, block=G.tpb, grid=G.bpg)

        # Update electric field components with the PML correction
        for pml in G.pmls:
            pml.gpu_update_electric(G)

        # Update electric field components for voltage sources
        if G.voltagesources:
            update_voltage_source_gpu(np.int32(len(G.voltagesources)), np.int32(iteration),
                                      floattype(G.dx), floattype(G.dy), floattype(G.dz),
                                      srcinfo1_voltage_gpu.gpudata, srcinfo2_voltage_gpu.gpudata,
                                      srcwaves_voltage_gpu.gpudata, G.ID_gpu.gpudata,
                                      G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
                                      block=(1, 1, 1), grid=(round32(len(G.voltagesources)), 1, 1))

        # Update electric field components for Hertzian dipole sources (update any Hertzian dipole sources last)
        if G.hertziandipoles:
            update_hertzian_dipole_gpu(np.int32(len(G.hertziandipoles)), np.int32(iteration),
                                       floattype(G.dx), floattype(G.dy), floattype(G.dz),
                                       srcinfo1_hertzian_gpu.gpudata, srcinfo2_hertzian_gpu.gpudata,
                                       srcwaves_hertzian_gpu.gpudata, G.ID_gpu.gpudata,
                                       G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata,
                                       block=(1, 1, 1), grid=(round32(len(G.hertziandipoles)), 1, 1))

        # If there are any dispersive materials do the 2nd part of the dispersive update
        # (it is split into two parts as it requires present and updated electric field values).
        # It can therefore only be completed after the electric field has been updated by the
        # PML and source updates.
        if Material.maxpoles > 0:
            update_e_dispersive_B_gpu(np.int32(G.nx), np.int32(G.ny), np.int32(G.nz), np.int32(Material.maxpoles),
                                      G.updatecoeffsdispersive_gpu.gpudata,
                                      G.Tx_gpu.gpudata, G.Ty_gpu.gpudata, G.Tz_gpu.gpudata, G.ID_gpu.gpudata,
                                      G.Ex_gpu.gpudata, G.Ey_gpu.gpudata, G.Ez_gpu.gpudata, block=G.tpb, grid=G.bpg)

    # Copy output from receivers array back to correct receiver objects
    if G.rxs:
        gpu_get_rx_array(rxs_gpu.get(), rxcoords_gpu.get(), G)

    # Copy data from any snapshots back to correct snapshot objects
    if G.snapshots and not G.snapsgpu2cpu:
        for i, snap in enumerate(G.snapshots):
            gpu_get_snapshot_array(snapEx_gpu.get(), snapEy_gpu.get(), snapEz_gpu.get(),
                                   snapHx_gpu.get(), snapHy_gpu.get(), snapHz_gpu.get(), i, snap)

    iterend.record()
    iterend.synchronize()
    tsolve = iterstart.time_till(iterend) * 1e-3

    # Remove context from top of stack and delete
    ctx.pop()
    del ctx

    return tsolve, memsolve
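The pattern used above for the field, PML and source kernels is the same each time: substitute grid and material dimensions into a templated CUDA source, build it with PyCUDA's SourceModule, then copy the (small) coefficient arrays into __constant__ memory through get_global(). The following is a minimal, self-contained sketch of that pattern, not gprMax code; the kernel, array names and sizes are illustrative assumptions.

from string import Template

import numpy as np
import pycuda.autoinit  # noqa: F401 - creates a context on the default GPU
import pycuda.driver as drv
from pycuda.compiler import SourceModule

# Templated CUDA source; $N_COEFFS is filled in before compilation (hypothetical kernel)
kernel_template = Template("""
__constant__ float coeffs[$N_COEFFS];

__global__ void scale(int n, float *data) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n) {
        data[i] *= coeffs[i % $N_COEFFS];
    }
}
""")

coeffs = np.array([1.0, 0.5, 0.25, 0.125], dtype=np.float32)
module = SourceModule(kernel_template.substitute(N_COEFFS=coeffs.size))

# get_global() returns (device_pointer, size_in_bytes) for the __constant__ symbol
coeffs_ptr, coeffs_nbytes = module.get_global('coeffs')
assert coeffs.nbytes <= coeffs_nbytes
drv.memcpy_htod(coeffs_ptr, coeffs)

# Launch the compiled kernel on a small illustrative array
scale = module.get_function('scale')
data = np.ones(32, dtype=np.float32)
data_gpu = drv.mem_alloc(data.nbytes)
drv.memcpy_htod(data_gpu, data)
scale(np.int32(data.size), data_gpu, block=(32, 1, 1), grid=(1, 1, 1))
drv.memcpy_dtoh(data, data_gpu)

Constant memory is a sensible home for the update coefficients because every thread reads the same few values each iteration; the explicit size check against G.gpu.constmem above exists because the space is limited (typically 64KB).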
# Get machine/CPU/OS details
hostinfo = get_host_info()
try:
    machineIDlong = str(baseresult['machineID'])
    # machineIDlong = 'Dell PowerEdge R630; Intel(R) Xeon(R) CPU E5-2630 v3 @ 2.40GHz; Linux (3.10.0-327.18.2.el7.x86_64)'  # Use to manually describe machine
    machineID = machineIDlong.split(';')[0]
    cpuID = machineIDlong.split(';')[1]
    cpuID = cpuID.split('GHz')[0].split('x')[1][1::] + 'GHz'
except KeyError:
    hyperthreading = ', {} cores with Hyper-Threading'.format(hostinfo['logicalcores']) if hostinfo['hyperthreading'] else ''
    machineIDlong = '{}; {} x {} ({} cores{}); {} RAM; {}'.format(
        hostinfo['machineID'], hostinfo['sockets'], hostinfo['cpuID'], hostinfo['physicalcores'],
        hyperthreading, human_size(hostinfo['ram'], a_kilobyte_is_1024_bytes=True), hostinfo['osversion'])
print('Host: {}'.format(machineIDlong))

# Base result - threads and times info from Numpy archive
print('Model: {}'.format(args.baseresult))
for i in range(len(baseresult['cputhreads'])):
    print('{} CPU (OpenMP) thread(s): {:g} s'.format(baseresult['cputhreads'][i], baseresult['cputimes'][i]))
baseplotlabel = os.path.splitext(os.path.split(args.baseresult)[1])[0] + '.in'

# Base result - arrays for length of cubic model side and cells per second metric
cells = np.array([baseresult['numcells'][0]])
cpucellspersec = np.array([(baseresult['numcells'][0] * baseresult['numcells'][1] * baseresult['numcells'][2] * baseresult['iterations']) / baseresult['cputimes'][0]
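The benchmark fragment above builds a throughput metric: total cells processed per second, i.e. nx * ny * nz * iterations divided by the solve time. A minimal sketch of the same calculation with illustrative (assumed) numbers:

import numpy as np

numcells = np.array([100, 100, 100])   # nx, ny, nz of a cubic benchmark model (illustrative)
iterations = 500                        # illustrative iteration count
cputime = 42.0                          # illustrative solve time in seconds

# Cells-per-second metric as used in the benchmark arrays above
cellspersec = (numcells.prod() * iterations) / cputime
print('{:g} cells/s'.format(cellspersec))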
def process_singlecmds(singlecmds, multicmds, G):
    """Checks the validity of command parameters and creates instances of classes of parameters.

    Args:
        singlecmds (dict): Commands that can only occur once in the model.
        multicmds (dict): Commands that can have multiple instances in the model (required to pass to process_materials_file function).
        G (class): Grid class instance - holds essential parameters describing the model.
    """

    # Check validity of command parameters in order needed
    # messages
    cmd = '#messages'
    if singlecmds[cmd] != 'None':
        tmp = singlecmds[cmd].split()
        if len(tmp) != 1:
            raise CmdInputError(cmd + ' requires exactly one parameter')
        if singlecmds[cmd].lower() == 'y':
            G.messages = True
        elif singlecmds[cmd].lower() == 'n':
            G.messages = False
        else:
            raise CmdInputError(cmd + ' requires input values of either y or n')

    # Title
    cmd = '#title'
    if singlecmds[cmd] != 'None':
        G.title = singlecmds[cmd]
        if G.messages:
            print('Model title: {}'.format(G.title))

    # Number of processors to run on (OpenMP)
    cmd = '#num_threads'
    ompthreads = os.environ.get('OMP_NUM_THREADS')
    if singlecmds[cmd] != 'None':
        tmp = tuple(int(x) for x in singlecmds[cmd].split())
        if len(tmp) != 1:
            raise CmdInputError(cmd + ' requires exactly one parameter to specify the number of threads to use')
        if tmp[0] < 1:
            raise CmdInputError(cmd + ' requires the value to be an integer not less than one')
        G.nthreads = tmp[0]
    elif ompthreads:
        G.nthreads = int(ompthreads)
    else:
        # Set number of threads to number of physical CPU cores, i.e. avoid hyperthreading with OpenMP
        G.nthreads = psutil.cpu_count(logical=False)
    if G.messages:
        print('Number of threads: {}'.format(G.nthreads))

    # Spatial discretisation
    cmd = '#dx_dy_dz'
    tmp = [float(x) for x in singlecmds[cmd].split()]
    if len(tmp) != 3:
        raise CmdInputError(cmd + ' requires exactly three parameters')
    if tmp[0] <= 0:
        raise CmdInputError(cmd + ' requires the x-direction spatial step to be greater than zero')
    if tmp[1] <= 0:
        raise CmdInputError(cmd + ' requires the y-direction spatial step to be greater than zero')
    if tmp[2] <= 0:
        raise CmdInputError(cmd + ' requires the z-direction spatial step to be greater than zero')
    G.dx = tmp[0]
    G.dy = tmp[1]
    G.dz = tmp[2]
    if G.messages:
        print('Spatial discretisation: {:g} x {:g} x {:g}m'.format(G.dx, G.dy, G.dz))

    # Domain
    cmd = '#domain'
    tmp = [float(x) for x in singlecmds[cmd].split()]
    if len(tmp) != 3:
        raise CmdInputError(cmd + ' requires exactly three parameters')
    G.nx = round_value(tmp[0] / G.dx)
    G.ny = round_value(tmp[1] / G.dy)
    G.nz = round_value(tmp[2] / G.dz)
    if G.nx == 0 or G.ny == 0 or G.nz == 0:
        raise CmdInputError(cmd + ' requires at least one cell in every dimension')
    if G.messages:
        print('Domain size: {:g} x {:g} x {:g}m ({:d} x {:d} x {:d} = {:g} cells)'.format(
            tmp[0], tmp[1], tmp[2], G.nx, G.ny, G.nz, (G.nx * G.ny * G.nz)))

    # Guesstimate at memory usage
    mem = (((G.nx + 1) * (G.ny + 1) * (G.nz + 1) * 13 * np.dtype(floattype).itemsize
            + (G.nx + 1) * (G.ny + 1) * (G.nz + 1) * 18) * 1.1) + 30e6
    print('Memory (RAM) usage: ~{} required, {} available'.format(human_size(mem), human_size(psutil.virtual_memory().total)))

    # Time step CFL limit - use either 2D or 3D (default)
    cmd = '#time_step_limit_type'
    if singlecmds[cmd] != 'None':
        tmp = singlecmds[cmd].split()
        if len(tmp) != 1:
            raise CmdInputError(cmd + ' requires exactly one parameter')
        if singlecmds[cmd].lower() == '2d':
            if G.nx == 1:
                G.dt = 1 / (c * np.sqrt((1 / G.dy) * (1 / G.dy) + (1 / G.dz) * (1 / G.dz)))
            elif G.ny == 1:
                G.dt = 1 / (c * np.sqrt((1 / G.dx) * (1 / G.dx) + (1 / G.dz) * (1 / G.dz)))
            elif G.nz == 1:
                G.dt = 1 / (c * np.sqrt((1 / G.dx) * (1 / G.dx) + (1 / G.dy) * (1 / G.dy)))
            else:
                raise CmdInputError(cmd + ' 2D CFL limit can only be used when one dimension of the domain is one cell')
        elif singlecmds[cmd].lower() == '3d':
            G.dt = 1 / (c * np.sqrt((1 / G.dx) * (1 / G.dx) + (1 / G.dy) * (1 / G.dy) + (1 / G.dz) * (1 / G.dz)))
        else:
            raise CmdInputError(cmd + ' requires input values of either 2D or 3D')
    else:
        G.dt = 1 / (c * np.sqrt((1 / G.dx) * (1 / G.dx) + (1 / G.dy) * (1 / G.dy) + (1 / G.dz) * (1 / G.dz)))

    # Round down time step to nearest float with precision one less than hardware maximum.
    # Avoids inadvertently exceeding the CFL limit due to the binary representation of floating point numbers.
    G.dt = round_value(G.dt, decimalplaces=d.getcontext().prec - 1)
    if G.messages:
        print('Time step: {:g} secs'.format(G.dt))

    # Time step stability factor
    cmd = '#time_step_stability_factor'
    if singlecmds[cmd] != 'None':
        tmp = tuple(float(x) for x in singlecmds[cmd].split())
        if len(tmp) != 1:
            raise CmdInputError(cmd + ' requires exactly one parameter')
        if tmp[0] <= 0 or tmp[0] > 1:
            raise CmdInputError(cmd + ' requires the value of the time step stability factor to be between zero and one')
        G.dt = G.dt * tmp[0]
        if G.messages:
            print('Time step (modified): {:g} secs'.format(G.dt))

    # Time window
    cmd = '#time_window'
    tmp = singlecmds[cmd].split()
    if len(tmp) != 1:
        raise CmdInputError(cmd + ' requires exactly one parameter to specify the time window. Either in seconds or number of iterations.')
    tmp = tmp[0].lower()
    # If a real floating point value is given
    if '.' in tmp or 'e' in tmp:
        if float(tmp) > 0:
            G.timewindow = float(tmp)
            G.iterations = round_value((float(tmp) / G.dt)) + 1
        else:
            raise CmdInputError(cmd + ' must have a value greater than zero')
    # If a number of iterations is given
    else:
        G.timewindow = (int(tmp) - 1) * G.dt
        G.iterations = int(tmp)
    if G.messages:
        print('Time window: {:g} secs ({} iterations)'.format(G.timewindow, G.iterations))

    # PML
    cmd = '#pml_cells'
    if singlecmds[cmd] != 'None':
        tmp = singlecmds[cmd].split()
        if len(tmp) != 1 and len(tmp) != 6:
            raise CmdInputError(cmd + ' requires either one or six parameters')
        if len(tmp) == 1:
            G.pmlthickness = (int(tmp[0]), int(tmp[0]), int(tmp[0]), int(tmp[0]), int(tmp[0]), int(tmp[0]))
        else:
            G.pmlthickness = (int(tmp[0]), int(tmp[1]), int(tmp[2]), int(tmp[3]), int(tmp[4]), int(tmp[5]))
    if (2 * G.pmlthickness[0] >= G.nx or 2 * G.pmlthickness[1] >= G.ny or 2 * G.pmlthickness[2] >= G.nz
            or 2 * G.pmlthickness[3] >= G.nx or 2 * G.pmlthickness[4] >= G.ny or 2 * G.pmlthickness[5] >= G.nz):
        raise CmdInputError(cmd + ' has too many cells for the domain size')

    # src_steps
    cmd = '#src_steps'
    if singlecmds[cmd] != 'None':
        tmp = singlecmds[cmd].split()
        if len(tmp) != 3:
            raise CmdInputError(cmd + ' requires exactly three parameters')
        G.srcstepx = round_value(float(tmp[0]) / G.dx)
        G.srcstepy = round_value(float(tmp[1]) / G.dy)
        G.srcstepz = round_value(float(tmp[2]) / G.dz)
        if G.messages:
            print('All sources will step {:g}m, {:g}m, {:g}m for each model run.'.format(
                G.srcstepx * G.dx, G.srcstepy * G.dy, G.srcstepz * G.dz))

    # rx_steps
    cmd = '#rx_steps'
    if singlecmds[cmd] != 'None':
        tmp = singlecmds[cmd].split()
        if len(tmp) != 3:
            raise CmdInputError(cmd + ' requires exactly three parameters')
        G.rxstepx = round_value(float(tmp[0]) / G.dx)
        G.rxstepy = round_value(float(tmp[1]) / G.dy)
        G.rxstepz = round_value(float(tmp[2]) / G.dz)
        if G.messages:
            print('All receivers will step {:g}m, {:g}m, {:g}m for each model run.'.format(
                G.rxstepx * G.dx, G.rxstepy * G.dy, G.rxstepz * G.dz))

    # Excitation file for user-defined source waveforms
    cmd = '#excitation_file'
    if singlecmds[cmd] != 'None':
        tmp = singlecmds[cmd].split()
        if len(tmp) != 1:
            raise CmdInputError(cmd + ' requires exactly one parameter')
        excitationfile = tmp[0]

        # See if file exists at specified path and if not try input file directory
        if not os.path.isfile(excitationfile):
            excitationfile = os.path.join(G.inputdirectory, excitationfile)

        # Get waveform names
        with open(excitationfile, 'r') as f:
            waveformIDs = f.readline().split()

        # Read all waveform values into an array
        waveformvalues = np.loadtxt(excitationfile, skiprows=1, dtype=floattype)

        for waveform in range(len(waveformIDs)):
            if any(x.ID == waveformIDs[waveform] for x in G.waveforms):
                raise CmdInputError('Waveform with ID {} already exists'.format(waveformIDs[waveform]))
            w = Waveform()
            w.ID = waveformIDs[waveform]
            w.type = 'user'
            if len(waveformvalues.shape) == 1:
                w.uservalues = waveformvalues[:]
            else:
                w.uservalues = waveformvalues[:, waveform]

            if G.messages:
                print('User waveform {} created.'.format(w.ID))

            G.waveforms.append(w)
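The time step set by process_singlecmds follows the 3D CFL limit, dt = 1 / (c * sqrt(1/dx^2 + 1/dy^2 + 1/dz^2)), optionally scaled down by #time_step_stability_factor. A minimal standalone sketch of that calculation (not gprMax code; the spatial steps, the 0.9 factor, and the use of scipy.constants for the speed of light are illustrative assumptions):

import numpy as np
from scipy.constants import c  # speed of light in vacuum (m/s)

# Illustrative 2 mm spatial discretisation in all three directions
dx = dy = dz = 0.002

# 3D CFL limit on the time step
dt = 1 / (c * np.sqrt((1 / dx) ** 2 + (1 / dy) ** 2 + (1 / dz) ** 2))

# Optional stability factor (0 < factor <= 1), e.g. from #time_step_stability_factor
dt *= 0.9

print('Time step: {:g} secs'.format(dt))

Using the exact CFL value places the simulation on the stability boundary, which is why the code above additionally rounds dt down slightly before use.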