def main(self):
    """
    Set up the example problem and, if requested, run the SeisFlows3 workflow.

    The setup downloads/configures SPECFEM2D, builds a working directory,
    generates the initial and the true/target models, and finally prepares
    the SeisFlows3 working directory. The inversion itself is only launched
    when `self.run_example` is truthy.
    """
    def announce(title):
        # All stage banners share the same "=" border
        print(msg.cli(title, border="="))

    announce("EXAMPLE SETUP")

    # Step 1: Download and configure SPECFEM2D, make binaries. Optional
    self.download_specfem2d()
    self.configure_specfem2d_and_make_binaries()

    # Step 2: Create a working directory and generate initial/final models
    self.create_specfem2d_working_directory()

    # Step 2a: Generate MODEL_INIT, rearrange consequent directory structure
    announce("GENERATING INITIAL MODEL")
    self.setup_specfem2d_for_model_init()
    self.run_xspecfem2d_binaries()
    self.cleanup_xspecfem2d_run(choice="INIT")

    # Step 2b: Generate MODEL_TRUE, rearrange consequent directory structure
    announce("GENERATING TRUE/TARGET MODEL")
    self.setup_specfem2d_for_model_true()
    self.run_xspecfem2d_binaries()
    self.cleanup_xspecfem2d_run(choice="TRUE")

    # Step 3: Prepare the SeisFlows3 working directory and finalize Par_file
    self.setup_seisflows_working_directory()
    self.finalize_specfem2d_par_file()
    announce("COMPLETE EXAMPLE SETUP")

    # Step 4: Run the workflow, only if the User asked for it
    if not self.run_example:
        return
    announce("RUNNING SEISFLOWS3 INVERSION WORKFLOW")
    self.run_sf3_example()
def __init__(self, ntask=3, niter=2):
    """
    Prompt for an existing SPECFEM2D repository, define the directory
    structures used to navigate the SPECFEM repository and the example
    working directory, and store the inversion size.

    :type ntask: int
    :param ntask: number of events to use in inversion, between 1 and 25.
        defaults to 3
    :type niter: int
    :param niter: number of iterations to run, at least 1. defaults to 2
    """
    prompt = msg.cli("If you have already downloaded SPECMFE2D, please input "
                     "the full path to the repo. If left blank, this example "
                     "will pull the latest version from GitHub and attempt "
                     "to configure and make the binaries:\n> ")
    specfem2d_repo = input(prompt)

    self.cwd = os.getcwd()
    directory_structures = self.define_dir_structures(
        cwd=self.cwd, specfem2d_repo=specfem2d_repo)
    self.sem2d_paths, self.workdir_paths = directory_structures

    self.ntask = ntask
    assert(1 <= self.ntask <= 25), \
        f"number of tasks/events must be between 1 and 25, not {self.ntask}"

    self.niter = niter
    assert(1 <= self.niter <= np.inf), \
        f"number of iterations must be between 1 and inf, not {self.niter}"

    # The first command line argument ('setup' or 'run') is provided by the
    # User and decides whether the workflow is executed after setup
    self.run_example = sys.argv[1] == "run"

    # Command line tool to use $ seisflows <cmd> from inside Python.
    # Keep only the program name in sys.argv so that no arguments leak
    # into the CLI
    sys.argv = sys.argv[:1]
    self.sf = SeisFlows()
def export_residuals(self, path):
    """
    File transfer utility. Moves this task's 'residuals' directory from the
    solver working directory into `path`/residuals/<source_name>.

    Exits the program if the local 'residuals' directory does not exist.

    :type path: str
    :param path: path to save residuals
    """
    if self.taskid == 0:
        self.logger.debug(f"exporting residuals to:\n{path}")

    export_root = os.path.join(path, "residuals")
    unix.mkdir(export_root)

    local_residuals = os.path.join(self.cwd, "residuals")
    if os.path.exists(local_residuals):
        unix.mv(local_residuals,
                os.path.join(export_root, self.source_name))
        return

    # A missing residuals directory means something has gone wrong with the
    # preprocessing, so the workflow cannot proceed
    print(
        msg.cli(
            "The Solver function 'export_residuals' expected "
            "'residuals' directories to be created but could not "
            "find them and cannot continue the workflow. Please "
            "check the preprocess.prepare_eval_grad() function",
            header="preprocess error", border="="))
    sys.exit(-1)
def check_poissons_ratio(vp, vs, min_val=-1., max_val=0.5):
    """
    Compute Poisson's ratio from Vp and Vs model vectors and verify that its
    extreme values lie inside [`min_val`, `max_val`]. Exits the program with
    an error message if they do not, otherwise returns the ratio.

    :type vp: np.array
    :param vp: P-wave velocity model vector
    :type vs: np.array
    :param vs: S-wave velocity model vector
    :type min_val: float
    :param min_val: minimum model-wide acceptable value for poissons ratio
    :type max_val: float
    :param max_val: maximum model-wide acceptable value for poissons ratio
    :return: the value returned by `poissons_ratio(vp=vp, vs=vs)`
    """
    ratio = poissons_ratio(vp=vp, vs=vs)
    lowest = ratio.min()
    highest = ratio.max()

    out_of_bounds = (lowest < min_val) or (highest > max_val)
    if not out_of_bounds:
        return ratio

    print(
        msg.cli(
            f"The Poisson's ratio of the given model is out of "
            f"bounds with respect to the defined range "
            f"({min_val}, {max_val}). "
            f"The model bounds were found to be:",
            items=[f"{lowest:.2f} < PR < {highest:.2f}"],
            border="=", header="Poisson's Ratio Error"))
    sys.exit(-1)
def check_source_names(self):
    """
    Determine the names of sources by globbing PATH.SPECFEM_DATA for files
    matching '<source_prefix>_*' and store them in `self._source_names`.

    Exits the program if no source files match.

    .. note::
        Source list is sorted and collected from start up to PAR.NTASK
    """
    wildcard = f"{self.source_prefix}_*"
    source_files = glob(os.path.join(PATH.SPECFEM_DATA, wildcard))
    source_files = sorted(source_files)

    # Without any sources there is nothing to simulate, we can't proceed
    if not source_files:
        print(
            msg.cli(
                "No matching source files when searching PATH for"
                "the given WILDCARD",
                items=[
                    f"PATH: {PATH.SPECFEM_DATA}",
                    f"WILDCARD: {wildcard}"
                ],
                header="error"))
        sys.exit(-1)

    # The source name is whatever follows the final underscore of the file
    # name; only the first PAR.NTASK sources are retained
    self._source_names = [
        os.path.basename(fid).rsplit("_", 1)[-1]
        for fid in source_files[:PAR.NTASK]
    ]
def job_status(self, classname, method, jobs):
    """
    Query the state of every job in `jobs` and report whether all of them
    are finished. Exits the program as soon as a job reports state 'EXIT'.

    :type classname: str
    :param classname: class of the task being run, used in the error message
    :type method: str
    :param method: method of the task being run, used in the error message
    :type jobs: list
    :param jobs: job ids to query
    :rtype: tuple (bool, list)
    :return: (True if every job is in state 'DONE', the input `jobs`)
    """
    finished = []
    for job in jobs:
        state = self._query(job)
        if state == "EXIT":
            print(
                msg.cli(
                    f"LSF job {job} failed to execute "
                    f"{classname}.{method}.",
                    header="error", border="="))
            sys.exit(-1)
        finished.append(state == "DONE")

    return all(finished), jobs
def sum_residuals(self, files):
    """
    Sum the residuals stored in each file and average over PAR.NTASK to
    obtain the total misfit, as defined by Tape et al. (2010).

    Exits the program if the number of files does not equal PAR.NTASK.

    :type files: list of str
    :param files: list of single-column text files containing residuals
        that will have been generated using prepare_eval_grad()
    :rtype: float
    :return: average misfit
    """
    nfiles = len(files)
    if nfiles != PAR.NTASK:
        print(msg.cli(f"Pyatoa preprocessing module did not recover the "
                      f"correct number of residual files "
                      f"({len(files)}/{PAR.NTASK}). Please check that "
                      f"the preprocessing logs", header="error")
              )
        sys.exit(-1)

    # Accumulate the per-event misfits, then normalize by number of events
    summed_misfit = sum(np.sum(np.loadtxt(fid)) for fid in files)
    return summed_misfit / PAR.NTASK
def __init__(self, ntask=2, niter=1, nsta=5):
    """
    Overload the parent init with different defaults, add the `nsta`
    parameter that selects the number of stations, and make sure Pyatoa
    can be imported before the example is run.

    :type ntask: int
    :param ntask: number of events to use in inversion, passed on to the
        parent init. defaults to 2
    :type niter: int
    :param niter: number of iterations to run, passed on to the parent init.
        defaults to 1
    :type nsta: int
    :param nsta: number of stations to include in inversion, between 1 and
        131. defaults to 5
    """
    super().__init__(ntask=ntask, niter=niter)

    self.nsta = nsta
    assert(1 <= self.nsta <= 131), \
        f"number of stations must be between 1 and 131, not {self.nsta}"

    # Pyatoa is only imported to prove that it is installed; exit with an
    # explanatory message otherwise
    try:
        import pyatoa  # noqa: F401
    except ModuleNotFoundError:
        missing_pyatoa = msg.cli(
            "Module Pyatoa not found but is required for this "
            "example. Please install Pyatoa and rerun this "
            "example.",
            header="module not found error", border="=")
        print(missing_pyatoa)
        sys.exit(-1)
def check_solver_parameter_files(self):
    """
    Check that NSTEP and DT in 'DATA/Par_file' and the mesh NPROC agree with
    PAR.NT, PAR.DT and PAR.NPROC. Any mismatch exits the program; the
    explanatory message is only printed by the task with taskid 0.

    :raises NotImplementedError: if 'MULTIPLES' is present in PAR
    """
    def abort(explanation):
        # Only the first task prints so the message is not repeated for
        # every running task, but every task stops
        if self.taskid == 0:
            print(msg.cli(explanation, header="parameter match error",
                          border="="))
        sys.exit(-1)

    # Number of time steps
    nt_str, nt, _ = getpar(key="NSTEP", file="DATA/Par_file")
    if int(nt) != PAR.NT:
        abort(f"SPECFEM3D {nt_str}=={nt} is not equal "
              f"SeisFlows3 PAR.NT=={PAR.NT}. Please ensure "
              f"that these values match in both files.")

    # Time step
    dt_str, dt, _ = getpar(key="DT", file="DATA/Par_file")
    if float(dt) != PAR.DT:
        abort(f"SPECFEM3D {dt_str}=={dt} is not equal "
              f"SeisFlows3 PAR.DT=={PAR.DT}. Please ensure "
              f"that these values match in both files.")

    # Number of processors must match the MESH values
    nproc = self.mesh_properties.nproc
    if nproc != PAR.NPROC:
        abort(f"SPECFEM3D mesh NPROC=={nproc} is not equal "
              f"SeisFlows3 PAR.NPROC=={PAR.NPROC}. "
              f"Please check that your mesh matches this val.")

    if "MULTIPLES" in PAR:
        raise NotImplementedError
def prepare_eval_grad(self, cwd, source_name, taskid, **kwargs):
    """
    Prepare the gradient evaluation for a single event: build the event's
    Pyaflowa object, run its processing, and write the resulting scaled
    misfit to the solver directory via `self.write_residuals`.

    Exits the program if processing returns no misfit (None).

    .. note::
        Meant to be called by solver.eval_func(), may have unused arguments
        to keep functions general across preprocessing subclasses.

    :type cwd: str
    :param cwd: current specfem working directory, the residuals are
        written here. Should be defined by solver.cwd
    :type source_name: str
    :param source_name: the event id to be used for tagging and data lookup.
        Should be defined by solver.source_name
    :type taskid: int
    :param taskid: identifier of the currently running solver instance.
        Only task 0 emits the debug log statement.
        Should be defined by solver.taskid
    :param kwargs: [not used] accepted for interface compatibility
    """
    if taskid == 0:
        self.logger.debug("preparing files for gradient evaluation with "
                          "Pyaflowa")

    # Process all the stations for a given event using Pyaflowa
    pyaflowa = self.setup_event_pyaflowa(source_name)
    scaled_misfit = pyaflowa.process()

    if scaled_misfit is not None:
        # Event misfit defined by Tape et al. (2010) written to solver dir.
        self.write_residuals(path=cwd, scaled_misfit=scaled_misfit)
        return

    print(msg.cli(f"Event {source_name} returned no misfit, you may "
                  f"want to check logs and waveform figures, "
                  f"or consider discarding this event from your "
                  f"workflow",
                  items=[pyaflowa.paths.logs, pyaflowa.paths.figures],
                  header="pyatoa preprocessing error", border="="))
    sys.exit(-1)
def process_kernels(path, logger):
    """
    Sum kernels from individual sources into `path`/sum, smoothing them
    first if PAR.SMOOTH_H or PAR.SMOOTH_V is positive. When smoothing, the
    unsmoothed sum is kept in `path`/sum_nosmooth.

    Exits the program if `path` does not exist.

    .. note::
        This function needs to be run on system, i.e., called by
        system.run(single=True)

    :type path: str
    :param path: directory containing sensitivity kernels in the scratch
        directory
    :type logger: Logger
    :param logger: Class-specific logging module, log statements pushed
        from this logger will be tagged by its specific module/classname
    """
    if not os.path.exists(path):
        print(
            msg.cli(
                "Gradient path in postprocess.process_kernels "
                "does not exist but should",
                items=[path], header="error"))
        sys.exit(-1)

    path_sum_nosmooth = os.path.join(path, "sum_nosmooth")
    path_sum = os.path.join(path, "sum")

    smoothing_requested = (PAR.SMOOTH_H > 0) or (PAR.SMOOTH_V > 0)
    if not smoothing_requested:
        # Combine all the input kernels, generating the unscaled gradient
        logger.debug(f"saving summed kernels to:\n{path_sum}")
        solver.combine(input_path=path, output_path=path_sum)
        return

    # Sum first, then smooth the sum in the vertical and horizontal
    logger.debug(f"saving unsmoothed and summed kernels to:\n"
                 f"{path_sum_nosmooth}")
    solver.combine(input_path=path, output_path=path_sum_nosmooth)

    logger.info(f"smoothing gradient: H={PAR.SMOOTH_H}m, "
                f"V={PAR.SMOOTH_V}m")
    logger.debug(f"saving smoothed kernels to:\n{path_sum}")
    solver.smooth(input_path=path_sum_nosmooth, output_path=path_sum,
                  span_h=PAR.SMOOTH_H, span_v=PAR.SMOOTH_V)
def check_mesh_properties(self, path=None):
    """
    Count the slices and grid points of a mesh and store the result in
    `self._mesh_properties`.

    Slices are read for the first entry of `self.parameters`, starting at
    processor 0, until the next 'proc??????_<parameter>.bin' file no longer
    exists. Exits the program if `path` does not exist.

    The stored Struct holds:
        nproc: number of slices found
        ngll: list with the length of the data read from each slice
        path: the mesh path that was inspected
        coords: Struct of callables for 'x', 'y' and 'z'. Each one forwards
            to `self.io.read_slice` with `path` and `parameters` already
            bound, so the slice index must be given by keyword,
            e.g. `coords['x'](iproc=0)`

    :type path: str
    :param path: path to the mesh file, defaults to PATH.MODEL_INIT if None
    """
    # Check the given model path or the initial model
    if path is None:
        path = PATH.MODEL_INIT

    if not exists(path):
        print(
            msg.cli("The following mesh path does not exist but should",
                    items=[path], header="solver error", border="="))
        sys.exit(-1)

    # Count slices and grid points. Slice 0 is always read, subsequent
    # slices only if their binary file is present
    key = self.parameters[0]
    iproc = 0
    ngll = []
    while True:
        dummy = self.io.read_slice(path=path, parameters=key,
                                   iproc=iproc)[0]
        ngll.append(len(dummy))
        iproc += 1
        if not exists(os.path.join(path,
                                   f"proc{int(iproc):06d}_{key}.bin")):
            break
    nproc = iproc

    # Create coordinate pointers. Arguments are bound by keyword, matching
    # the read_slice call above. Previously `self` was passed as an extra
    # leading positional argument, which shifted every argument by one and
    # made the resulting callables unusable
    coords = Struct()
    for axis in ['x', 'y', 'z']:
        coords[axis] = partial(self.io.read_slice, path=path,
                               parameters=axis)

    # Define internal mesh properties
    self._mesh_properties = Struct([["nproc", nproc],
                                    ["ngll", ngll],
                                    ["path", path],
                                    ["coords", coords]])
def data_filenames(self):
    """
    Return the filenames of all data, either built from the requested
    components or from all files matching the data wildcard.

    The working directory is changed to <self.cwd>/traces/obs as a side
    effect, which is where the wildcard is globbed.

    .. note::
        If the resulting list is empty, this function exits the workflow
        because filenames should not be empty if they're being queried

    :rtype: list
    :return: list of data filenames
    """
    unix.cd(self.cwd)
    unix.cd(os.path.join("traces", "obs"))

    if not PAR.COMPONENTS:
        # No components requested, take everything the wildcard matches
        filenames = glob(self.data_wildcard)
    else:
        filenames = []
        data_format = PAR.FORMAT.upper()
        if data_format == "SU":
            # SU file names are constructed directly, not searched for
            filenames = [self.data_wildcard.format(comp=comp.lower())
                         for comp in PAR.COMPONENTS]
        elif data_format == "ASCII":
            for comp in PAR.COMPONENTS:
                filenames.extend(
                    glob(self.data_wildcard.format(comp=comp.upper())))

    if filenames:
        return filenames

    print(
        msg.cli(
            "The property solver.data_filenames, used to search "
            "for traces in 'scratch/solver/*/traces' is empty "
            "and should not be. Please check solver parameters: ",
            items=[f"data_wildcard: {self.data_wildcard}"],
            header="data filenames error", border="="))
    sys.exit(-1)
def taskid(self):
    """
    Provides a unique identifier for each running task, read from the
    environment variable SEISFLOWS_TASKID.

    If the variable is not set, a warning is printed and 0 is returned.

    :rtype: int
    :return: integer value of the environment variable SEISFLOWS_TASKID,
        or 0 if it is not set
    """
    env_taskid = os.getenv("SEISFLOWS_TASKID")
    if env_taskid is not None:
        return int(env_taskid)

    print(
        msg.cli(
            "system.taskid() environment variable not found. "
            "Assuming DEBUG mode and returning taskid==0. "
            "If not DEBUG mode, please check SYSTEM.run()",
            header="warning", border="="))
    return 0
def call_solver(mpiexec, executable, output="solver.log"):
    """
    Calls MPI solver executable to run solver binaries, used by individual
    processes to run the solver on system. The solver's stdout is written
    to `output`.

    If the log file cannot be opened, the command cannot be started, or the
    external solver returns a non-zero exit code, an error message is
    printed and the program exits with `sys.exit(-1)`.

    :type mpiexec: str
    :param mpiexec: call to mpi. If None (e.g., serial run), the executable
        is called as ./<executable>
    :type executable: str
    :param executable: executable function to call
    :type output: str
    :param output: where to redirect stdout
    """
    # mpiexec is None when running in serial mode, so e.g., ./xmeshfem2D
    if mpiexec is None:
        exc_cmd = f"./{executable}"
    # Otherwise mpiexec is system dependent (e.g., srun, mpirun)
    else:
        exc_cmd = f"{mpiexec} {executable}"

    try:
        # Write solver stdout (log files) to text file. The context manager
        # closes the file on every path and, unlike a `finally: f.close()`,
        # cannot fail on an unbound name when open() itself raises
        with open(output, "w") as f:
            subprocess.run(exc_cmd, shell=True, check=True, stdout=f)
    except (subprocess.CalledProcessError, OSError) as e:
        print(
            msg.cli(
                "The external numerical solver has returned a nonzero "
                "exit code (failure). Consider stopping any currently "
                "running jobs to avoid wasted computational resources. "
                f"Check 'scratch/solver/mainsolver/{output}' for the "
                f"solvers stdout log message. "
                f"The failing command and error message are: ",
                items=[f"exc: {exc_cmd}", f"err: {e}"],
                header="external solver error", border="="))
        sys.exit(-1)
def taskid(self):
    """
    Provides a unique identifier for each running task, read from the
    environment. SEISFLOWS_TASKID takes priority, SLURM_ARRAY_TASK_ID is
    used as a fallback.

    If neither variable is set, a warning is printed and 0 is returned.

    :rtype: int
    :return: identifier for a given task
    """
    # os.getenv returns None for variables which are not set
    for variable in ("SEISFLOWS_TASKID", "SLURM_ARRAY_TASK_ID"):
        env_taskid = os.getenv(variable)
        if env_taskid is not None:
            return int(env_taskid)

    print(
        msg.cli(
            "system.taskid() environment variable not found. "
            "Assuming DEBUG mode and returning taskid==0. "
            "If not DEBUG mode, please check SYSTEM.run()",
            header="warning", border="="))
    return 0
def eval_grad(self, path, export_traces=False):
    """
    High level solver interface that evaluates the gradient by running the
    adjoint simulation and exporting the resulting kernels. A function
    evaluation must already have been carried out.

    Exits the program if no adjoint traces are found in 'traces/adj' of the
    solver working directory.

    :type path: str
    :param path: directory to which kernels (and optionally traces) are
        exported
    :type export_traces: bool
    :param export_traces: if True, also export synthetic and adjoint traces
        to `path`/traces. if False, discard traces
    """
    unix.cd(self.cwd)

    if self.taskid == 0:
        self.logger.debug("running adjoint simulations")

    # The preprocessing module must have created adjoint traces
    adjoint_traces = glob(os.path.join("traces", "adj", "*"))
    if not adjoint_traces:
        print(
            msg.cli(
                f"Event {self.source_name} has no adjoint traces, "
                f"which will lead to an external solver error. "
                f"Please check that solver.eval_func() executed "
                f"properly", border="=", header="solver error"))
        sys.exit(-1)

    self.adjoint()
    self.export_kernels(path)

    if not export_traces:
        return
    for kind in ("syn", "adj"):
        self.export_traces(path=os.path.join(path, "traces", kind),
                           prefix=f"traces/{kind}")
def check_stop_resume_cond(self, flow):
    """
    Check the stop-after and resume-from conditions.

    Translates PAR.RESUME_FROM and PAR.STOP_AFTER into indices of `flow`
    so that a workflow can be resumed from, or stopped after, a given
    function.

    Exits the program if either parameter does not match the name of any
    function in `flow`, or if the stop function does not come after the
    resume function.

    :type flow: tuple of functions
    :param flow: an ordered list of functions that make up the workflow
    :rtype: tuple
    :return: (start, stop) slice indices into `flow`. Each one is None if
        the corresponding parameter is not set, meaning no restriction on
        that end. `stop` already points one past the STOP_AFTER function.
    """
    fxnames = [func.__name__ for func in flow]

    # None on both ends means that flow will execute in its entirety
    start_idx, stop_idx = None, None

    # Overwrite start_idx if RESUME_FROM given, exit condition if no match
    if PAR.RESUME_FROM:
        try:
            start_idx = fxnames.index(PAR.RESUME_FROM)
            self.logger.info(
                msg.mnr("WORKFLOW WILL RESUME FROM FUNC: "
                        f"'{fxnames[start_idx]}'"))
        except ValueError:
            self.logger.info(
                msg.cli(
                    f"{PAR.RESUME_FROM} does not correspond to any FLOW "
                    f"functions. Please check that PAR.RESUME_FROM "
                    f"matches one of the functions listed out in "
                    f"`seisflows print flow`.", header="error",
                    border="="))
            sys.exit(-1)

    # Overwrite stop_idx if STOP_AFTER provided, exit condition if no match
    if PAR.STOP_AFTER:
        try:
            last_idx = fxnames.index(PAR.STOP_AFTER)
            # +1 to stop AFTER the function, due to python slice indexing
            stop_idx = last_idx + 1
            self.logger.info(
                msg.mnr("WORKFLOW WILL STOP AFTER FUNC: "
                        f"'{fxnames[last_idx]}'"))
        except ValueError:
            self.logger.info(
                msg.cli(
                    f"{PAR.STOP_AFTER} does not correspond to any FLOW "
                    f"functions. Please check that PAR.STOP_AFTER "
                    f"matches one of the functions listed out in "
                    f"`seisflows print flow`.", header="error",
                    border="="))
            sys.exit(-1)

    # Make sure stop after doesn't come before resume_from, otherwise none
    # of the flow will execute
    if PAR.STOP_AFTER and PAR.RESUME_FROM and stop_idx <= start_idx:
        self.logger.info(
            msg.cli(
                f"PAR.STOP_AFTER=='{PAR.STOP_AFTER}' is called "
                f"before PAR.RESUME_FROM=='{PAR.RESUME_FROM}' in "
                f"the FLOW functions. Please adjust accordingly "
                f"and rerun.", header="error", border="="))
        sys.exit(-1)

    return start_idx, stop_idx
def run(self, classname, method, single=False, run_call=None, **kwargs):
    """
    Runs task multiple times in embarrassingly parallel fashion on a SLURM
    cluster. Executes classname.method(*args, **kwargs) `NTASK` times,
    each time on `NPROC` CPU cores.

    The job array is submitted with `sbatch` and then polled every 5
    seconds until all array jobs are done. If any job reports a failure
    state, an error message is printed and the program exits.

    .. note::
        The actual CLI call structure looks something like this
        $ sbatch --args scripts/run OUTPUT class method environs

    :type classname: str
    :param classname: the class to run
    :type method: str
    :param method: the method from the given `classname` to run
    :type single: bool
    :param single: run a single-process, non-parallel task, such as
        smoothing the gradient, which only needs to be run once.
        The '--array' and '--ntasks*' arguments of the run call are
        replaced so that a single one-task job is submitted, and
        SEISFLOWS_TASKID=0 is appended to the environment argument.
    :type run_call: str
    :param run_call: subclasses (e.g., specific SLURM cluster subclasses)
        can overload the sbatch command line input by setting
        run_call. If set to None, default run_call will be set here.
    :param kwargs: keyword arguments for the task, passed on to
        `self.checkpoint`
    """
    self.checkpoint(PATH.OUTPUT, classname, method, kwargs)

    # Default sbatch command line input, can be overloaded by subclasses
    # Copy-paste this default run_call and adjust accordingly for subclass
    if run_call is None:
        run_call = " ".join([
            "sbatch",
            f"{PAR.SLURMARGS or ''}",
            f"--job-name={PAR.TITLE}",
            f"--nodes={math.ceil(PAR.NPROC/float(PAR.NODESIZE)):d}",
            f"--ntasks-per-node={PAR.NODESIZE:d}",
            f"--ntasks={PAR.NPROC:d}",
            f"--time={PAR.TASKTIME:d}",
            f"--output={os.path.join(PATH.WORKDIR, 'logs', '%A_%a')}",
            # sbatch syntax '0-N%M': array indices 0..N with at most M
            # tasks running simultaneously. The '%' has to be part of the
            # argument text; evaluated as the Python modulo operator it
            # bound tighter than the subtraction and produced an extra
            # array task whenever NTASKMAX was 1
            f"--array=0-{PAR.NTASK-1}%{PAR.NTASKMAX}",
            f"{os.path.join(ROOT_DIR, 'scripts', 'run')}",
            f"--output {PATH.OUTPUT}",
            f"--classname {classname}",
            f"--funcname {method}",
            f"--environment {PAR.ENVIRONS or ''}"
        ])
        self.logger.debug(run_call)

    # Single-process jobs simply need to replace a few sbatch arguments.
    # Do it AFTER `run_call` has been defined so that subclasses submitting
    # custom run calls can still benefit from this
    if single:
        self.logger.info("replacing parts of sbatch run call for single "
                         "process job")
        # Strings are immutable, so the call is rebuilt from its parts
        # (a bare `run_call.replace(...)` discards its result). Any part
        # mentioning '--ntasks', including '--ntasks-per-node', is replaced
        single_parts = []
        for part in run_call.split(" "):
            if "--array" in part:
                part = "--array=0-0"
            elif "--ntasks" in part:
                part = "--ntasks=1"
            single_parts.append(part)
        run_call = " ".join(single_parts)

        # Append taskid to environment variable, deal with the case where
        # PAR.ENVIRONS is an empty string
        task_id_str = "SEISFLOWS_TASKID=0"
        if not run_call.strip().endswith("--environment"):
            task_id_str = f",{task_id_str}"  # appending to the list of vars
        run_call += task_id_str
        self.logger.debug(run_call)

    # The standard response from SLURM when submitting jobs
    # is something like 'Submitted batch job 441636', we want job number
    stdout = subprocess.run(run_call, stdout=subprocess.PIPE,
                            text=True, shell=True).stdout
    job_ids = job_id_list(stdout, single)

    # Continuously check for job completion on ALL running array jobs
    is_done = False
    count = 0
    bad_states = [
        "TIMEOUT", "FAILED", "NODE_FAIL", "OUT_OF_MEMORY", "CANCELLED"
    ]
    while not is_done:
        # Wait a bit to avoid rapidly querying sacct
        time.sleep(5)
        is_done, states = job_array_status(job_ids)

        # EXIT CONDITION: if any of the jobs provide job failure codes
        if not is_done:
            for i, state in enumerate(states):
                # Sometimes states can be something like 'CANCELLED+', so
                # we can't do exact string matching, check partial matches
                if any(check in state for check in bad_states):
                    print(
                        msg.cli((f"Stopping workflow for {state} job. "
                                 f"Please check log file for details."),
                                items=[
                                    f"TASK: {classname}.{method}",
                                    f"TASK ID: {job_ids[i]}",
                                    f"LOG: logs/{job_ids[i]}",
                                    f"SBATCH: {run_call}"
                                ],
                                header="slurm run error", border="="))
                    sys.exit(-1)

        # WAIT CONDITION: if sacct is not working, we'll get stuck in a loop
        if "UNDEFINED" in states:
            count += 1
            # Every 10 counts, warn the user this is unexpected behavior
            if not count % 10:
                job_id = job_ids[states.index("UNDEFINED")]
                self.logger.warning(
                    f"SLURM command 'sacct {job_id}' has "
                    f"returned unexpected response {count} "
                    f"times. This job may have failed "
                    f"unexpectedly. Consider checking "
                    f"manually")

    self.logger.info(f"Task {classname}.{method} finished successfully")
if self.run_example: print(msg.cli("RUNNING SEISFLOWS3 INVERSION WORKFLOW", border="=")) self.run_sf3_example() if __name__ == "__main__": print(msg.ascii_logo_small) print( msg.cli( f"This is a [SPECFEM2D] [WORKSTATION] example, which will " f"run an inversion to assess misfit between two homogeneous halfspace " f"models with slightly different velocities. [3 events, 1 station, 2 " f"iterations]. The tasks involved include: ", items=[ "1. (optional) Download, configure, compile SPECFEM2D", "2. Set up a SPECFEM2D working directory", "3. Generate starting model from Tape2007 example", "4. Generate target model w/ perturbed starting model", "5. Set up a SeisFlows3 working directory", f"6. Run an inversion workflow" ], header="seisflows3 example 1", border="=")) # Dynamically traverse sys.argv to get user-input command line. Cannot # use argparser here because we're being called by SeisFlows CLI tool which # is occupying argparser if len(sys.argv) > 1: sf3ex2d = SF3Example2D() sf3ex2d.main()
def check_solver_parameter_files(self):
    """
    Check that the SPECFEM2D parameter files agree with the internally set
    parameters: NSTEP and DT in 'DATA/Par_file', f0 in 'DATA/SOURCE', and
    the mesh NPROC against PAR.NT, PAR.DT, PAR.F0 and PAR.NPROC. Any
    mismatch exits the program; the explanatory message is only printed by
    the task with taskid 0.

    If 'MULTIPLES' is present in PAR, 'absorbtop' in 'DATA/Par_file' is set
    to '.false.' when PAR.MULTIPLES is truthy and to '.true.' otherwise.
    """
    def abort(explanation):
        # Only the first task prints so the message is not repeated for
        # every running task, but every task stops
        if self.taskid == 0:
            print(msg.cli(explanation, header="parameter match error",
                          border="="))
        sys.exit(-1)

    # Number of time steps in the SPECFEM2D Par_file
    nt_str, nt, _ = getpar(key="NSTEP", file="DATA/Par_file")
    if int(nt) != PAR.NT:
        abort(f"SPECFEM2D {nt_str}=={nt} is not equal "
              f"SeisFlows3 PAR.NT=={PAR.NT}. Please ensure "
              f"that these values match in both files.")

    # Time step in the SPECFEM2D Par_file
    dt_str, dt, _ = getpar(key="DT", file="DATA/Par_file")
    if float(dt) != PAR.DT:
        abort(f"SPECFEM2D {dt_str}=={dt} is not equal "
              f"SeisFlows3 PAR.DT=={PAR.DT}. Please ensure "
              f"that these values match in both files.")

    # Central frequency in the SPECFEM2D SOURCE file
    f0_str, f0, _ = getpar(key="f0", file="DATA/SOURCE")
    if float(f0) != PAR.F0:
        abort(f"SPECFEM2D {f0_str}=={f0} is not equal "
              f"SeisFlows3 PAR.F0=={PAR.F0}. Please ensure "
              f"that these values match the DATA/SOURCE file.")

    # Number of processors must match the MESH values
    nproc = self.mesh_properties.nproc
    if nproc != PAR.NPROC:
        abort(f"SPECFEM2D mesh NPROC=={nproc} is not equal"
              f"SeisFlows3 PAR.NPROC=={PAR.NPROC}. "
              f"Please check that your mesh matches this val.")

    if "MULTIPLES" in PAR:
        absorbtop = ".false." if PAR.MULTIPLES else ".true."
        setpar(key="absorbtop", val=absorbtop, file="DATA/Par_file")
def write_gradient(self, path):
    """
    Combine contributions from individual sources and material parameters
    to get the gradient, and optionally apply a user-supplied mask.

    Kernels are processed on system into `path`/kernels/sum, loaded, merged
    into a single vector and multiplied by the model in `path`/model. The
    result is saved to `path`/gradient. If PATH.MASK is set, the masked
    gradient is saved there and the unmasked one to `path`/gradient_nomask.

    Exits the program if `path` does not exist.

    .. note::
        Because processing operations can be quite expensive, they must be
        run through the HPC system interface; processing does not involve
        embarrassingly parallel tasks, we use run(single=True)

    :type path: str
    :param path: directory from which kernels are read and to which
        gradient is written
    """
    if not os.path.exists(path):
        print(
            msg.cli(
                "Gradient path does in postprocess.write_gradient "
                "does not exist but should",
                items=[path], header="error"))
        sys.exit(-1)

    # Postprocess file structure defined here once-and-for-all
    kernels_dir = os.path.join(path, "kernels")
    summed_kernels_dir = os.path.join(kernels_dir, "sum")
    model_dir = os.path.join(path, "model")
    gradient_dir = os.path.join(path, "gradient")
    unmasked_gradient_dir = os.path.join(path, "gradient_nomask")

    # Run postprocessing as job on system as it's computationally intensive
    self.logger.info("processing kernels into gradient on system...")
    system.run("postprocess", "process_kernels", single=True,
               path=kernels_dir, logger=self.logger)

    # Access the gradient information stored in the kernel summation and
    # merge it into a single vector
    summed_kernels = solver.load(summed_kernels_dir, suffix="_kernel")
    gradient = solver.merge(summed_kernels)

    # Convert to absolute perturbations:
    # log dm --> dm (see Eq.13 Tromp et al 2005)
    gradient *= solver.merge(solver.load(model_dir))

    if not PATH.MASK:
        solver.save(solver.split(gradient), path=gradient_dir,
                    suffix="_kernel")
        return

    self.logger.info("masking gradient")
    # To scale the gradient, users can supply "masks" by exactly
    # mimicking the file format in which models are stored
    mask = solver.merge(solver.load(PATH.MASK))

    # While both masking and preconditioning involve scaling the
    # gradient, they are fundamentally different operations:
    # masking is ad hoc, preconditioning is a change of variables;
    # For more info, see Modrak & Tromp 2016 GJI
    solver.save(solver.split(gradient), path=unmasked_gradient_dir,
                suffix="_kernel")
    solver.save(solver.split(gradient * mask), path=gradient_dir,
                suffix="_kernel")
def custom_import(name=None, module=None, classname=None):
    """
    Imports SeisFlows module and extracts class that is the camelcase
    version of the module name.

    For example:
        custom_import('workflow', 'inversion')

        imports 'seisflows.workflow.inversion' and, from this module,
        extracts class 'Inversion'.

    Exits the program if `name` is missing or not in NAMES, if the module
    is not found in any of PACKAGES, if importing it raises, or if the
    class is not present in the imported module.

    :type name: str
    :param name: component of the workflow to import, must be in NAMES
    :type module: str
    :param module: module within the workflow component to call upon, e.g.
        seisflows.workflow.inversion, where `inversion` is the module.
        If None, it is looked up in the registered parameters under
        `name.upper()`
    :type classname: str
    :param classname: the class to be called from the module. Usually this
        is just the CamelCase version of the module, which will be
        defaulted to if this parameter is set `None`, however allows for
        custom class naming. Note: CamelCase class names following PEP-8
        convention.
    :return: the requested class, or `Null` if no module is assigned for
        `name` in the parameters
    """
    # Parse input arguments for custom import
    # Allow empty system to be called so that import error message can be
    # thrown
    if name is None:
        print(
            msg.cli(
                "Please check that 'custom_import' utility is being used as "
                "follows: custom_import(name, module). The resulting full dotted "
                "name 'seisflows3.name.module' must correspond to a module "
                "within this package.",
                header="custom import error", border="="))
        sys.exit(-1)
    # Invalid `system` call
    elif name not in NAMES:
        print(
            msg.cli(
                "Please check that the use of custom_import(name, module, class) "
                "is implemented correctly, where name must be in the following:",
                items=NAMES, header="custom import error", border="="))
        sys.exit(-1)

    # Attempt to retrieve currently assigned classname from parameters
    if module is None:
        try:
            module = sys.modules[PAR][name.upper()]
        except KeyError:
            return Null
        # If this still returns nothing, then no module has been assigned
        # likely the User has turned this module OFF
        if module is None:
            return Null

    # If no method specified, convert classname to PEP-8
    if classname is None:
        # Make a distinction for fully uppercase classnames, e.g. LBFGS
        if module.isupper():
            classname = module.upper()
        # If normal classname, convert to CamelCase
        else:
            classname = module.title().replace("_", "")

    # Check if modules exist, otherwise raise custom exception. The first
    # package that provides the module wins
    for package in PACKAGES:
        full_dotted_name = ".".join([package, name, module])
        if module_exists(full_dotted_name):
            break
    else:
        print(
            msg.cli(
                f"The following module was not found within the package: "
                f"seisflows3.{name}.{module}",
                header="custom import error", border="="))
        sys.exit(-1)

    # If importing the module doesn't work, throw an error. Usually this
    # happens when an external dependency isn't available, e.g., Pyatoa
    try:
        # Bound to a new name so that `module` keeps the module NAME, which
        # the error message below interpolates
        imported = import_module(full_dotted_name)
    except Exception as e:
        print(
            msg.cli(f"Module could not be imported {full_dotted_name}",
                    items=[str(e)], header="custom import error",
                    border="="))
        # print_exc() writes the traceback itself and returns None, so it
        # must not be wrapped in print()
        traceback.print_exc()
        sys.exit(-1)

    # Extract classname from module if possible
    try:
        return getattr(imported, classname)
    except AttributeError:
        print(
            msg.cli(f"The following method was not found in the imported "
                    f"class: seisflows3.{name}.{module}.{classname}"))
        sys.exit(-1)
def init_seisflows(check=True):
    """
    Instantiates SeisFlows3 objects and makes them globally accessible by
    registering them in sys.modules under 'seisflows_<name>'.

    Exits the program if the registered OUTPUT path already exists on disk,
    if any module's check() raises an AssertionError, or if a required
    module is not defined in the parameters.

    :type check: bool
    :param check: Run parameter and path checking, defined in the
        module.check() functions. By default should be True, to ensure that
        paths and parameters are set correctly. It should only be set False
        for debug and testing purposes when we need to force our way past
        this safeguard.
    """
    logger.info("initializing SeisFlows3 in sys.modules")

    # Parameters and paths must already be loaded (normally done by submit)
    assert (PAR in sys.modules)
    assert (PATH in sys.modules)

    # Check if objects already exist on disk, exit so as to not overwrite
    paths = sys.modules[PATH]
    if "OUTPUT" in paths and os.path.exists(paths["OUTPUT"]):
        print(
            msg.cli("Data from previous workflow found in working directory.",
                    items=[
                        "> seisflows restart: delete data and start new "
                        "workflow",
                        "> seisflows resume: resume existing workflow"
                    ],
                    header="warning", border="="))
        sys.exit(-1)

    # Instantiate and register objects
    for name in NAMES:
        component_class = custom_import(name)
        sys.modules[f"seisflows_{name}"] = component_class()

    # Parameter import error checking, missing or improperly set parameters
    # will throw assertion errors. Collect all of them before exiting so
    # they are reported together
    if check:
        failures = []
        for name in NAMES:
            component = sys.modules[f"seisflows_{name}"]
            try:
                component.check()
            except AssertionError as e:
                failures.append(f"{name}: {e}")
        if failures:
            print(
                msg.cli("seisflows.config module check failed with:",
                        items=failures, header="module check error",
                        border="="))
            sys.exit(-1)

    # Bare minimum module requirements for SeisFlows3
    req_modules = ["WORKFLOW", "SYSTEM"]
    for req in req_modules:
        if hasattr(sys.modules[PAR], req):
            continue
        print(
            msg.cli(
                f"SeisFlows3 requires defining: {req_modules}."
                "Please specify these in the parameter file. Use "
                "'seisflows print module' to determine suitable "
                "choices.",
                header="error", border="="))
        sys.exit(-1)
print(f"> EX2: Using {self.nsta} stations in this inversion workflow") with open("STATIONS", "w") as f: f.writelines(lines[:self.nsta]) if __name__ == "__main__": print(msg.ascii_logo_small) print( msg.cli( f"This is a [SPECFEM2D] [WORKSTATION] example, which will " f"run an inversion to assess misfit between a homogeneous halfspace " f"and checkerboard model using Pyatoa for misfit quantification " f"[2 events, 5 stations, 1 iterations]. The tasks involved include: ", items=[ "1. (optional) Download, configure, compile SPECFEM2D", "2. Set up a SPECFEM2D working directory", "3. Generate starting model from Tape2007 example", "4. Generate target model w/ perturbed starting model", "5. Set up a SeisFlows3 working directory", f"6. Run an inversion workflow. The line search is expected to " f"attempt 4 evaluations (i01s04)" ], header="seisflows3 example 2", border="=")) # Dynamically traverse sys.argv to get user-input command line. Cannot # use argparser here because we're being called by SeisFlows CLI tool which # is occupying argparser if len(sys.argv) > 1: sf3ex2d = SF3PyatoaEx2D() sf3ex2d.main()