def export_residuals(self, path):
    """
    File transfer utility. Move this source's residuals out of the solver
    working directory and into `path`/residuals/<source_name>.

    If the solver's 'residuals' directory is missing, an error message is
    printed and the process exits with status -1.

    :type path: str
    :param path: directory under which the 'residuals' folder is created
    """
    if self.taskid == 0:
        self.logger.debug(f"exporting residuals to:\n{path}")

    residuals_root = os.path.join(path, "residuals")
    unix.mkdir(residuals_root)

    local_residuals = os.path.join(self.cwd, "residuals")
    if os.path.exists(local_residuals):
        unix.mv(local_residuals,
                os.path.join(residuals_root, self.source_name))
        return

    # Without a residuals directory the preprocessing step must have failed,
    # so the workflow cannot carry on
    error_text = msg.cli(
        "The Solver function 'export_residuals' expected "
        "'residuals' directories to be created but could not "
        "find them and cannot continue the workflow. Please "
        "check the preprocess.prepare_eval_grad() function",
        header="preprocess error", border="=")
    print(error_text)
    sys.exit(-1)
def smooth(self, input_path, **kwargs):
    """
    Specfem2D requires additional model parameter files to sit alongside the
    data before the xsmooth_sem task is run. Copy those files from the
    solver's DATA directory into `input_path`, then hand over to the parent
    class smooth(). Kwargs should match arguments of solver.base.smooth()

    .. note::
        This operation is usually run with run(single=True) so only one
        task will be performing these operations.

    :type input_path: str
    :param input_path: path to data
    """
    # The parent class repeats this check, but the copy below needs the
    # directory to be there already
    if not exists(input_path):
        unix.mkdir(input_path)

    unix.cd(self.cwd)
    unix.cd("DATA")

    # Gather only the required files; an unmatched pattern contributes nothing
    required_tags = ("jacobian", "NSPEC_ibool", "x", "y", "z")
    matches = [fid for tag in required_tags for fid in glob(f"*_{tag}.bin")]
    for fid in matches:
        unix.cp(src=fid, dst=input_path)

    super().smooth(input_path=input_path, **kwargs)
def cleanup_xspecfem2d_run(self, choice=None):
    """
    Clean up after running the SPECFEM2D binaries: move the model files
    written to DATA/ into the output directory, then optionally move the
    output directory aside so that a later run does not overwrite it.

    :type choice: str
    :param choice: 'INIT' moves the output directory to the initial-model
        path and recreates an empty output directory; 'TRUE' moves it to
        the true-model path. Any other value (including None) leaves the
        output directory where it is.
    """
    paths = self.workdir_paths
    cd(paths.workdir)
    print("> Cleaning up after xspecfem2d, setting up for new run")

    # SPECFEM2D writes its models into DATA/ by default, whereas SeisFlows3
    # expects them in OUTPUT_FILES/ (the SPECFEM3D default)
    model_files = glob.glob("DATA/*bin")
    mv(model_files, paths.output)

    if choice == "TRUE":
        mv(paths.output, paths.model_true)
    elif choice == "INIT":
        mv(paths.output, paths.model_init)
        # Start the TRUE run from a fresh, empty OUTPUT_FILES/ directory
        rm(paths.output)
        mkdir(paths.output)
def setup(self):
    """
    Sets up nonlinear optimization machinery: the statistics directory, the
    line search and preconditioner objects, the stats log files, and (when
    PATH contains MODEL_INIT) the initial model vector.
    """
    # All optimization statistics text files are written to this directory
    stats_dir = os.path.join(PATH.WORKDIR, CFGPATHS.STATSDIR)
    unix.mkdir(stats_dir)

    # The line search class is looked up by name in the line_search plugin
    line_search_cls = getattr(line_search, PAR.LINESEARCH)
    self.line_search = line_search_cls(
        step_count_max=PAR.STEPCOUNTMAX,
        step_len_max=PAR.STEPLENMAX,
        log_file=os.path.join(stats_dir, f"{self.line_search_log}.txt"),
    )

    self.precond = getattr(preconds, PAR.PRECOND)() if PAR.PRECOND else None

    # Call write_stats once for every attribute whose name contains "log_"
    # NOTE(review): presumably this resets existing stats/ files to empty;
    # confirm against write_stats
    for attr_name, log_name in vars(self).items():
        if "log_" not in attr_name:
            continue
        self.write_stats(log_name)

    # Make sure the line search step count starts at 0
    self.write_stats(self.log_step_count, 0)

    unix.mkdir(PATH.OPTIMIZE)

    if "MODEL_INIT" not in PATH:
        return

    initial_model = solver.merge(solver.load(PATH.MODEL_INIT))
    self.save(self.m_new, initial_model)
    self.check_model(initial_model, self.m_new)
def initialize_solver_directories(self):
    """
    Creates the directory structure expected by SPECFEM3D (bin/, DATA/),
    copies executables, and prepares input files. Executables must be
    supplied by the user as there is no mechanism for automatically
    compiling from source.

    Each directory acts as a completely independent Specfem run directory.
    This allows for embarrassingly parallel execution without
    intra-directory communication, at the cost of redundancy and extra files.
    """
    if self.taskid == 0:
        self.logger.info(f"initializing {PAR.NTASK} solver directories")

    unix.mkdir(self.cwd)
    unix.cd(self.cwd)

    # Internal directory structure, created relative to the cwd
    subdirectories = (
        "bin", "DATA", "OUTPUT_FILES/DATABASES_MPI",
        "traces/obs", "traces/syn", "traces/adj",
        self.model_databases, self.kernel_databases,
    )
    for subdirectory in subdirectories:
        unix.mkdir(subdirectory)

    # Executables go into bin/
    executables = glob(os.path.join(PATH.SPECFEM_BIN, "*"))
    unix.cp(executables, os.path.join("bin", ""))

    # Every input file except the source files goes into DATA/
    input_files = [
        fid for fid in glob(os.path.join(PATH.SPECFEM_DATA, "*"))
        if self.source_prefix not in fid
    ]
    unix.cp(input_files, os.path.join("DATA", ""))

    # The event source is symlinked with the source name stripped, as
    # SPECFEM only expects the bare prefix
    source_file = os.path.join(
        PATH.SPECFEM_DATA, f"{self.source_prefix}_{self.source_name}"
    )
    unix.ln(source_file, os.path.join("DATA", self.source_prefix))

    if self.taskid != 0:
        # Copy the initial model from mainsolver into the current directory
        # to avoid running multiple instances of xgenerate_databases
        databases = ("OUTPUT_FILES", "DATABASES_MPI")
        unix.cp(os.path.join(PATH.SOLVER, "mainsolver", *databases),
                os.path.join(self.cwd, *databases))
    else:
        # Task 0 is symlinked as 'mainsolver' for convenience
        mainsolver = os.path.join(PATH.SOLVER, "mainsolver")
        if not os.path.exists(mainsolver):
            unix.ln(self.cwd, mainsolver)
            self.logger.debug(f"source {self.source_name} symlinked as "
                              f"mainsolver")

    self.check_solver_parameter_files()
def save_kernels_sum(self):
    """
    Save summed kernels into the output directory
    """
    output_kernels = os.path.join(PATH.OUTPUT, "kernels")
    unix.mkdir(output_kernels)
    unix.cp(os.path.join(PATH.SCRATCH, "kernels", "sum"), output_kernels)
def clean(self):
    """
    Clean up the solver-dependent run directory by removing the
    OUTPUT_FILES/ directory and recreating it
    """
    unix.cd(self.cwd)

    output_files = "OUTPUT_FILES"
    for action in (unix.rm, unix.mkdir):
        action(output_files)
def save_kernels(self):
    """
    Save individual kernels into the output directory
    """
    output_dir = PATH.OUTPUT
    unix.mkdir(output_dir)
    unix.cp(os.path.join(PATH.SCRATCH, "kernels"), output_dir)
def setup(self):
    """
    Set up the LBFGS optimization schema: run the parent setup, then create
    the LBFGS directory from within PATH.OPTIMIZE
    """
    super().setup()

    # LBFGS keeps its own files in a separate directory
    lbfgs_dir = self.LBFGS_dir
    unix.cd(PATH.OPTIMIZE)
    unix.mkdir(lbfgs_dir)
def setup(self):
    """
    Sets up data preprocessing machinery by creating the directory that will
    store the outputs of the preprocessing workflow.

    Akin to an __init__ method, but meant to be called externally by the
    workflow.
    """
    preprocess_dir = PATH.PREPROCESS
    unix.mkdir(preprocess_dir)
def smooth(self, input_path, output_path, parameters=None, span_h=0.,
           span_v=0., output="solver.log"):
    """
    Postprocessing wrapper: xsmooth_sem
    Smooths kernels by convolving them with a Gaussian.

    .. note::
        paths require a trailing `/` character when calling xsmooth_sem

    .. note::
        It is ASSUMED that this function is being called by
        system.run(single=True) so that we can use the main solver
        directory to perform the kernel smooth task

    :type input_path: str
    :param input_path: path to data
    :type output_path: str
    :param output_path: path to export the outputs of xsmooth_sem
    :type parameters: list
    :param parameters: optional list of parameters,
        defaults to `self.parameters`
    :type span_h: float
    :param span_h: horizontal smoothing length in meters
    :type span_v: float
    :param span_v: vertical smoothing length in meters
    :type output: str
    :param output: file to output stdout to
    """
    if parameters is None:
        parameters = self.parameters

    if not exists(output_path):
        unix.mkdir(output_path)

    # The smoothing operator is run from inside scratch/solver/*
    unix.cd(self.cwd)

    # mpiexec ./bin/xsmooth_sem SMOOTH_H SMOOTH_V name input output use_gpu
    for name in parameters:
        command_parts = [
            "bin/xsmooth_sem",
            str(span_h),
            str(span_v),
            f"{name}_kernel",
            os.path.join(input_path, ""),   # join with "" adds trailing sep
            os.path.join(output_path, ""),
            ".false",
        ]
        call_solver(mpiexec=PAR.MPIEXEC, executable=" ".join(command_parts),
                    output=output)

    # Strip the '_smooth' tag from the names of the files in output_path
    smoothed_files = glob(os.path.join(output_path, "*"))
    unix.rename(old="_smooth", new="", names=smoothed_files)
def export_model(self, path):
    """
    File transfer utility to copy a SPECFEM2D model (the .bin files) from
    the solver's DATA directory to an external path location

    :type path: str
    :param path: path to export the SPECFEM2D model
    """
    unix.mkdir(path)

    model_files = glob(os.path.join(self.cwd, "DATA", "*.bin"))
    unix.cp(src=model_files, dst=path)
def clean(self):
    """
    Cleans the directories in which function and gradient evaluations were
    carried out by removing and then recreating them
    """
    self.logger.info(msg.mnr("CLEANING WORKDIR FOR NEXT ITERATION"))

    eval_dirs = (PATH.GRAD, PATH.FUNC)
    for eval_dir in eval_dirs:
        unix.rm(eval_dir)
    for eval_dir in eval_dirs:
        unix.mkdir(eval_dir)
def smooth_legacy(input_path='', output_path='', parameters=None, span=0.):
    """
    Legacy kernel smoother that reads single-slice kernels, smooths them
    with `array.meshsmooth`, and writes them back out. Only meshes with
    nproc == 1 are supported.

    :type input_path: str
    :param input_path: directory the '<parameter>_kernel' slices are read
        from; must exist
    :type output_path: str
    :param output_path: directory the smoothed kernels are written to;
        created if it does not exist
    :type parameters: list
    :param parameters: parameters to smooth; when empty or None, falls back
        to the parameters of the 'seisflows_solver' module
    :type span: float
    :param span: value passed to `array.meshsmooth`; when falsy no smoothing
        is applied and nothing is written
    :rtype: dict or None
    :return: the unsmoothed kernels (parameter -> list of slices) when
        `span` is falsy, otherwise None
    :raises Exception: if `input_path` does not exist
    :raises NotImplementedError: if the solver mesh has more than 1 processor
    """
    solver = sys.modules['seisflows_solver']
    PATH = sys.modules['seisflows_paths']

    if not exists(input_path):
        raise Exception(f"input_path does not exist: '{input_path}'")
    if not exists(output_path):
        unix.mkdir(output_path)
    if solver.mesh_properties.nproc != 1:
        raise NotImplementedError(
            "smooth_legacy only supports meshes with nproc == 1"
        )

    # Resolve the parameter list once rather than in every loop
    keys = list(parameters or solver.parameters)

    # Read kernels (slice 0 only, because nproc == 1)
    kernels = {key: [] for key in keys}
    for key in keys:
        kernels[key] += solver.io.read_slice(input_path, key + '_kernel', 0)

    if not span:
        return kernels

    # Read the mesh coordinates from the initial model
    coords = {}
    for key in ('x', 'z'):
        coords[key] = list(solver.io.read_slice(PATH.MODEL_INIT, key, 0))
    mesh = array.stack(coords['x'][0], coords['z'][0])

    # Apply the smoother to every kernel before writing any of them
    for key in keys:
        kernels[key] = [array.meshsmooth(kernels[key][0], mesh, span)]

    for key in keys:
        solver.io.write_slice(kernels[key][0], output_path, key + '_kernel', 0)
def save_kwargs(self, classname, method, kwargs):
    """
    Save keyword arguments with `saveobj` under the 'kwargs' directory of
    PATH.OUTPUT, in a file named '<classname>_<method>.p'.

    :type classname: str
    :param classname: the class to run
    :type method: str
    :param method: the method from the given `classname` to run
    :type kwargs: dict
    :param kwargs: the keyword arguments to save
    """
    kwargs_dir = os.path.join(PATH.OUTPUT, "kwargs")
    unix.mkdir(kwargs_dir)

    saveobj(os.path.join(kwargs_dir, f"{classname}_{method}.p"), kwargs)
def initialize_solver_directories(self):
    """
    Creates solver directories in serial using a single node.
    Should only be run by master job.

    Differs from Base initialize_solver_directories() as this serial task
    creates the directory structure for every source in turn, rather than
    having each source create its own. The internal directory structure is
    the same.
    """
    subdirectories = (
        "bin", "DATA", "OUTPUT_FILES/DATABASES_MPI",
        "traces/obs", "traces/syn", "traces/adj",
    )

    for source_name in self.source_names:
        source_dir = os.path.join(PATH.SOLVER, source_name)

        # Start from scratch, then change into the directory so that all
        # following actions use RELATIVE paths
        unix.rm(source_dir)
        unix.mkdir(source_dir)
        unix.cd(source_dir)

        for subdirectory in subdirectories:
            unix.mkdir(subdirectory)

        # Executables go into bin/
        executables = glob(os.path.join(PATH.SPECFEM_BIN, "*"))
        unix.cp(executables, os.path.join("bin", ""))

        # Every input file except the source files goes into DATA/
        input_files = [
            fid for fid in glob(os.path.join(PATH.SPECFEM_DATA, "*"))
            if self.source_prefix not in fid
        ]
        unix.cp(input_files, os.path.join("DATA", ""))

        # The event source is symlinked under the bare source prefix
        source_file = os.path.join(PATH.SPECFEM_DATA,
                                   f"{self.source_prefix}_{source_name}")
        unix.ln(source_file, os.path.join("DATA", self.source_prefix))

        if source_name != self.mainsolver:
            continue

        # The main solver is symlinked as 'mainsolver' in the solver
        # directory, and the solver parameters are checked only this once
        unix.ln(source_name, os.path.join(PATH.SOLVER, "mainsolver"))
        self.check_solver_parameter_files()
def setup_specfem2d_for_model_init(self):
    """
    Make some adjustments to the original parameter file so that SPECFEM2D
    writes binary models and kernels, then reset the output directory.
    Changes into the working DATA directory, where the Par_file is expected.
    """
    cd(self.workdir_paths.data)
    assert os.path.exists("Par_file"), "I cannot find the Par_file!"

    print("> Setting the SPECFEM2D Par_file for SeisFlows3 compatiblility")

    par_file_edits = (
        ("setup_with_binary_database", 1),  # create .bin files
        ("save_model", "binary"),           # output model in .bin format
        ("save_ASCII_kernels", ".false."),  # kernels also .bin
    )
    for key, value in par_file_edits:
        self.sf.sempar(key, value)

    output_dir = self.workdir_paths.output
    rm(output_dir)
    mkdir(output_dir)
def clean(self):
    """
    Determine if the forward simulation from the line search can be carried
    over. We assume clean() is the final flow() argument so that the thrifty
    status can be updated here.

    When thrifty, the function evaluation directory replaces the gradient
    directory; otherwise the parent class clean() is used.
    """
    self.update_status()

    if not self.thrifty:
        super().clean()
        return

    self.logger.info(
        msg.mnr("THRIFTY CLEANING WORKDIR FOR NEXT "
                "ITERATION"))
    unix.rm(PATH.GRAD)
    unix.mv(PATH.FUNC, PATH.GRAD)
    unix.mkdir(PATH.FUNC)
def export_traces(self, path, prefix="traces/obs"):
    """
    File transfer utility. Export traces to disk by copying the solver's
    `prefix` directory to `path`/<source_name>.

    :type path: str
    :param path: path to save traces
    :type prefix: str
    :param prefix: location of traces w.r.t self.cwd
    """
    if self.taskid == 0:
        # f-string so the actual path and prefix are logged, not the
        # literal placeholders
        self.logger.debug(f"exporting traces to {path} {prefix}")

    unix.mkdir(path)

    src = os.path.join(self.cwd, prefix)
    dst = os.path.join(path, self.source_name)
    unix.cp(src, dst)
def combine(self, input_path, output_path, parameters=None):
    """
    Postprocessing wrapper: xcombine_sem
    Sums kernels from individual source contributions to create gradient.

    .. note::
        The binary xcombine_sem simply sums matching databases (.bin)

    .. note::
        It is ASSUMED that this function is being called by
        system.run(single=True) so that we can use the main solver
        directory to perform the kernel summation task

    :type input_path: str
    :param input_path: path to data
    :type output_path: str
    :param output_path: path to export the outputs of xcombine_sem
    :type parameters: list
    :param parameters: optional list of parameters,
        defaults to `self.parameters`
    """
    if parameters is None:
        parameters = self.parameters

    if not exists(output_path):
        unix.mkdir(output_path)

    unix.cd(self.cwd)

    # Write one kernel directory per source into the kernel paths file
    with open("kernel_paths", "w") as f:
        f.writelines(
            os.path.join(input_path, f"{name}\n")
            for name in self.source_names
        )

    # Iterate over the requested `parameters` (not `self.parameters`) so
    # that the argument is honoured
    for name in parameters:
        # e.g.: mpiexec ./bin/xcombine_sem alpha_kernel kernel_paths output
        call_solver(
            mpiexec=PAR.MPIEXEC,
            executable=" ".join([
                "bin/xcombine_sem", f"{name}_kernel", "kernel_paths",
                output_path,
            ]),
        )
def save():
    """
    Export the current session to disk: the PAR and PATH modules are dumped
    as JSON files and each 'seisflows_<name>' module in NAMES is pickled,
    all inside the OUTPUT path.
    """
    logger.info("exporting current working environment to disk")

    output = sys.modules[PATH]["OUTPUT"]
    unix.mkdir(output)

    # Paths and parameters are stored as human-readable JSON
    for module_name in (PAR, PATH):
        json_file = os.path.join(output, f"{module_name}.json")
        with open(json_file, "w") as f:
            json.dump(sys.modules[module_name].__dict__, f,
                      sort_keys=True, indent=4)

    # The workflow modules themselves are stored as pickle objects
    for name in NAMES:
        pickle_file = os.path.join(output, f"seisflows_{name}.p")
        with open(pickle_file, "wb") as f:
            pickle.dump(sys.modules[f"seisflows_{name}"], f)
def export_kernels(self, path):
    """
    File transfer utility. Export kernels to disk by moving the
    '*_kernel.bin' files from the kernel databases directory into
    `path`/kernels/<source_name>.

    :type path: str
    :param path: path to save kernels
    """
    if self.taskid == 0:
        self.logger.debug(f"exporting kernels to:\n{path}")

    unix.cd(self.kernel_databases)

    # Work around conflicting name conventions
    self.rename_kernels()

    kernel_files = glob("*_kernel.bin")
    export_dir = os.path.join(path, "kernels", self.source_name)
    unix.mkdir(export_dir)
    unix.mv(kernel_files, export_dir)
def export_model(self, path, parameters=None):
    """
    File transfer utility. Export the model to disk.

    Only task 0 performs the export; every other task returns without
    doing anything.

    :type path: str
    :param path: path to save model
    :type parameters: list
    :param parameters: list of parameters that define the model,
        defaults to `self.parameters`
    """
    if parameters is None:
        parameters = self.parameters

    if self.taskid != 0:
        return

    unix.mkdir(path)
    for key in parameters:
        pattern = os.path.join(self.model_databases, f"*{key}.bin")
        unix.cp(glob(pattern), path)
def setup(self):
    """
    Sets up data preprocessing machinery by dynamically loading the misfit
    and adjoint source functions, and the reader and writer for the
    configured seismic data format.
    """
    unix.mkdir(PATH.PREPROCESS)

    # Misfit function and adjoint trace generator; MISFIT takes priority
    # over BACKPROJECT
    if PAR.MISFIT:
        self.logger.debug(f"misfit function is: '{PAR.MISFIT}'")
        misfit_name = PAR.MISFIT.lower()
        self.misfit = getattr(misfit, misfit_name)
        self.adjoint = getattr(adjoint, misfit_name)
    elif PAR.BACKPROJECT:
        self.logger.debug(f"backproject function is: '{PAR.BACKPROJECT}'")
        self.adjoint = getattr(adjoint, PAR.BACKPROJECT.lower())

    # Seismic data reader and writer, both looked up by format name
    data_format = PAR.FORMAT
    self.reader = getattr(readers, data_format)
    self.writer = getattr(writers, data_format)
def apply_hess(self, path):
    """
    High level solver interface that computes action of Hessian on a given
    model vector. A gradient evaluation must have already been carried out.

    .. note::
        Not implemented. The intended sequence is: change into the solver
        directory, import the model from `path`, create "traces/lcg" and
        run a forward simulation into it, prepare the adjoint sources with
        preprocess, run the adjoint simulation, then export the kernels to
        `path`.

    TODO preprocess has no function prepare_apply_hess()

    :type path: str
    :param path: directory to which output files are exported
    :raises NotImplementedError: always
    """
    raise NotImplementedError(
        "apply_hess is not implemented: preprocess has no function "
        "prepare_apply_hess()"
    )
def _save_quantity(self, filepaths, tag="", path_out=""):
    """
    Repeatable convenience function to copy files into the output/
    directory, keeping each file's base name.

    :type filepaths: list
    :param filepaths: full path to files that should be saved to output/
    :type tag: str
    :param tag: sub-directory of PATH.OUTPUT to save into. If not given,
        files are saved directly into the output/ directory
    :type path_out: str
    :param path_out: if given, used as the destination directory instead
        of the default built from PATH.OUTPUT and `tag`
    """
    destination = path_out or os.path.join(PATH.OUTPUT, tag)

    if not os.path.exists(destination):
        unix.mkdir(destination)

    for fid in filepaths:
        unix.cp(fid, os.path.join(destination, os.path.basename(fid)))
def checkpoint(self, path, classname, method, kwargs):
    """
    Writes the SeisFlows3 working environment to disk so that new tasks can
    be executed in a separate/new/restarted working environment.

    The keyword arguments are pickled to
    `path`/kwargs/<classname>_<method>.p, after which save() is called.

    :type path: str
    :param path: path to save the checkpointed pickle files to
    :type classname: str
    :param classname: name of the class to save
    :type method: str
    :param method: the specific function to be checkpointed
    :type kwargs: dict
    :param kwargs: dictionary to pass to object saving
    """
    self.logger.debug("checkpointing working environment to disk")

    kwargs_dir = os.path.join(path, "kwargs")
    kwargs_file = os.path.join(kwargs_dir, f"{classname}_{method}.p")

    unix.mkdir(kwargs_dir)
    with open(kwargs_file, "wb") as f:
        pickle.dump(kwargs, f)

    save()
def create_specfem2d_working_directory(self):
    """
    Create the working directory where we will generate our initial and
    final models using one of the SPECFEM2D examples.

    Any existing working directory is removed first. The binaries and the
    example DATA directory are copied in, then SOURCE and Par_file are
    symlinked to the files of the example.

    :raises AssertionError: if the SPECFEM2D example directory does not exist
    """
    # The message reads the same `sem2d_paths` entry that is being checked
    assert (os.path.exists(self.sem2d_paths["example"])), (
        f"SPECFEM2D/EXAMPLE directory: '{self.sem2d_paths['example']}' "
        f"does not exist, please check this path and try again.")

    # In case this example has been run before, remove the directory it made
    rm(self.workdir_paths.workdir)
    mkdir(self.workdir_paths.workdir)

    # Copy the binary executables and DATA from the SPECFEM2D example
    cp(self.sem2d_paths.bin, self.workdir_paths.bin)
    cp(self.sem2d_paths.example_data, self.workdir_paths.data)

    # Make sure that SPECFEM2D can find the expected files in the DATA/ dir
    cd(self.workdir_paths.data)
    rm("SOURCE")
    ln("SOURCE_001", "SOURCE")
    rm("Par_file")
    ln("Par_file_Tape2007_onerec", "Par_file")
def save(self, path, model, prefix="reg1_", suffix=""):
    """
    Writes SPECFEM3D_GLOBE transversely isotropic model

    For every processor slice, each of 'vpv', 'vph', 'vsv', 'vsh', 'eta'
    and 'rho' is handled in turn: a parameter listed in `self.parameters`
    is written from `model` with savebin; any other parameter is copied
    with copybin from the 'model_init' directory under PATH.OUTPUT, unless
    `suffix` contains 'kernel', in which case it is skipped.

    :type path: str
    :param path: directory the model files are written to
    :type model: dict
    :param model: model values, indexed as model[parameter][iproc]
    :type prefix: str
    :param prefix: prefix that begins the name of the model parameters
    :type suffix: str
    :param suffix: that follow the name of model parameters
    """
    unix.mkdir(path)

    model_init = PATH.OUTPUT + '/' + 'model_init'
    is_kernel = 'kernel' in suffix

    for iproc in range(self.mesh_properties.nproc):
        # 'rho' follows the same rules as the other parameters, so it
        # shares the loop and is handled last for each slice
        for key in ("vpv", "vph", "vsv", "vsh", "eta", "rho"):
            name = prefix + key + suffix
            if key in self.parameters:
                savebin(model[key][iproc], path, iproc, name)
            elif not is_kernel:
                copybin(model_init, path, iproc, name)
def save(self, save_dict, path, parameters=None, prefix="", suffix=""):
    """
    Solver I/O: Saves SPECFEM2D/3D models or kernels

    Parameters missing from `save_dict` are first read from PATH.MODEL_INIT
    and appended to `save_dict` in place; every parameter is then written
    to `path`, one slice per processor.

    :type save_dict: dict or Container
    :param save_dict: model stored as a dictionary or Container
    :type path: str
    :param path: directory to which the model is written
    :type parameters: list
    :param parameters: list of material parameters to be written,
        defaults to `self.parameters`
    :type prefix: str
    :param prefix: optional filename prefix
    :type suffix: str
    :param suffix: optional filename suffix, eg '_kernel'
    """
    unix.mkdir(path)

    if parameters is None:
        parameters = self.parameters

    # Fill in any parameters the caller did not provide
    missing_keys = diff(parameters, save_dict.keys())
    for iproc in range(self.mesh_properties.nproc):
        for key in missing_keys:
            slice_name = f"{prefix}{key}{suffix}"
            save_dict[key] += self.io.read_slice(path=PATH.MODEL_INIT,
                                                 parameters=slice_name,
                                                 iproc=iproc)

    # Write every slice of every parameter to disk
    for iproc in range(self.mesh_properties.nproc):
        for key in parameters:
            slice_name = f"{prefix}{key}{suffix}"
            self.io.write_slice(data=save_dict[key][iproc], path=path,
                                parameters=slice_name, iproc=iproc)