def _get_model_smoothing_task(self, tasks=None):
    if tasks is None:
        tasks = {}
    if max(self.roughness_decay_smoothing_length) > 0.0:
        # We either smooth the physical model and then map the results back
        # to the internal parameterization, or we smooth the relative
        # perturbations with respect to the initial model.
        if self.roughness_decay_type == "absolute":
            model_to_smooth = self.model_path
        else:
            model_to_smooth = os.path.join(
                self.regularization_dir,
                f"relative_perturbation_{self.iteration_name}.h5",
            )
            shutil.copy(self.model_path, model_to_smooth)
            write_xdmf(model_to_smooth)

            # relative perturbation = (latest - start) / start
            theta_prev = self.get_h5_data(self.model_path)
            theta_0 = self.get_h5_data(
                self._get_path_for_iteration(0, self.model_path)
            )
            theta_prev[theta_0 != 0] = (
                theta_prev[theta_0 != 0] / theta_0[theta_0 != 0] - 1
            )
            self.set_h5_data(model_to_smooth, theta_prev)

        tasks["roughness_decay"] = {
            "reference_model": str(self.comm.lasif.get_master_model()),
            "model_to_smooth": str(model_to_smooth),
            "smoothing_lengths": self.roughness_decay_smoothing_length,
            "smoothing_parameters": self.parameters,
            "output_location": str(self.smoothed_model_path),
        }
    return tasks
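
# The "relative" roughness-decay branch above smooths the perturbation
# (theta_latest / theta_0 - 1) rather than the physical model itself.
# Below is a minimal NumPy sketch of that computation on plain arrays; the
# function name and arguments are illustrative only, not part of the optimizer.
def _relative_perturbation_sketch(theta_latest, theta_0):
    import numpy as np

    perturbation = theta_latest.copy()
    nonzero = theta_0 != 0
    # Where the initial model is zero, the latest value is kept unchanged,
    # mirroring the masked assignment in _get_model_smoothing_task.
    perturbation[nonzero] = theta_latest[nonzero] / theta_0[nonzero] - 1.0
    return perturbation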
def perform_smoothing(self):
    tasks = self._get_model_smoothing_task()
    if max(self.update_smoothing_length) > 0.0:
        tasks["smooth_raw_update"] = {
            "reference_model": str(self.comm.lasif.get_master_model()),
            "model_to_smooth": str(self.raw_update_path),
            "smoothing_lengths": self.update_smoothing_length,
            "smoothing_parameters": self.parameters,
            "output_location": str(self.smooth_update_path),
        }
    if len(tasks.keys()) > 0:
        reg_helper = RegularizationHelper(
            comm=self.comm, iteration_name=self.iteration_name, tasks=tasks
        )
        reg_helper.monitor_tasks()
    else:
        raise InversionsonError(
            "We require some sort of smoothing in Adam optimization."
        )

    # Write XDMFs
    if max(self.update_smoothing_length) > 0.0:
        write_xdmf(self.smooth_update_path)
    if max(self.roughness_decay_smoothing_length) > 0.0:
        write_xdmf(self.smoothed_model_path)
def _compute_raw_update(self):
    """Computes the raw update."""
    self.print("SGD with Momentum: Computing raw update...", line_above=True)
    # Read the iteration number from the task dict (parsed from the task toml)
    iteration_number = self.task_dict["iteration_number"] + 1

    indices = self.get_parameter_indices(self.raw_gradient_path)
    # Scale the gradients, because they can be tiny and this leads to issues.
    g_t = self.get_h5_data(self.raw_gradient_path) * self.grad_scaling_fac

    if np.any(np.isnan(g_t)):
        raise Exception(
            "NaNs were found in the raw gradient. Something must be wrong."
        )

    if iteration_number == 1:  # Initialize moments if needed
        shutil.copy(self.raw_gradient_path, self.moment_path)
        write_xdmf(self.moment_path)

        with h5py.File(self.moment_path, "r+") as h5:
            data = h5["MODEL/data"]
            # Initialize with zeros
            for i in indices:
                data[:, i, :] = np.zeros_like(data[:, i, :])

    v_t = self.beta * self.get_h5_data(self.moment_path) + (1 - self.beta) * g_t

    # Store first moment
    shutil.copy(
        self.moment_path,
        self._get_path_for_iteration(self.iteration_number + 1, self.moment_path),
    )
    self.set_h5_data(
        self._get_path_for_iteration(self.iteration_number + 1, self.moment_path),
        v_t,
    )

    # Correct bias
    v_t = v_t / (1 - self.beta ** (self.iteration_number + 1))
    update = self.alpha * v_t

    if np.any(np.isnan(update)):
        raise Exception(
            "NaNs were found in the raw update. "
            "Check whether the gradient is excessively small."
        )

    # Write raw update to file for smoothing
    shutil.copy(self.raw_gradient_path, self.raw_update_path)
    self.set_h5_data(self.raw_update_path, update)
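
# A minimal sketch of the update rule implemented above, on plain NumPy arrays:
# v_t = beta * v_{t-1} + (1 - beta) * g_t, followed by the bias correction
# v_t / (1 - beta**t) and scaling by the step length alpha. Names and defaults
# are illustrative; the optimizer itself keeps v_t in an HDF5 file between
# iterations.
def _sgdm_update_sketch(v_prev, g_t, t, beta=0.9, alpha=0.001):
    v_t = beta * v_prev + (1.0 - beta) * g_t
    v_hat = v_t / (1.0 - beta ** t)  # bias-corrected first moment
    update = alpha * v_hat
    return v_t, update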
def update_model(self, verbose):
    """
    This task takes the raw gradient and does all the regularisation
    and everything else needed to update the model.
    """
    if self.comm.project.meshes == "multi-mesh":
        self.comm.lasif.move_gradient_to_cluster()

    if not self.task_dict["summing_completed"]:
        grad_summer = GradientSummer(comm=self.comm)
        grad_summer.sum_gradients(
            events=self.comm.project.non_val_events_in_iteration,
            output_location=self.raw_gradient_path,
            batch_average=True,
            sum_vpv_vph=True,
            store_norms=True,
        )
        write_xdmf(self.raw_gradient_path)
        self.task_dict["summing_completed"] = True
        self._update_task_file()
    else:
        self.print("Summing already done")

    if not self.task_dict["raw_update_completed"]:
        self._update_model(raw=True, smooth=False, verbose=verbose)
        self.task_dict["raw_update_completed"] = True
        self._update_task_file()
    else:
        self.print("Raw updating already completed")

    if not self.task_dict["smoothing_completed"]:
        self.perform_smoothing()
        self.task_dict["smoothing_completed"] = True
        self._update_task_file()
    else:
        self.print("Smoothing already done")

    if not self.task_dict["smooth_update_completed"]:
        self._update_model(raw=False, smooth=True, verbose=verbose)
        self.task_dict["smooth_update_completed"] = True
        self._update_task_file()
    else:
        self.print("Smooth updating already completed")

    if not self.task_dict["iteration_finalized"]:
        self._finalize_iteration(verbose=verbose)
        self.task_dict["iteration_finalized"] = True
        self._update_task_file()
    else:
        self.print("Iteration already finalized")

    self.finish_task()
def finish_task(self):
    paths = ["raw_update_path", "model", "raw_gradient_path"]
    if max(self.update_smoothing_length) > 0.0:
        paths.append("smooth_update_path")
    if max(self.roughness_decay_smoothing_length) > 0.0:
        paths.append("smoothed_model_path")

    complete_checks = [
        "smoothing_completed",
        "gradient_completed",
        "iteration_finalized",
        "forward_submitted",
        "raw_update_completed",
        "smooth_update_completed",
        "misfit_completed",
        "summing_completed",
        "validated",
    ]
    for path in paths:
        if path in self.task_dict.keys():
            if not os.path.exists(self.task_dict[path]):
                raise InversionsonError(
                    f"Trying to finish task but can't find {self.task_dict[path]}"
                )
    for complete_check in complete_checks:
        if complete_check in self.task_dict.keys():
            if not self.task_dict[complete_check]:
                raise InversionsonError(
                    f"Trying to finish task but {complete_check} is not completed"
                )

    self.task_dict["finished"] = True
    if self.task_dict["task"] == "update_model":
        self._update_task_file()
        target_location = self._get_path_for_iteration(
            self.iteration_number + 1, self.model_path
        )
        # Moving the new model into place moves the iteration property
        # to the next one.
        shutil.move(self.tmp_model_path, target_location)
        write_xdmf(target_location)
    else:
        self._update_task_file()
def set_h5_data(self, filename, data, create_xdmf=True):
    """Writes the data with shape [:, indices, :]. Requires an existing file."""
    if not os.path.exists(filename):
        raise Exception("set_h5_data only works on existing files.")

    indices = self.get_parameter_indices(filename)

    with h5py.File(filename, "r+") as h5:
        dat = h5["MODEL/data"]
        data_copy = dat[:, :, :].copy()
        # Avoid writing the file many times; work on the array in memory.
        for i in range(len(indices)):
            data_copy[:, indices[i], :] = data[:, i, :]
        # Writing only works in sorted order. This sort can only happen after
        # the loop above has run, to preserve the ordering the data came in with.
        indices.sort()
        dat[:, indices, :] = data_copy[:, indices, :]

    if create_xdmf:
        write_xdmf(filename)
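
# h5py fancy indexing along an axis requires the index list to be in
# increasing order, which is why set_h5_data first fills an in-memory copy in
# the caller's parameter order and only sorts the indices for the final write.
# A minimal sketch of that pattern; the function name and arguments are
# illustrative only.
def _sorted_write_sketch(path, new_block, indices):
    import h5py

    with h5py.File(path, "r+") as h5:
        dat = h5["MODEL/data"]
        full = dat[:, :, :].copy()          # work on the array in memory
        for k, idx in enumerate(indices):   # caller's ordering
            full[:, idx, :] = new_block[:, k, :]
        ordered = sorted(indices)           # h5py wants increasing indices
        dat[:, ordered, :] = full[:, ordered, :]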
def _init_directories(self):
    """
    Build directory structure.
    """
    folders = [
        self.model_dir,
        self.average_model_dir,
        self.raw_gradient_dir,
        self.moment_dir,
        self.raw_update_dir,
        self.smooth_update_dir,
        self.task_dir,
        self.regularization_dir,
        self.smoothed_model_dir,
        self.gradient_norm_dir,
    ]
    for folder in folders:
        if not os.path.exists(folder):
            os.mkdir(folder)

    shutil.copy(self.initial_model, self.model_path)
    write_xdmf(self.model_path)
def _compute_raw_update(self):
    """Computes the raw update."""
    self.print("Adam: Computing raw update...", line_above=True)
    # Read the iteration number from the task dict (parsed from the task toml)
    iteration_number = self.task_dict["iteration_number"] + 1

    indices = self.get_parameter_indices(self.raw_gradient_path)
    # Scale the gradients, because they can be tiny and this leads to issues.
    g_t = self.get_h5_data(self.raw_gradient_path) * self.grad_scaling_fac

    if np.any(np.isnan(g_t)):
        raise Exception(
            "NaNs were found in the raw gradient. Something must be wrong."
        )

    if iteration_number == 1:  # Initialize moments if needed
        shutil.copy(self.raw_gradient_path, self.first_moment_path)
        write_xdmf(self.first_moment_path)

        with h5py.File(self.first_moment_path, "r+") as h5:
            data = h5["MODEL/data"]
            # Initialize with zeros
            for i in indices:
                data[:, i, :] = np.zeros_like(data[:, i, :])

        # Also initialize the second moments with zeros
        shutil.copy(self.first_moment_path, self.second_moment_path)
        write_xdmf(self.second_moment_path)

    m_t = (
        self.beta_1 * self.get_h5_data(self.first_moment_path)
        + (1 - self.beta_1) * g_t
    )

    # Store first moment
    shutil.copy(
        self.first_moment_path,
        self._get_path_for_iteration(
            self.iteration_number + 1, self.first_moment_path
        ),
    )
    self.set_h5_data(
        self._get_path_for_iteration(
            self.iteration_number + 1, self.first_moment_path
        ),
        m_t,
    )

    # v_t was sometimes becoming too small, so enforce double precision
    v_t = (
        self.beta_2 * self.get_h5_data(self.second_moment_path)
        + (1 - self.beta_2) * (g_t ** 2)
    )

    # Store second moment
    shutil.copy(
        self.second_moment_path,
        self._get_path_for_iteration(
            self.iteration_number + 1, self.second_moment_path
        ),
    )
    self.set_h5_data(
        self._get_path_for_iteration(
            self.iteration_number + 1, self.second_moment_path
        ),
        v_t,
    )

    # Correct bias
    m_t = m_t / (1 - self.beta_1 ** (self.iteration_number + 1))
    v_t = v_t / (1 - self.beta_2 ** (self.iteration_number + 1))

    # Ensure e is sufficiently small, even for the small gradient values
    # that we typically have.
    e = self.epsilon * np.mean(np.sqrt(v_t))
    update = self.alpha * m_t / (np.sqrt(v_t) + e)

    max_upd = np.max(np.abs(update))
    self.print(f"Max raw model update: {max_upd}")
    if max_upd > 3.0 * self.alpha:
        raise Exception("The raw update exceeds 3 * alpha, which is unexpectedly large.")

    if np.any(np.isnan(update)):
        raise Exception(
            "NaNs were found in the raw update. "
            "Check whether the gradient is excessively small."
        )

    # Write raw update to file for smoothing
    shutil.copy(self.raw_gradient_path, self.raw_update_path)
    self.set_h5_data(self.raw_update_path, update)
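
# A minimal sketch of the Adam step assembled above, on plain NumPy arrays:
# first and second moments, bias correction, and an epsilon scaled by the mean
# of sqrt(v_t) so it stays meaningful for the very small gradient values this
# problem produces. Names and defaults are illustrative; the optimizer itself
# keeps the moments in HDF5 files between iterations.
def _adam_update_sketch(m_prev, v_prev, g_t, t,
                        beta_1=0.9, beta_2=0.999, alpha=0.001, epsilon=1e-8):
    import numpy as np

    m_t = beta_1 * m_prev + (1.0 - beta_1) * g_t
    v_t = beta_2 * v_prev + (1.0 - beta_2) * g_t ** 2
    m_hat = m_t / (1.0 - beta_1 ** t)        # bias-corrected first moment
    v_hat = v_t / (1.0 - beta_2 ** t)        # bias-corrected second moment
    e = epsilon * np.mean(np.sqrt(v_hat))    # gradient-scale-aware epsilon
    update = alpha * m_hat / (np.sqrt(v_hat) + e)
    return m_t, v_t, update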