def _run_stage(
    self,
    stage_name: str,
    stage: StageBase,
    molecule: Ligand,
    results: WorkFlowResult,
) -> Ligand:
    """
    Run one workflow stage inside a folder named after it and record the outcome in ``results``.

    The stage is first stored as ``Status.Running`` and the results are written to
    ``self._results_fname``; after the run the stored entry is updated and written again.

    Args:
        stage_name: Key used for the entry in ``results.results`` and the name of the
            folder the stage is run in.
        stage: The stage to execute; its ``type`` and ``dict()`` are stored with the result.
        molecule: The molecule handed to ``stage.run``. It is kept as the current molecule
            when the stage raises ``MissingReferenceData``.
        results: The workflow results object, updated in place.

    Returns:
        ``results.current_molecule`` after the stage has finished.

    Raises:
        WorkFlowExecutionError: If the stage raises anything other than
            ``MissingReferenceData``. The original exception is chained and its
            traceback is written to ``QUBEKit.err`` in the starting directory.
    """
    start_dir = os.getcwd()

    # register the stage as running and persist that before any work is done
    record = StageResult(
        stage=stage.type, stage_settings=stage.dict(), status=Status.Running
    )
    results.results[stage_name] = record
    results.modified_date = datetime.now().strftime("%Y_%m_%d")
    results.to_file(filename=self._results_fname)

    def _return_and_save() -> None:
        # go back to where we started, then store and write the stage record
        os.chdir(start_dir)
        results.results[stage_name] = record
        results.to_file(self._results_fname)

    make_and_change_into(name=stage_name)
    try:
        finished_molecule = stage.run(
            molecule=molecule,
            qc_spec=self.qc_options,
            local_options=self.local_resources,
        )
        record.status = Status.Done
        results.current_molecule = finished_molecule
    except MissingReferenceData:
        # nothing to scan for this molecule, treat the stage as completed
        record.status = Status.Done
        results.current_molecule = molecule
    except Exception as error:
        import traceback

        # keep the current molecule untouched and store the formatted traceback
        record.status = Status.Error
        record.error = traceback.extract_tb(error.__traceback__).format()
        _return_and_save()
        # dump the full exception next to the results file
        with open("QUBEKit.err", "w") as error_file:
            traceback.print_exc(file=error_file)
        raise WorkFlowExecutionError(
            f"The workflow stopped unexpectedly due to the following error at stage: {stage_name}"
        ) from error

    _return_and_save()
    return results.current_molecule
def prep_for_fitting(self, molecule: Ligand) -> List[str]:
    """
    Write the torsion-profile fitting inputs for every scan stored on the ligand.

    One folder is created per entry in ``molecule.qm_scans``, named
    ``<self.target_name>_<i>_<j>`` where ``i`` and ``j`` are the first two entries of the
    scan's ``central_bond``. Each folder receives a pdb topology file, the exported
    torsiondrive data and the metadata written by ``self.make_metadata``.

    Args:
        molecule: The molecule to prepare; it must carry qm reference data in
            ``molecule.qm_scans``.

    Note:
        The folders are created relative to the current working directory, so the caller
        is expected to already be inside the targets folder.

    Returns:
        The names of the target folders that were made, in scan order.

    Raises:
        MissingReferenceData: If ``molecule.qm_scans`` is empty.
    """
    if not molecule.qm_scans:
        raise MissingReferenceData(
            f"Can not prepare a forcebalance fitting target for {molecule.name} as the reference data is missing!"
        )

    # remember the starting point so every target is created side by side
    start_dir = os.getcwd()
    created_targets = []

    for torsion_scan in molecule.qm_scans:
        bond = torsion_scan.central_bond
        folder_name = f"{self.target_name}_{bond[0]}_{bond[1]}"
        created_targets.append(folder_name)
        make_and_change_into(name=folder_name)

        # topology: molecules with ub terms use their dedicated pdb writer
        if molecule.has_ub_terms():
            molecule._to_ub_pdb(file_name="molecule")
        else:
            molecule.to_file(file_name="molecule.pdb")

        # reference data followed by the metadata for this scan
        export_torsiondrive_data(molecule=molecule, tdrive_data=torsion_scan)
        self.make_metadata(torsiondrive_data=torsion_scan)

        # step back out before handling the next scan
        os.chdir(start_dir)

    return created_targets
def torsion_test(self):
    """
    Check the optimised parameters against QM for every scan in ``self.molecule.scan_order``.

    For each scan an MM torsion drive is run in a fresh ``testing_torsion`` folder inside
    the scan's ``SCAN_<i>_<j>`` folder, single point energies are calculated for the
    resulting structures and the comparison is plotted.
    """
    # the loop target is the instance attribute, so helper methods see the active scan
    for self.scan in self.molecule.scan_order:
        scan_folder = f"SCAN_{self.scan[0]}_{self.scan[1]}"
        test_folder = "testing_torsion"

        # enter the scan folder, it is created if it is not there yet
        make_and_change_into(scan_folder)

        # always start from an empty testing folder
        try:
            rmtree(test_folder)
        except FileNotFoundError:
            pass
        make_and_change_into(test_folder)

        # MM torsion scan; the returned coordinates become the new scan coordinates
        self.scan_coords = self.drive_mm("torsiondrive")

        # single point energies for those structures, normalised again
        # against the qm reference energy
        self.qm_energy = self.single_point()
        self.qm_normalise()

        # reset the torsions and take a copy of the starting energy as the mm energy
        self.reset_torsions()
        self.mm_energy = deepcopy(self.starting_energy)

        # plot the comparison for this scan
        self.plot_results(name="testing_torsion", torsion_test=True)

        # leave both the testing folder and the scan folder
        os.chdir("../../")
def single_point_matching(self, fitting_error, opt_parameters):
    """
    Refine the torsion parameters using the single point matching method.

    Method (fit only new generation)
    --------------------------------
    1) Take the parameters from the initial scipy fitting.
    2) Do an MM torsion scan with the parameters and get the rmsd error and energy error
       between this new surface and the qm optimised surface.
    3) Fit to the qm surface again using a small restraint penalty.

    Each pass runs in its own ``Iteration_<n>`` folder. The loop stops as soon as the
    total error (energy error plus mean rmsd) is at most 0.25, or once iteration 7 has
    been evaluated. Every exit path leaves the iteration folder again, so the final plots
    are always written in the folder this method was called from.

    Args:
        fitting_error: The fitting error of the parameters passed in; stored as the
            fitting error of the first iteration.
        opt_parameters: The parameters to start the refinement from.

    Returns:
        A tuple ``(final_error, final_parameters)`` holding the lowest total error seen
        over all iterations and a deep copy of the parameters that produced it. This is
        not always the last parameter set.

    Raises:
        ZeroDivisionError: If ``self.scan_rmsd`` returns an empty sequence.
    """
    # Set the optimisation method; for a hybrid method name take the last option
    self.method = self.methods.get(
        self.molecule.opt_method.split("_")[-1], None)
    print(f"The optimisation method is {self.method}")

    # per-iteration history of the errors and the parameters that gave them
    objective = {
        "fitting_error": [],
        "energy_error": [],
        "rmsd": [],
        "total": [],
        "parameters": [],
    }

    iteration = 1
    # start the main optimizer loop by calculating new single point energies
    while True:
        make_and_change_into(f"Iteration_{iteration}")

        # step 2 MM torsion scan with wavefront propagation,
        # the returned coords become the new scan coords
        self.scan_coords = self.drive_mm("torsiondrive")

        # also save these coords to the coords store
        self.coords_store = deepcopy(self.coords_store + self.scan_coords)

        # step 3 calculate the rmsd for these structures compared to the QM ones
        rmsd_vector = self.scan_rmsd(self.scan_coords)

        # calculate how well the new relative surface represents the QM one
        energy_error = self.objective(opt_parameters)

        # this now acts as the initial energy for the next fit
        self.initial_energy = deepcopy(self.mm_energy)

        # record the results of this iteration
        mean_rmsd = sum(rmsd_vector) / len(rmsd_vector)
        objective["fitting_error"].append(fitting_error)
        objective["energy_error"].append(energy_error)
        objective["rmsd"].append(mean_rmsd)
        objective["total"].append(energy_error + mean_rmsd)
        objective["parameters"].append(opt_parameters)

        # log the complete history so far
        self.optimiser_log.write("After refinement the errors are:\n")
        for error, value in objective.items():
            self.optimiser_log.write(f"{error}: {value}\n")
        self.optimiser_log.flush()

        # Check convergence
        if objective["total"][-1] <= 0.25:
            print(
                f"Fitting converged after {iteration} iterations exiting..."
            )
            # leave the iteration folder like the other exit paths do, otherwise the
            # final plots end up inside it and the caller's working directory changes
            os.chdir("../")
            # this takes us out of the refinement loop and stops any parameter changes
            break

        if iteration < 7:
            # don't move too far away from the last set of optimised parameters
            # if they got a good fit
            self.starting_params = opt_parameters

            # turn on the penalty if the error is getting close to the threshold
            if energy_error <= 1.5:
                self.l_pen = 0.15
            else:
                print("Turning off penalty due to large errors.")
                self.l_pen = 0

            # optimise using the scipy method for the new structures,
            # with a penalty to remain close to the old parameters
            fitting_error, opt_parameters = self.scipy_optimiser()

            # update the parameters in the fitting vector and the molecule for the MM scans
            self.update_tor_vec(opt_parameters)
            self.update_mol()

            # use the parameters to get the current energies
            self.mm_energy = deepcopy(self.mm_energies())

            self.optimiser_log.write(
                f"Results for fitting iteration: {iteration}\n")
            self.optimiser_log.flush()

            # plot the fitting graph for this iteration
            self.plot_results(name=f"SP_iter_{iteration}")

            # move out of the folder and go to the next iteration
            os.chdir("../")
            iteration += 1
        else:
            # iteration limit reached: evaluate and plot the final prediction, then stop
            self.mm_energy = deepcopy(self.mm_energies())
            self.plot_results(name=f"SP_iter_{iteration}")
            os.chdir("../")
            break

    # gather the parameters with the lowest total error, not always the last parameter set
    min_error = min(objective["total"])
    min_index = objective["total"].index(min_error)
    final_parameters = deepcopy(objective["parameters"][min_index])
    final_error = objective["total"][min_index]
    self.optimiser_log.write(
        f"The lowest error:{final_error}\nThe corresponding parameters:{final_parameters}\n"
        f"were found on iteraion {min_index + 1}\n")
    self.optimiser_log.flush()

    # now we want to see how well we have captured the initial QM energy surface,
    # so reset the scan coords to the initial values
    self.scan_coords = self.initial_coords

    # keep the last mm surface before the objective below is evaluated again
    final_surface_energy = deepcopy(self.mm_energy)

    # get the energy surface for these final parameters at the qm geometry
    energy_error = self.objective(final_parameters)
    self.optimiser_log.write(
        f"The final error at the qm optimised geometries is {energy_error}\n"
    )

    # get the starting energies back to the initial values before fitting
    self.initial_energy = self.starting_energy

    # plot the starting QM surface and how well we can remake it
    self.optimiser_log.write("The final stage 2 fitting results:\n")
    self.optimiser_log.flush()
    self.plot_results(
        name="Stage2_Single_point_fit",
        extra_points={
            "Final parameters MM geometry": final_surface_energy
        },
    )

    # plot the convergence of the energy, rmsd and total errors
    self.plot_convergence(objective)

    # plot the correlation between the single point energies over all structures
    # sampled in the fitting
    self.plot_correlation(final_parameters)

    return final_error, final_parameters
def drive_mm(self, engine):
    """
    Drive the torsion again using MM to get new structures.

    The scan is run in a scratch folder called ``<engine>_scan`` which is wiped and
    recreated on every call. The working directory is moved back to the parent folder
    when the method exits, including when the external program fails.

    Args:
        engine: The program used to drive the torsion, either ``"torsiondrive"`` or
            ``"geometric"``.

    Returns:
        For ``"torsiondrive"``: ``self.molecule.qm_scans[self.scan][1]`` as read back
        after the scan. For ``"geometric"``: ``None``.

    Raises:
        NotImplementedError: If ``engine`` is not one of the supported names. This is
            checked before any folder or file is touched.
        subprocess.CalledProcessError: If the external program exits with a non-zero
            status.
    """
    # fail fast, before an old scratch folder is deleted or any input is written
    if engine not in ("torsiondrive", "geometric"):
        raise NotImplementedError(
            "Invalid torsion engine. Please use torsiondrive or geometric"
        )

    # Move into a scratch folder; torsiondrive gives an error if we use the
    # temp directory module
    temp = f"{engine}_scan"
    try:
        rmtree(temp)
    except FileNotFoundError:
        pass
    make_and_change_into(temp)

    # NOTE(review): the geometric branch never read any structures back and used to
    # fail with UnboundLocalError at the return; it now returns None - confirm what
    # callers of the geometric engine expect.
    positions = None
    try:
        # write out a pdb file of the current geometry and the parameters
        # for the MM engine
        self.molecule.to_file(file_name="openmm.pdb")
        self.molecule.write_parameters(name="openmm")

        constraints_file = self.molecule.constraints_file

        # output of the external program is appended to log.txt in the scratch folder
        with open("log.txt", "a+") as log:
            if engine == "torsiondrive":
                if constraints_file is not None:
                    # best effort: a failed move is not treated as an error here
                    os.system("mv ../constraints.txt .")
                self.write_dihedrals()
                step_size = self.molecule.increments[self.scan]
                extra_argument = constraints_file if constraints_file is not None else ""
                sp.run(
                    f"torsiondrive-launch -e openmm openmm.pdb dihedrals.txt -v -g {step_size}"
                    f" {extra_argument}",
                    shell=True,
                    stderr=log,
                    stdout=log,
                    check=True,
                )
                self.molecule.read_tdrive(self.scan)
                positions = self.molecule.qm_scans[self.scan][1]
            else:
                # engine == "geometric"
                if constraints_file is not None:
                    os.system("mv ../constraints.txt .")
                else:
                    self.make_constraints()
                sp.run(
                    "geometric-optimize --epsilon 0.0 --maxiter 500 --qccnv true --pdb openmm.pdb "
                    "--engine openmm state.xml qube_constraints.txt",
                    shell=True,
                    stdout=log,
                    stderr=log,
                    check=True,
                )
    finally:
        # always move back to the master folder, also when the scan failed
        os.chdir("../")

    return positions