Exemple #1
0
    def _run_stage(
        self,
        stage_name: str,
        stage: StageBase,
        molecule: Ligand,
        results: WorkFlowResult,
    ) -> Ligand:
        """
        Execute a single workflow stage in its own folder and record the outcome in ``results``.

        The stage is first registered as running and the results are written to file. The stage is then
        run from a folder named after it, and the final status is stored and written again once we are
        back in the starting folder.

        Args:
            stage_name: The key the stage result is stored under, also used as the name of the run folder.
            stage: The stage to execute.
            molecule: The molecule handed to the stage.
            results: The workflow results, updated in place and saved to ``self._results_fname``.

        Returns:
            The current molecule held by ``results`` once the stage has finished.

        Raises:
            WorkFlowExecutionError: If any exception other than MissingReferenceData is raised while
                running the stage; the original exception is chained as the cause.
        """
        start_dir = os.getcwd()
        # register the stage as running and persist that before any work is done
        record = StageResult(
            stage=stage.type, stage_settings=stage.dict(), status=Status.Running
        )
        results.results[stage_name] = record
        results.modified_date = datetime.now().strftime("%Y_%m_%d")
        results.to_file(filename=self._results_fname)
        make_and_change_into(name=stage_name)
        try:
            # execute the stage, a clean return marks it as done
            updated_mol = stage.run(
                molecule=molecule,
                qc_spec=self.qc_options,
                local_options=self.local_resources,
            )
            record.status = Status.Done
            results.current_molecule = updated_mol
        except MissingReferenceData:
            # no torsions to scan: treat the stage as done and carry the input molecule forward
            record.status = Status.Done
            results.current_molecule = molecule
        except Exception as error:
            import traceback

            # keep the current molecule untouched and store the formatted traceback on the stage
            record.status = Status.Error
            record.error = traceback.extract_tb(error.__traceback__).format()
            # go back first so the results and the error file land in the starting folder
            os.chdir(start_dir)
            results.results[stage_name] = record
            results.to_file(self._results_fname)
            with open("QUBEKit.err", "w") as err_file:
                traceback.print_exc(file=err_file)

            message = f"The workflow stopped unexpectedly due to the following error at stage: {stage_name}"
            raise WorkFlowExecutionError(message) from error

        # back to where we started, then persist the final state of the stage
        os.chdir(start_dir)
        results.results[stage_name] = record
        results.to_file(self._results_fname)
        return results.current_molecule
    def prep_for_fitting(self, molecule: Ligand) -> List[str]:
        """
        For the given ligand prep the input files ready for torsion profile fitting.

        One folder is made per scan in ``molecule.qm_scans``; each receives the pdb topology, the
        exported torsiondrive data and the target metadata.

        Args:
            molecule: The molecule object that we need to prep for fitting, this should have qm reference data stored in molecule.qm_scans.

        Note:
            We assume we are already in the targets folder. The working directory is always restored to
            that folder after each target, even if writing one of the files fails.

        Returns:
            A list of target folder names made by this target.

        Raises:
            MissingReferenceData: If the molecule does not have any torsion drive reference data saved in molecule.qm_scans.
        """
        # make sure we have data
        if not molecule.qm_scans:
            raise MissingReferenceData(
                f"Can not prepare a forcebalance fitting target for {molecule.name} as the reference data is missing!"
            )

        # write out the qdata and other input files for each scan
        target_folders = []
        # keep track of where we start
        base_folder = os.getcwd()

        # loop over each scanned bond and make a target folder
        for scan in molecule.qm_scans:
            task_name = (
                f"{self.target_name}_{scan.central_bond[0]}_{scan.central_bond[1]}"
            )
            target_folders.append(task_name)
            try:
                make_and_change_into(name=task_name)
                # make the pdb topology file
                if molecule.has_ub_terms():
                    molecule._to_ub_pdb(file_name="molecule")
                else:
                    molecule.to_file(file_name="molecule.pdb")
                # write the qdata file
                export_torsiondrive_data(molecule=molecule, tdrive_data=scan)
                # make the metadata
                self.make_metadata(torsiondrive_data=scan)
            finally:
                # always move back to the base so a failed export does not strand
                # the caller inside a half written target folder
                os.chdir(base_folder)

        return target_folders
Exemple #3
0
    def torsion_test(self):
        """
        Check the optimised parameters against QM for every entry in the molecule scan order.

        For each scan a fresh ``testing_torsion`` folder is made inside the matching ``SCAN_<a>_<b>``
        folder, the torsion is driven with MM, single point energies are calculated and normalised and
        the comparison is plotted.
        """

        for self.scan in self.molecule.scan_order:
            # enter the folder belonging to this scan
            scan_folder = f"SCAN_{self.scan[0]}_{self.scan[1]}"
            make_and_change_into(scan_folder)

            # start from a clean testing folder, a missing one is fine
            test_folder = "testing_torsion"
            try:
                rmtree(test_folder)
            except FileNotFoundError:
                pass
            make_and_change_into(test_folder)

            # MM torsion drive, the returned coords become the new scan coords
            self.scan_coords = self.drive_mm("torsiondrive")

            # single point energies at the new coords
            self.qm_energy = self.single_point()

            # normalise the qm energies again
            self.qm_normalise()

            # reset the torsions and take a copy of the starting energy as the mm energy
            self.reset_torsions()
            self.mm_energy = deepcopy(self.starting_energy)

            # plot the comparison for this scan
            self.plot_results(name="testing_torsion", torsion_test=True)

            # climb back out of testing_torsion and the scan folder
            os.chdir("../../")
Exemple #4
0
    def single_point_matching(self, fitting_error, opt_parameters):
        """Refine the torsion parameters using the single point matching method.

        Method (fit only new generation)
        --------------------------------
        1) Take parameters from the initial scipy fitting.
        2) Do a MM torsion scan with the parameters and get the rmsd error and energy error between this
           new surface and the qm optimised surface.
        3) Now fit to the qm surface again using a small restraint penalty.

        The loop ends as soon as the total error (energy error plus mean rmsd) is 0.25 or lower, or once
        the seventh iteration has been evaluated. Every iteration runs inside its own ``Iteration_<n>``
        folder and every way out of the loop steps back up one folder first.

        Args:
            fitting_error: The fitting error belonging to ``opt_parameters``, recorded for the first iteration.
            opt_parameters: The starting parameters, passed to the objective in the first iteration.

        Returns:
            A tuple of the lowest total error recorded and the parameters which produced it.
        """

        # Set the optimisation method if we have a hybrd method we need to try and take the last option
        self.method = self.methods.get(
            self.molecule.opt_method.split("_")[-1], None)
        print(f"The optimisation method is {self.method}")

        # per iteration history of the errors and the parameters that gave them
        objective = {
            "fitting_error": [],
            "energy_error": [],
            "rmsd": [],
            "total": [],
            "parameters": [],
        }

        iteration = 1
        # start the main optimizer loop by calculating new single point energies
        while True:
            # move into this iteration's folder
            make_and_change_into(f"Iteration_{iteration}")

            # step 2 MM torsion scan
            # with wavefront propagation, returns the new set of coords these become the new scan coords
            self.scan_coords = self.drive_mm("torsiondrive")

            # also save these coords to the coords store
            self.coords_store = deepcopy(self.coords_store + self.scan_coords)

            # step 3 calculate the rmsd for these structures compared to QM ones
            rmsd_vector = self.scan_rmsd(self.scan_coords)
            mean_rmsd = sum(rmsd_vector) / len(rmsd_vector)

            # Calculate how well the new relative surface represents the QM one
            energy_error = self.objective(opt_parameters)

            # this now acts as the initial energy for the next fit
            self.initial_energy = deepcopy(self.mm_energy)

            # add the results to the dictionary
            objective["fitting_error"].append(fitting_error)
            objective["energy_error"].append(energy_error)
            objective["rmsd"].append(mean_rmsd)
            objective["total"].append(energy_error + mean_rmsd)
            objective["parameters"].append(opt_parameters)

            # Print the results of the iteration
            self.optimiser_log.write("After refinement the errors are:\n")
            for error, value in objective.items():
                self.optimiser_log.write(f"{error}: {value}\n")
            self.optimiser_log.flush()

            # Check convergence
            if objective["total"][-1] <= 0.25:
                print(
                    f"Fitting converged after {iteration} iterations exiting..."
                )
                # leave the iteration folder like the other two exits do, otherwise the
                # rest of this method would run from inside Iteration_<n>
                os.chdir("../")
                # This takes us out of the refinement loop and stops any parameter changes
                break

            # Still allowed another round of fitting?
            if iteration < 7:

                # Don't move too far away from the last set of optimised parameters if they got a good fit
                self.starting_params = opt_parameters
                # turn on the penalty if the error is getting close to the threshold
                if energy_error <= 1.5:
                    self.l_pen = 0.15
                else:
                    print("Turning off penalty due to large errors.")
                    self.l_pen = 0

                # optimise using the scipy method for the new structures with a penalty to remain close to the old
                fitting_error, opt_parameters = self.scipy_optimiser()

                # update the parameters in the fitting vector and the molecule for the MM scans
                self.update_tor_vec(opt_parameters)
                self.update_mol()

                # use the parameters to get the current energies
                self.mm_energy = deepcopy(self.mm_energies())

                self.optimiser_log.write(
                    f"Results for fitting iteration: {iteration}\n")
                self.optimiser_log.flush()
                # plot the fitting graph this iteration
                self.plot_results(name=f"SP_iter_{iteration}")

                # move out of the folder
                os.chdir("../")

                # add 1 to the iteration
                iteration += 1

            else:
                # iteration limit reached: no refit, just report the final prediction
                # use the parameters to get the current energies
                self.mm_energy = deepcopy(self.mm_energies())
                # print the final iteration energy prediction
                self.plot_results(name=f"SP_iter_{iteration}")
                os.chdir("../")
                break

        # find the minimum total error index in list
        min_error = min(objective["total"])
        min_index = objective["total"].index(min_error)

        # gather the parameters with the lowest error, not always the last parameter set
        final_parameters = deepcopy(objective["parameters"][min_index])
        final_error = objective["total"][min_index]
        self.optimiser_log.write(
            f"The lowest error:{final_error}\nThe corresponding parameters:{final_parameters}\n"
            f"were found on iteraion {min_index + 1}\n")
        self.optimiser_log.flush()

        # now we want to see how well we have captured the initial QM energy surface
        # reset the scan coords to the initial values
        self.scan_coords = self.initial_coords

        # Also save the last mm surface generated by the parameters
        final_surface_energy = deepcopy(self.mm_energy)

        # get the energy surface for these final parameters at the qm geometry
        energy_error = self.objective(final_parameters)
        self.optimiser_log.write(
            f"The final error at the qm optimised geometries is {energy_error}\n"
        )

        # get the starting energies back to the initial values before fitting
        self.initial_energy = self.starting_energy
        # plot the results this is a graph of the starting QM surface and how well we can remake it
        self.optimiser_log.write("The final stage 2 fitting results:\n")
        self.optimiser_log.flush()

        self.plot_results(
            name="Stage2_Single_point_fit",
            extra_points={
                "Final parameters MM geometry": final_surface_energy
            },
        )

        # Plot the convergence of the energy rmsd and total errors
        self.plot_convergence(objective)

        # Plot the correlation between the single point energies over all structures sampled in the fitting
        # Using the initial and final parameters
        self.plot_correlation(final_parameters)

        return final_error, final_parameters
Exemple #5
0
    def drive_mm(self, engine):
        """Drive the torsion again using MM to get new structures.

        The external program is run from a fresh ``<engine>_scan`` folder (an existing one is removed
        first) with its output appended to ``log.txt`` in that folder. The working directory is restored
        to where the method was called from, also when the run fails.

        Args:
            engine: The program used to drive the torsion, either "torsiondrive" or "geometric".

        Returns:
            For "torsiondrive", the second element of ``self.molecule.qm_scans[self.scan]`` after the
            torsiondrive output has been read back into the molecule.
            For "geometric", None as this method does not collect any structures from that run.
            NOTE(review): the geometric path used to fail on an unbound ``positions``; confirm whether
            the optimised coordinates should be read back and returned instead.

        Raises:
            NotImplementedError: If the engine is not supported; raised before any folder is touched.
            subprocess.CalledProcessError: If the external command exits with a non-zero status.
        """

        # fail fast so an unknown engine does not delete/create folders or write files
        if engine not in ("torsiondrive", "geometric"):
            raise NotImplementedError(
                "Invalid torsion engine. Please use torsiondrive or geometric"
            )

        # remember where we started so we can always get back
        start_folder = os.getcwd()

        # Move into a temporary folder torsion drive gives an error if we use temp directory module
        temp = f"{engine}_scan"
        try:
            rmtree(temp)
        except FileNotFoundError:
            pass

        make_and_change_into(temp)

        # only the torsiondrive path produces positions
        positions = None
        try:
            # Write out a pdb file of the molecule
            self.molecule.to_file(file_name="openmm.pdb")
            # Also write the parameters of the molecule for the engine to use
            self.molecule.write_parameters(name="openmm")
            with open("log.txt", "a+") as log:
                if engine == "torsiondrive":
                    if self.molecule.constraints_file is not None:
                        os.system("mv ../constraints.txt .")
                    self.write_dihedrals()

                    step_size = self.molecule.increments[self.scan]
                    constraints = (
                        self.molecule.constraints_file
                        if self.molecule.constraints_file is not None
                        else ""
                    )

                    sp.run(
                        f"torsiondrive-launch -e openmm openmm.pdb dihedrals.txt -v -g {step_size}"
                        f" {constraints}",
                        shell=True,
                        stderr=log,
                        stdout=log,
                        check=True,
                    )

                    self.molecule.read_tdrive(self.scan)
                    positions = self.molecule.qm_scans[self.scan][1]

                else:
                    # engine == "geometric", guaranteed by the check at the top
                    if self.molecule.constraints_file is not None:
                        os.system("mv ../constraints.txt .")
                    else:
                        self.make_constraints()

                    sp.run(
                        "geometric-optimize --epsilon 0.0 --maxiter 500 --qccnv true --pdb openmm.pdb "
                        "--engine openmm state.xml qube_constraints.txt",
                        shell=True,
                        stdout=log,
                        stderr=log,
                        check=True,
                    )
        finally:
            # move back to the master folder, even if the external program failed
            os.chdir(start_folder)

        return positions