def _run_torsion_drives(
    self, molecule: "Ligand", torsion_scans: List[TorsionScan]
) -> "Ligand":
    """
    Run the list of validated torsion drives.

    Note:
        We do not change the initial coordinates passed at this point.

    Args:
        molecule: The molecule to be scanned.
        torsion_scans: A list of TorsionScan jobs to perform detailing the dihedral and the scan range.

    Returns:
        The molecule object returned by the last torsion drive. When no scans are
        requested the input molecule is returned as is.
    """
    # seed the result with the input so an empty scan list does not reach the
    # return statement with an unbound local
    result_mol = molecule
    for torsion_scan in torsion_scans:
        # make a folder and move into to run the calculation
        folder = "SCAN_" + "_".join(str(t) for t in torsion_scan.torsion)
        with folder_setup(folder):
            print(
                f"Running scan for dihedral: {torsion_scan.torsion} with range: {torsion_scan.scan_range}"
            )
            # every drive is started from the molecule that was passed in
            result_mol = self.torsion_driver.run_torsiondrive(
                molecule=molecule, dihedral_data=torsion_scan
            )
    return result_mol
def _run_qm_opt(
    self,
    molecule: "Ligand",
    conformers: List[np.array],
    qc_spec: QCOptions,
    local_options: LocalResource,
) -> "Ligand":
    """
    Optimise the input conformers one after another with QM and stop at the first
    one which converges.

    Each conformer gets two attempts: a direct optimisation and, if that fails, a
    second optimisation started from the last geometry after a small random bump.

    Args:
        molecule: The qubekit molecule to run the optimisation on.
        conformers: The list of pre-optimised conformers.
        qc_spec: The QCSpec used to run the QM optimisation.
        local_options: The local resource that is available for the optimisation.

    Returns:
        The molecule returned by the first successful optimisation.

    Raises:
        GeometryOptimisationError: If every conformer has been tried without success.
    """
    working_mol = deepcopy(molecule)
    optimiser = GeometryOptimiser(convergence=self.convergence_criteria, maxiter=50)
    # settings shared by the first attempt and the bumped retry
    shared_settings = dict(
        allow_fail=True,
        return_result=True,
        qc_spec=qc_spec,
        local_options=local_options,
    )
    progress = tqdm(
        conformers, desc="Optimising conformer", total=len(conformers), ncols=80
    )
    for index, start_coords in enumerate(progress):
        with folder_setup(folder_name=f"conformer_{index}"):
            # set the coords
            working_mol.coordinates = start_coords
            # errors are not raised (allow_fail) so inspect the returned result
            qm_result, result = optimiser.optimise(
                molecule=working_mol, **shared_settings
            )
            if result.success:
                break

            # grab last coords and bump
            last_coords = qm_result.coordinates
            bump = np.random.choice(a=[0, 0.01], size=(qm_result.n_atoms, 3))
            working_mol.coordinates = last_coords + bump
            bump_mol, bump_result = optimiser.optimise(
                molecule=working_mol, **shared_settings
            )
            if bump_result.success:
                qm_result = bump_mol
                break
    else:
        # the loop ran out of conformers without hitting a break
        raise GeometryOptimisationError(
            "No molecule conformer could be optimised to GAU TIGHT"
        )
    return qm_result
def optimise_grid_point(
    geometry_optimiser: GeometryOptimiser,
    molecule: "Ligand",
    qc_spec: "QCOptions",
    local_options: "LocalResource",
    # coordinates in bohr
    coordinates: List[float],
    dihedral: Tuple[int, int, int, int],
    dihedral_angle: int,
    job_id: int,
) -> GridPointResult:
    """
    Perform a restrained optimisation of the molecule at a single torsiondrive grid point.

    This is separated from the class to make multiprocessing lighter.

    Args:
        geometry_optimiser: The geometry optimiser that should be used, this should already be
            configured to the correct method and basis.
        molecule: The molecule which is to be optimised, it is copied and not modified here.
        qc_spec: The QC options forwarded to the optimiser.
        local_options: The local resource options forwarded to the optimiser.
        coordinates: The flat input coordinates in bohr made by torsiondrive.
        dihedral: The atom indices of the dihedral which should be fixed.
        dihedral_angle: The angle the dihedral should be set to during the optimisation.
        job_id: The id of the job used to build the scratch folder.

    Returns:
        The result of the optimisation which contains the initial and final geometry (both in bohr)
        along with the final energy.
    """
    # only the last calculation at a grid point is kept, the folder name is reused
    scratch_folder = f"grid_point_{dihedral_angle}_job_{job_id}"
    with folder_setup(folder_name=scratch_folder):
        # build the optimiser constraints and set torsiondrive settings
        extras = _build_optimiser_settings(
            dihedral=dihedral, dihedral_angle=dihedral_angle
        )
        start_mol = copy.deepcopy(molecule)
        # torsiondrive hands over a flat bohr array, the molecule stores (n_atoms, 3) in angstrom
        coords_angs = np.array(coordinates) * constants.BOHR_TO_ANGS
        start_mol.coordinates = coords_angs.reshape((start_mol.n_atoms, 3))
        result_mol, full_result = geometry_optimiser.optimise(
            molecule=start_mol,
            qc_spec=qc_spec,
            local_options=local_options,
            allow_fail=False,
            return_result=True,
            extras=extras,
        )
        # convert back to a flat bohr list for the result class
        final_bohr = result_mol.coordinates * constants.ANGS_TO_BOHR
        return GridPointResult(
            dihedral_angle=dihedral_angle,
            input_geometry=coordinates,
            final_geometry=final_bohr.ravel().tolist(),
            final_energy=full_result.energies[-1],
        )
def _run_workflow(
    self,
    molecule: "Ligand",
    workflow: List[str],
    results: WorkFlowResult,
) -> WorkFlowResult:
    """
    Execute every stage of the workflow in order on the input molecule and write out
    the final parameters.

    Args:
        molecule: The molecule to be re-parametrised using QUBEKit.
        workflow: The list of prefiltered stage names which should be ran in order.
        results: The results object that we should update throughout the workflow.

    Returns:
        The results object that was passed in, after it has been handed to every stage.
    """
    # try and find missing dependencies
    self.validate_workflow(workflow=workflow, molecule=molecule)

    # TODO Move to outside workflow so this doesn't get printed for every run in bulk.
    start_banner = (
        "If QUBEKit ever breaks or you would like to view timings and loads of other info, "
        "view the log file.\nOur documentation (README.md) "
        "also contains help on handling the various commands for QUBEKit.\n"
    )
    print(start_banner)

    # write out the results object to track the status at the start
    results.to_file(filename=self._results_fname)

    # each stage is stored as an attribute on self under its stage name
    for stage_name in workflow:
        stage: StageBase = getattr(self, stage_name)
        # some stages print based on what spec they are using
        print(stage.start_message(qc_spec=self.qc_options))
        molecule = self._run_stage(
            stage_name=stage_name, stage=stage, molecule=molecule, results=results
        )
        print(stage.finish_message())

    # the workflow has finished, write the final results
    results.to_file(filename=self._results_fname)

    # write out final parameters
    with folder_setup("final_parameters"):
        if not molecule.has_ub_terms():
            molecule.to_file(file_name=f"{molecule.name}.pdb")
        else:
            # if we have U-B terms we need to write a non-standard pdb file
            molecule._to_ub_pdb()
        molecule.write_parameters(file_name=f"{molecule.name}.xml")

    return results
def _call_chargemol(self, density_file_content: str, molecule: "Ligand") -> "Ligand":
    """
    Run ChargeMol on the density file from gaussian and extract the AIM reference data and
    store it into the molecule.

    Args:
        density_file_content: A string containing the density file content which will be written to file.
        molecule: The molecule the reference data should be stored into.

    Returns:
        A molecule updated with the ChargeMol reference data.

    Raises:
        ChargemolError: If the CHARGEMOL_DIR environment variable is not set or the
            chargemol executable exits with a non-zero status.
    """
    # fail early with a clear message instead of a TypeError from os.path.join(None, ...)
    chargemol_dir = os.getenv("CHARGEMOL_DIR")
    if chargemol_dir is None:
        raise ChargemolError(
            "The CHARGEMOL_DIR environment variable is not set so the chargemol executable can not be found."
        )

    with folder_setup(folder_name="ChargeMol"):
        # write the wfx file
        density_file = f"{molecule.name}.wfx"
        with open(density_file, "w+") as d_file:
            d_file.write(density_file_content)

        # build the chargemol input
        self._build_chargemol_input(density_file_name=density_file, molecule=molecule)

        # the executable followed by the job control file it should read
        control_path = (
            "chargemol_FORTRAN_09_26_2017/compiled_binaries/linux/"
            "Chargemol_09_26_2017_linux_parallel job_control.txt"
        )

        # remember any thread count the user already exported so it can be put back
        previous_threads = os.environ.get("OMP_NUM_THREADS")
        # Export a variable to the environment that chargemol will use to work out the threads, must be a string
        os.environ["OMP_NUM_THREADS"] = str(self.cores)
        try:
            with open("log.txt", "w+") as log:
                sp.run(
                    os.path.join(chargemol_dir, control_path),
                    shell=True,
                    stdout=log,
                    stderr=log,
                    check=True,
                )
                return ExtractChargeData.extract_charge_data_chargemol(
                    molecule=molecule, dir_path="", ddec_version=self.ddec_version
                )
        except sp.CalledProcessError as error:
            raise ChargemolError(
                "Chargemol did not execute properly; check the output file for details."
            ) from error
        finally:
            # restore the environment exactly as we found it
            if previous_threads is None:
                os.environ.pop("OMP_NUM_THREADS", None)
            else:
                os.environ["OMP_NUM_THREADS"] = previous_threads
def run(
    input_file: Optional[str] = None,
    smiles: Optional[str] = None,
    name: Optional[str] = None,
    multiplicity: int = 1,
    end: Optional[str] = None,
    skip_stages: Optional[List[str]] = None,
    config: Optional[str] = None,
    protocol: Optional[str] = None,
    cores: Optional[int] = None,
    memory: Optional[int] = None,
):
    """Run the QUBEKit parametrisation workflow on an input molecule.

    Exactly one of ``input_file`` and ``smiles`` must be supplied, and ``name`` is
    required when starting from smiles.

    Args:
        input_file: The name of the file the molecule should be loaded from.
        smiles: The smiles string the molecule should be built from.
        name: The name given to a molecule built from smiles.
        multiplicity: The multiplicity passed on when building the molecule.
        end: Forwarded to the workflow as the ``end`` argument.
        skip_stages: Forwarded to the workflow as the ``skip_stages`` argument.
        config: The config file passed to ``prep_config``.
        protocol: The protocol passed to ``prep_config``.
        cores: The number of cores passed to ``prep_config``.
        memory: The memory passed to ``prep_config``.

    Raises:
        RuntimeError: If both or neither of ``input_file`` and ``smiles`` are given, or
            if ``smiles`` is given without a ``name``.
    """
    # make sure we have an input or smiles not both
    if input_file is not None and smiles is not None:
        raise RuntimeError(
            "Please supply either the name of the input file or a smiles string not both."
        )
    # with neither there is nothing to build the molecule from, say so directly rather
    # than falling through to the smiles branch with smiles=None
    if input_file is None and smiles is None:
        raise RuntimeError(
            "Please supply either the name of the input file or a smiles string."
        )

    # load the molecule
    if input_file is not None:
        molecule = Ligand.from_file(file_name=input_file, multiplicity=multiplicity)
    else:
        if name is None:
            raise RuntimeError(
                "Please also pass a name for the molecule when starting from smiles."
            )
        molecule = Ligand.from_smiles(
            smiles_string=smiles, name=name, multiplicity=multiplicity
        )

    # load workflow
    workflow = prep_config(
        config_file=config, memory=memory, cores=cores, protocol=protocol
    )

    # move into the working folder and run
    with folder_setup(
        f"QUBEKit_{molecule.name}_{datetime.now().strftime('%Y_%m_%d')}"
    ):
        # write the starting molecule
        molecule.to_file(file_name=f"{molecule.name}.pdb")
        workflow.new_workflow(molecule=molecule, skip_stages=skip_stages, end=end)
def run(
    bulk_file: str,
    skip_stages: Optional[List[str]] = None,
    end: Optional[str] = None,
    restart: Optional[str] = None,
    config: Optional[str] = None,
    protocol: Optional[str] = None,
    cores: Optional[int] = None,
    memory: Optional[int] = None,
) -> None:
    """Run the QUBEKit parametrisation workflow on a collection of molecules in serial.

    Loop over the molecules in order of the CSV file. A molecule whose workflow raises
    a WorkFlowExecutionError, or which is asked to restart without a previous run
    folder being present, is reported and skipped so the rest of the file still runs.

    Args:
        bulk_file: The CSV file describing the molecules to run.
        skip_stages: Forwarded to the workflow as the ``skip_stages`` argument.
        end: The end stage, takes priority over the value in the CSV file.
        restart: The stage to restart from, takes priority over the value in the CSV file.
        config: The config file, takes priority over the value in the CSV file for new runs.
        protocol: The protocol passed to ``prep_config`` for new runs.
        cores: The number of cores passed to ``prep_config``.
        memory: The memory passed to ``prep_config``.
    """
    import glob
    import os

    from qubekit.utils.helpers import mol_data_from_csv

    # remember where we started so we can get back after a failed run
    home = os.getcwd()
    # load all inputs
    bulk_data = mol_data_from_csv(bulk_file)

    # start main molecule loop
    for name, mol_data in bulk_data.items():
        print(f"Analysing: {name}")
        try:
            if restart is not None or mol_data["restart"] is not None:
                # we are trying to restart a run, find the folder
                # should only be one
                fname = name.split(".")[0]
                matches = glob.glob(f"QUBEKit_{fname}_*")
                if not matches:
                    # nothing to restart from, do not take the whole bulk run down
                    print(
                        f"No previous QUBEKit run folder could be found for {name}, skipping the restart."
                    )
                    continue
                folder = matches[0]
                with folder_setup(folder):
                    results = WorkFlowResult.parse_file("workflow_result.json")
                    if config is None:
                        # if we have no new config load from results
                        workflow = prep_config(
                            results=results, cores=cores, memory=memory, protocol=None
                        )
                    else:
                        # load the new config file
                        workflow = prep_config(
                            config_file=config, cores=cores, memory=memory
                        )

                    workflow.restart_workflow(
                        start=restart or mol_data["restart"],
                        skip_stages=skip_stages,
                        end=end or mol_data["end"],
                        result=results,
                    )

            else:
                if mol_data["smiles"] is not None:
                    molecule = Ligand.from_smiles(
                        smiles_string=mol_data["smiles"], name=name
                    )
                else:
                    molecule = Ligand.from_file(file_name=name)

                # load the CLI config or the csv config, else default
                workflow = prep_config(
                    config_file=config or mol_data["config_file"],
                    memory=memory,
                    cores=cores,
                    protocol=protocol,
                )

                # move into the working folder and run
                with folder_setup(
                    f"QUBEKit_{molecule.name}_{datetime.now().strftime('%Y_%m_%d')}"
                ):
                    # write the starting molecule
                    molecule.to_file(file_name=f"{molecule.name}.pdb")
                    workflow.new_workflow(
                        molecule=molecule,
                        skip_stages=skip_stages,
                        end=end or mol_data["end"],
                    )
        except WorkFlowExecutionError:
            # make sure we are back in the starting folder before the next molecule
            os.chdir(home)
            print(
                f"An error was encountered while running {name} see folder for more info."
            )
            continue