def serialise_system(self):
    """Parametrise the molecule with smirnoff99Frosst and write 'serialised.xml'.

    The PDB file named after ``self.molecule.name`` provides the OpenMM
    topology; the matching openforcefield molecule is built from the SMILES
    string that RDKit extracts from that same PDB file.
    """
    pdb_path = f'{self.molecule.name}.pdb'

    # OpenMM view of the structure (coordinates + topology).
    openmm_pdb = app.PDBFile(pdb_path)

    # Connection info comes from the SMILES string RDKit derives from the PDB.
    smiles = RDKit().get_smiles(pdb_path)
    off_molecule = Molecule.from_smiles(smiles)

    # Translate the OpenMM topology into an openforcefield topology.
    off_topology = Topology.from_openmm(openmm_pdb.topology, unique_molecules=[off_molecule])

    # Parametrise with the smirnoff99Frosst force field.
    frosst = ForceField('test_forcefields/smirnoff99Frosst.offxml')
    system = frosst.create_openmm_system(off_topology)

    # Serialise the resulting OpenMM system to xml.
    with open('serialised.xml', 'w+') as xml_out:
        xml_out.write(XmlSerializer.serializeSystem(system))
def _read_input(self): """ Figure out what the input is (file, smiles, json) and call the relevant method. """ if self.mol_input.__class__.__name__ == 'Molecule': # QCArchive object self._read_qc_json() elif hasattr(self.mol_input, 'stem'): # File (pdb, xyz, etc) try: # Try parse with rdkit: self.rdkit_mol = RDKit.mol_input_to_rdkit_mol(self.mol_input) self._mol_from_rdkit() except AttributeError: # Cannot be parsed by rdkit: self._read_file() elif isinstance(self.mol_input, str): # Smiles string input self.rdkit_mol = RDKit.smiles_to_rdkit_mol(self.mol_input, self.name) self._mol_from_rdkit() else: raise RuntimeError('Cannot read input. mol_input must be a smiles string, path of a file, or qc json.')
def handle_bulk(self):
    """Run every molecule listed in the bulk .csv file, then exit.

    For each molecule the configs are layered in this order: values from the
    .csv row, then values from the .ini config file named by the row, then any
    terminal arguments that are not None. Execute is called once the molecule
    is fully configured, and the working directory is reset before the next
    molecule.

    Configs cannot be changed between molecule analyses as config data is only
    loaded once at the start; -restart is required for that.

    Always terminates the process via ``sys.exit``.
    """
    # mol_data_from_csv handles defaults if no argument is given
    bulk_data = mol_data_from_csv(self.args.bulk_run)
    molecule_names = list(bulk_data)
    start_dir = os.getcwd()

    for name in molecule_names:
        printf(f'Analysing: {name}\n')
        row = bulk_data[name]

        # Use the smiles string when one is given, otherwise expect <name>.pdb
        smiles_string = row['smiles']
        if smiles_string is None:
            self.file = f'{name}.pdb'
        else:
            self.file = RDKit().smiles_to_pdb(smiles_string, name)

        # Fresh molecule, ready to receive configs
        self.molecule = Ligand(self.file)

        # Layer 1: the .csv row
        for attribute, value in row.items():
            setattr(self.molecule, attribute, value)
        self.molecule.skip = []

        # Layer 2: the .ini file named in the .csv row
        ini_configs = Configure.load_config(self.molecule.config)
        for attribute, value in ini_configs.items():
            setattr(self.molecule, attribute, value)

        # Layer 3: terminal arguments that were actually supplied
        for attribute, value in vars(self.args).items():
            if value is None:
                continue
            setattr(self.molecule, attribute, value)

        # Now that all configs are stored correctly: execute.
        Execute(self.molecule)

        # Return to the starting directory before the next molecule.
        os.chdir(start_dir)

    sys.exit(
        'Bulk analysis complete.\nUse QUBEKit -progress to view the completion progress of your molecules'
    )
def __init__(self):
    """Parse the terminal commands, build (or reload) the molecule and execute.

    Steps:
      1. Make sure ``~/QUBEKit_configs/`` exists (it can be missing for conda
         and pip installs).
      2. Parse the terminal commands; a bulk run is handed to ``handle_bulk``.
      3. Either reload the molecule from the pickled checkpoint (restart) or
         build a new Ligand from the smiles string / input file.
      4. Layer the configs onto the molecule: config file first, then any
         terminal arguments that are not None.
      5. When restarting, store the updated molecule back in the checkpoint.
      6. Call Execute.

    Raises
    ------
    FileNotFoundError
        If a restart is requested but no checkpoint file can be found.
    """
    # First make sure the config folder has been made; missing for conda and pip
    home = str(Path.home())
    config_folder = f'{home}/QUBEKit_configs/'
    if not os.path.exists(config_folder):
        # exist_ok guards against another process creating the folder between
        # the check above and this call.
        os.makedirs(config_folder, exist_ok=True)
        print(f'Making config folder at: {home}')

    self.args = self.parse_commands()

    # If it's a bulk run, handle it separately
    # TODO Add .sdf as possible bulk_run, not just .csv
    if self.args.bulk_run:
        self.handle_bulk()

    if self.args.restart is not None:
        # Find the pickled checkpoint file and load it as the molecule
        try:
            self.molecule = unpickle()[self.args.restart]
        except FileNotFoundError as err:
            # Chain the original error so the missing path stays visible.
            raise FileNotFoundError('No checkpoint file found!') from err
    else:
        if self.args.smiles:
            self.file = RDKit().smiles_to_pdb(*self.args.smiles)
        else:
            self.file = self.args.input

        # Initialise molecule
        self.molecule = Ligand(self.file)

    # Find which config file is being used
    self.molecule.config = self.args.config_file

    # Handle configs which are in a file
    file_configs = Configure.load_config(self.molecule.config)
    for name, val in file_configs.items():
        setattr(self.molecule, name, val)

    # Although these may be None always, they need to be explicitly set anyway.
    self.molecule.restart = None
    self.molecule.end = None
    self.molecule.skip = None

    # Handle configs which are changed by terminal commands
    for name, val in vars(self.args).items():
        if val is not None:
            setattr(self.molecule, name, val)

    # If restarting put the molecule back into the checkpoint file with the new configs
    if self.args.restart is not None:
        self.molecule.pickle(state=self.args.restart)

    # Now that all configs are stored correctly: execute.
    Execute(self.molecule)
def mm_optimise(molecule):
    """Get an initial molecular-mechanics optimised geometry for the molecule.

    options
    ---------
    none / OpenFF_generics parameter engine:
        skip the optimisation, the input coordinates are reused as 'mm'.
    openmm:
        run geometric with OpenMM; depends on the force field the molecule was
        parameterised with (gaff/2, OPLS, smirnoff).
    anything else:
        RDKit MFF or UFF; these force fields can have strange effects on the
        geometry of molecules.

    Raises
    ------
    OptimisationFailed
        If OpenMM is requested but the parameter engine is 'none'.
    """
    append_to_log('Starting mm_optimisation')

    method = molecule.mm_opt_method

    if method == 'none' or molecule.parameter_engine == 'OpenFF_generics':
        # Skip the optimisation step
        molecule.coords['mm'] = molecule.coords['input']

    elif method == 'openmm':
        if molecule.parameter_engine == 'none':
            raise OptimisationFailed('You can not optimise a molecule with OpenMM and no initial parameters; '
                                     'consider parametrising or using UFF/MFF in RDKit')

        # Make the inputs
        molecule.write_pdb(input_type='input')
        molecule.write_parameters()

        # Run geometric
        # TODO Should this be moved to allow a decorator?
        with open('log.txt', 'w+') as log:
            constraints = molecule.constraints_file if molecule.constraints_file is not None else ""
            command = (
                f'geometric-optimize --reset --epsilon 0.0 --maxiter {molecule.iterations} '
                f'--pdb {molecule.name}.pdb --openmm {molecule.name}.xml {constraints}'
            )
            sp.run(command, shell=True, stdout=log, stderr=log)

        # This will continue even if we don't converge; this is fine.
        # Read the xyz traj and store the frames
        molecule.read_file(f'{molecule.name}_optim.xyz', input_type='traj')
        # The last frame of the traj is the mm optimised structure
        molecule.coords['mm'] = molecule.coords['traj'][-1]

    else:
        # TODO change to qcengine as this can already be done
        # Run an rdkit optimisation with the right FF
        force_field_names = {'rdkit_mff': 'MFF', 'rdkit_uff': 'UFF'}
        molecule.filename = RDKit().mm_optimise(molecule.filename, ff=force_field_names[method])

    append_to_log(f'Finishing mm_optimisation of the molecule with {molecule.mm_opt_method}')

    return molecule
def finalise(molecule):
    """Write the final files, attach the RDKit descriptors and print the ligand.

    The pdb and xml (parameter) files are written first, then the descriptors
    are computed from ``<molecule.name>.pdb``. The ligand object is printed to
    the log file (unabbreviated) and to the terminal (abbreviated form).

    Returns the same molecule object.
    """
    # pdb and xml files
    molecule.write_pdb()
    molecule.write_parameters()

    # descriptors come from the pdb file that has the molecule's name
    pdb_name = f'{molecule.name}.pdb'
    molecule.descriptors = RDKit().rdkit_descriptors(pdb_name)

    # log file first, terminal second
    pretty_print(molecule, to_file=True)
    pretty_print(molecule)

    return molecule
def finalise(molecule):
    """Write the final files, attach the RDKit descriptors and print the ligand.

    The pdb and xml files are written to the folder, the descriptors are
    computed from ``molecule.filename``, and the ligand object is then printed
    to the log file and to the terminal (in abbreviated form).

    Returns the same molecule object.
    """
    # pdb and xml files go to the folder first
    molecule.write_pdb()
    molecule.write_parameters()

    # molecule descriptors from RDKit
    descriptors = RDKit.rdkit_descriptors(molecule.filename)
    molecule.descriptors = descriptors

    # log file first, terminal second
    pretty_print(molecule, to_file=True)
    pretty_print(molecule)

    return molecule
def from_file(cls, file_name: str) -> "ReadInput":
    """
    Build the molecule data from an input file.

    ``.xyz`` files are passed to ``cls.from_xyz``; every other file is read
    with RDKit and the molecule name is taken from the RDKit ``_Name`` property.

    Raises
    ------
    FileNotFoundError
        If ``file_name`` does not exist.
    """
    path = Path(file_name)

    if path.exists():
        # xyz is a special case of file only internal readers catch
        if path.suffix == ".xyz":
            return cls.from_xyz(file_name=path.as_posix())

        # everything else is read with rdkit
        mol = RDKit.file_to_rdkit_mol(file_path=path)
        return cls(rdkit_mol=mol, coords=None, name=mol.GetProp("_Name"))

    # the file is not there
    raise FileNotFoundError(
        f"{path.as_posix()} could not be found is this path correct?"
    )
def read_file(self):
    """Base file reader used upon instancing the class.

    RDKit is tried first as this should ensure we always have the connection
    info. If that raises an AttributeError the QUBEKit reader matching the
    file suffix ('.pdb' or '.mol2') is used instead; other suffixes are left
    unread.
    """
    try:
        # Load with RDKit and extract the molecule from the result
        self.mol_from_rdkit(RDKit().read_file(self.filename.name))

    except AttributeError:
        # AttributeError: errors when reading the input file
        print('RDKit error was found, resorting to standard file readers')

        # The QUBEKit readers only get the connections if present
        suffix = self.filename.suffix
        if suffix == '.pdb':
            self.read_pdb(self.filename)
        elif suffix == '.mol2':
            self.read_mol2(self.filename)
def mm_optimise(self, molecule):
    """
    Use an mm force field to get the initial optimisation of a molecule.

    options
    ---------
    RDKit MFF or UFF force fields can have strange effects on the geometry of molecules

    Geometric / OpenMM depends on the force field the molecule was parameterised with gaff/2, OPLS smirnoff.

    The method and the constraints file are read from ``self.molecule``; the
    files are written for, and the results stored on, the ``molecule`` argument.
    NOTE(review): presumably these are the same object - confirm against caller.

    Returns the ``molecule`` argument.
    """

    # Check which method we want then do the optimisation
    # TODO if we don't want geometric we can do a quick native openmm full optimisation?
    if self.molecule.mm_opt_method == 'openmm':
        # Make the inputs
        molecule.write_pdb(input_type='input')
        molecule.write_parameters()

        # A missing constraints file must add nothing to the command line;
        # formatting None directly would pass the literal text "None".
        constraints = self.molecule.constraints_file
        if constraints is None:
            constraints = ''

        # Run geometric
        with open('log.txt', 'w+') as log:
            sp.run(
                f'geometric-optimize --reset --epsilon 0.0 --maxiter {molecule.iterations} --pdb '
                f'{molecule.name}.pdb --openmm {molecule.name}.xml {constraints}',
                shell=True, stdout=log, stderr=log)

        # This will continue even if we don't converge; this is fine.
        # Read the xyz traj and store the frames
        molecule.read_xyz(f'{molecule.name}_optim.xyz')
        # Store the last frame from the traj as the mm optimised structure
        molecule.molecule['mm'] = molecule.molecule['traj'][-1]

    else:
        # TODO change to qcengine as this can already be done
        # Run an rdkit optimisation with the right FF
        rdkit_ff = {'rdkit_mff': 'MFF', 'rdkit_uff': 'UFF'}
        molecule.filename = RDKit.mm_optimise(
            molecule.filename, ff=rdkit_ff[self.molecule.mm_opt_method])

    append_to_log(
        f'mm_optimised the molecule with {self.molecule.mm_opt_method}')

    return molecule
def from_smiles(cls, smiles: str, name: Optional[str] = None) -> "ReadInput":
    """
    Build a ReadInput object from a smiles string; the result can be taken by
    the Ligand class to make the model.

    Note
    ----
    This method will generate a conformer for the molecule.

    Parameters
    ----------
    smiles:
        The smiles string which should be parsed by rdkit.
    name:
        The name that should be given to the molecule.
    """
    return cls(
        name=name,
        coords=None,
        rdkit_mol=RDKit.smiles_to_rdkit_mol(smiles_string=smiles, name=name),
    )
def __init__(self):
    """Parse the terminal commands, build the molecule, set its configs and execute.

    The input file is chosen as follows: on restart, the first entry of the
    current directory whose name contains '.pdb'; otherwise a pdb made from the
    smiles string if one was given; otherwise the input argument.

    Configs are layered onto the molecule from the config file first, then from
    any terminal arguments that are not None.
    """
    self.args = self.parse_commands()

    # If it's a bulk run, handle it separately
    # TODO Add .sdf as possible bulk_run, not just .csv
    if self.args.bulk_run:
        self.handle_bulk()

    if self.args.restart:
        pdb_entries = [
            entry for entry in os.listdir(os.getcwd()) if '.pdb' in entry
        ]
        self.file = pdb_entries[0]
    elif self.args.smiles:
        self.file = RDKit.smiles_to_pdb(self.args.smiles)
    else:
        self.file = self.args.input

    # Initialise molecule
    self.molecule = Ligand(self.file)

    # Find which config file is being used
    self.molecule.config = self.args.config_file

    # Handle configs which are in a file
    for attribute, value in Configure.load_config(self.molecule.config).items():
        setattr(self.molecule, attribute, value)

    # Although these may be None always, they need be explicitly set anyway.
    self.molecule.restart = None
    self.molecule.end = None
    self.molecule.skip = None

    # Handle configs which are changed by terminal commands
    for attribute, value in vars(self.args).items():
        if value is None:
            continue
        setattr(self.molecule, attribute, value)

    # Now that all configs are stored correctly: execute.
    Execute(self.molecule)
def symmetrise_from_topology(self):
    """
    Collect the symmetry information of the molecule from its topology.

    First, if rdkit_mol has been generated, get the atom symmetry classes and
    the bond / angle (and, when dihedrals exist, dihedral) equivalence classes.
    These will be used by L-J and the Harmonic Bond/Angle params.

    Then, for every carbon and nitrogen, gather the bonded hydrogens that have
    exactly one neighbour and store them in ``self.symm_hs``:
        carbon with 3 such hydrogens   -> 'methyl'
        nitrogen with 2 such hydrogens -> 'amine'
        carbon with 2 such hydrogens   -> 'other' (part of a carbon hydrogen chain)

    The methyl carbons and amine nitrogens are also remembered, and every
    rotatable bond touching one of them is removed from ``self.rotatable``
    (in place). ``self.rotatable`` becomes None when nothing is left.
    """
    # TODO This needs to be more applicable to proteins (e.g. if no rdkit_mol is created).
    if self.rdkit_mol is not None:
        self.atom_symmetry_classes = RDKit.find_symmetry_classes(self.rdkit_mol)
        self.get_bond_equiv_classes()
        self.get_angle_equiv_classes()
        if self.dihedrals is not None:
            self.get_dihedral_equiv_classes()

    symm_hs = {'methyl': [], 'amine': [], 'other': []}
    excluded_cores = []

    for atom in self.atoms:
        element = atom.atomic_symbol
        if element not in ('C', 'N'):
            continue

        # Singly-connected neighbours that really are hydrogens
        # (halogens would also be singly connected).
        terminal_hs = [
            neighbour
            for neighbour in self.topology.neighbors(atom.atom_index)
            if len(list(self.topology.neighbors(neighbour))) == 1
            and self.atoms[neighbour].atomic_symbol == 'H'
        ]
        n_hs = len(terminal_hs)

        if element == 'C' and n_hs == 2:
            # This is part of a carbon hydrogen chain
            symm_hs['other'].append(terminal_hs)
        elif element == 'C' and n_hs == 3:
            symm_hs['methyl'].append(terminal_hs)
            excluded_cores.append(atom.atom_index)
        elif element == 'N' and n_hs == 2:
            symm_hs['amine'].append(terminal_hs)
            excluded_cores.append(atom.atom_index)

    self.symm_hs = symm_hs

    # Remove methyl and amine / nitrile torsions from the rotatable list;
    # these are already well represented in most FF's.
    if self.rotatable is not None:
        rotatable = self.rotatable
        to_drop = [
            key for key in rotatable
            if key[0] in excluded_cores or key[1] in excluded_cores
        ]
        for torsion in to_drop:
            rotatable.remove(torsion)

        self.rotatable = rotatable or None
def qm_optimise(self, molecule):
    """Optimise the molecule coords with the QM engine named by ``molecule.bonds_engine``.

    Two routes exist:
      * geometric through QCEngine, used when ``molecule.geometric`` is set and
        the bonds engine is 'psi4';
      * the engine's own optimiser (Gaussian, or geometric off) otherwise.

    The starting coordinates are 'mm' when ``molecule.coords['mm']`` is not
    empty, otherwise 'input'. A failed job is restarted up to ``max_restarts``
    times. On success the optimised coordinates are stored in
    ``molecule.coords['qm']``, the energy in ``molecule.qm_energy`` and the
    structure is written to an xyz file.

    Raises
    ------
    OptimisationFailed
        If the job has still not succeeded once the restarts are used up.
    """

    def _start_coords():
        # 'mm' if an mm structure has been stored, else the raw input coords.
        return "mm" if list(molecule.coords["mm"]) else "input"

    append_to_log('Starting qm_optimisation')
    qm_engine = self.engine_dict[molecule.bonds_engine](molecule)
    max_restarts = 3
    n_atoms_shape = None  # filled lazily below; shape is (number of atoms, 3)

    if molecule.geometric and (molecule.bonds_engine == 'psi4'):
        qceng = QCEngine(molecule)
        result = qceng.call_qcengine(engine='geometric', driver='gradient', input_type=_start_coords())

        restart_count = 0
        while (not result['success']) and (restart_count < max_restarts):
            append_to_log(f'{molecule.bonds_engine} optimisation failed with error {result["error"]}; restarting',
                          msg_type='minor')

            try:
                # Resume from the last geometry the failed job reported
                # (converted from bohr to angstroms).
                n_atoms_shape = (len(molecule.atoms), 3)
                last_geometry = result['input_data']['final_molecule']['geometry']
                molecule.coords['temp'] = np.array(last_geometry).reshape(n_atoms_shape)
                molecule.coords['temp'] *= constants.BOHR_TO_ANGS
                result = qceng.call_qcengine(engine='geometric', driver='gradient', input_type='temp')
            except KeyError:
                # No usable geometry in the result: start again from the beginning.
                result = qceng.call_qcengine(engine='geometric', driver='gradient', input_type=_start_coords())

            restart_count += 1

        if not result['success']:
            raise OptimisationFailed("The optimisation did not converge")

        molecule.read_geometric_traj(result['trajectory'])

        # store the final molecule as the qm optimised structure
        final_geometry = result['final_molecule']['geometry']
        molecule.coords['qm'] = np.array(final_geometry).reshape((len(molecule.atoms), 3))
        molecule.coords['qm'] *= constants.BOHR_TO_ANGS

        molecule.qm_energy = result['energies'][-1]

        # Write out the trajectory file and the optimised structure
        molecule.write_xyz('traj', name=f'{molecule.name}_opt')
        molecule.write_xyz('qm', name='opt')

    else:
        # Using Gaussian or geometric off
        result = qm_engine.generate_input(input_type=_start_coords(), optimise=True,
                                          execute=molecule.bonds_engine)

        restart_count = 0
        while (not result['success']) and (restart_count < max_restarts):
            append_to_log(f'{molecule.bonds_engine} optimisation failed with error {result["error"]}; restarting',
                          msg_type='minor')

            error = result['error']
            if error == 'FileIO':
                result = qm_engine.generate_input('mm', optimise=True, restart=True,
                                                  execute=molecule.bonds_engine)
            elif error == 'Max iterations':
                result = qm_engine.generate_input('input', optimise=True, restart=True,
                                                  execute=molecule.bonds_engine)
            else:
                # Any other error: try again from a freshly generated conformer.
                molecule.coords['temp'] = RDKit().generate_conformers(molecule.rdkit_mol)[0]
                result = qm_engine.generate_input('temp', optimise=True, execute=molecule.bonds_engine)

            restart_count += 1

        if not result['success']:
            raise OptimisationFailed(
                f"{molecule.bonds_engine} optimisation did not converge after 3 restarts; "
                f"last error {result['error']}")

        molecule.coords['qm'], molecule.qm_energy = qm_engine.optimised_structure()
        molecule.write_xyz('qm', name='opt')

    geometric_note = " using geometric" if molecule.geometric else ""
    append_to_log(f'Finishing qm_optimisation of molecule{geometric_note}')

    return molecule
def qm_optimise(self, molecule):
    """Optimise the molecule with or without geometric.

    With ``molecule.geometric`` set and a 'psi4' bonds engine, geometric is run
    once through QCEngine; the last trajectory frame becomes
    ``molecule.coords['qm']`` and the method returns, or raises if the job did
    not succeed.

    Otherwise the QM engine's own optimiser is used. A failed job is restarted
    depending on the reported error, and the optimised structure and energy
    are read back from the engine on success.

    In both cases the 'mm' coordinates are the starting point when
    ``molecule.coords['mm'].any()`` is true, otherwise the 'input' coordinates.

    Raises
    ------
    OptimisationFailed
        If the optimisation has not succeeded.
    """
    # TODO this method's not always printing completion to log file.
    append_to_log('Starting qm_optimisation')
    qm_engine = self.engine_dict[molecule.bonds_engine](molecule)

    if molecule.geometric and molecule.bonds_engine == 'psi4':
        qceng = QCEngine(molecule)

        # See if the mm structure is there; if not we did not optimise
        if molecule.coords['mm'].any():
            result = qceng.call_qcengine('geometric', 'gradient', input_type='mm')
        else:
            result = qceng.call_qcengine('geometric', 'gradient', input_type='input')

        if not result['success']:
            # TODO catch the qcengine error here
            print(result)
            # catch the steps done so far
            raise OptimisationFailed("The optimisation did not converge")

        # Converged: load all of the frames into the molecule's trajectory holder
        molecule.read_geometric_traj(result['trajectory'])

        # store the last frame as the qm optimised structure
        molecule.coords['qm'] = molecule.coords['traj'][-1]

        # Write out the trajectory file
        molecule.write_xyz(input_type='traj', name=f'{molecule.name}_opt')
        molecule.write_xyz(input_type='qm', name='opt')

        geometric_note = " using geometric" if molecule.geometric else ""
        append_to_log(f'Finishing qm_optimisation of molecule{geometric_note}')

        return molecule

    elif molecule.coords['mm'].any():
        result = qm_engine.generate_input(input_type='mm', optimise=True)

    else:
        result = qm_engine.generate_input(input_type='input', optimise=True)

    # Check the exit status of the job; if failed restart the job up to 2 times
    restart_count = 1
    while not result['success'] and restart_count < 3:
        append_to_log(
            f'{molecule.bonds_engine} optimisation failed with error {result["error"]}; restarting',
            msg_type='minor')

        error = result['error']

        if error == 'FileIO':
            # 1) File read error: just start again
            result = qm_engine.generate_input(input_type='mm', optimise=True, restart=True)

        elif error == 'Distance matrix':
            if restart_count == 1:
                # 2) Distance matrix error: start from a different structure, try the input
                result = qm_engine.generate_input(input_type='input', optimise=True)
            else:
                # 3) The starting structure was already tried: generate a conformer and try again
                molecule.write_pdb()
                conformers = RDKit().generate_conformers(f'{molecule.name}.pdb')
                molecule.coords['temp'] = conformers[0]
                result = qm_engine.generate_input(input_type='temp', optimise=True)

        restart_count += 1

    if not result['success']:
        # NOTE(review): the message says "3 restarts" but the loop above allows
        # at most two - confirm which is intended.
        raise OptimisationFailed(
            f"{molecule.bonds_engine} optimisation did not converge after 3 restarts; "
            f"last error {result['error']}")

    molecule.coords['qm'], molecule.qm_energy = qm_engine.optimised_structure()
    molecule.write_xyz(input_type='qm', name='opt')

    geometric_note = " using geometric" if molecule.geometric else ""
    append_to_log(f'Finishing qm_optimisation of molecule{geometric_note}')

    return molecule