def checkTorsion(smiles, torsion_indices, ff_name):
    """
    Find the force field torsion parameter id assigned to one dihedral.

    The molecule is built from a mapped SMILES, labelled with the requested
    force field, and its "ProperTorsions" labels are searched for an entry
    whose atom indices equal ``torsion_indices`` in forward or reverse order.

    Parameters
    ----------
    smiles : str
        Mapped SMILES, parsed with ``Molecule.from_mapped_smiles``.
    torsion_indices : sequence of int
        Atom indices of the dihedral to look up. Any sequence is accepted;
        it is converted to a tuple before comparison so that a list compares
        equal to the tuple keys of the label dictionary.
    ff_name : str
        Force field file name handed to ``ForceField`` (cosmetic attributes
        are allowed).

    Returns
    -------
    tid : str or None
        ``parameter.id`` of the matching torsion, or ``None`` when no
        labelled torsion matches.
    """
    target = tuple(torsion_indices)
    reversed_target = target[::-1]

    molecule = Molecule.from_mapped_smiles(smiles)
    topology = Topology.from_molecules(molecule)
    forcefield = ForceField(ff_name, allow_cosmetic_attributes=True)
    molecule_force_list = forcefield.label_molecules(topology)

    # Start from None so that "no match" is a value, not an unbound name.
    tid = None
    for mol_forces in molecule_force_list:
        for force_tag, force_dict in mol_forces.items():
            if force_tag != "ProperTorsions":
                continue
            for atom_indices, parameter in force_dict.items():
                if tuple(atom_indices) in (target, reversed_target):
                    tid = parameter.id
    return tid
def get_assigned_torsion_param(tdentry, forcefield):
    """Return the torsion parameter a force field assigns to a driven dihedral.

    Parameters
    ----------
    tdentry : TDEntry
        TorsionDrive entry. Its ``attributes`` mapping provides the mapped
        SMILES the molecule is built from, and ``td_keywords.dihedrals``
        provides the driven dihedral indices.
    forcefield : str, ForceField
        OpenFF force field to apply. A string is turned into a
        ``ForceField`` first.

    Returns
    -------
    torsion_params : ProperTorsion
        The "ProperTorsions" label stored under the driven dihedral's
        indices. The forward order is tried with ``.get``; if that yields
        ``None`` the reversed order is looked up by plain indexing.

    Examples
    --------
    With TDEntries obtained from `get_torsiondrives_matching_smarts`, the
    parameters assigned by `"openff-1.0.0.offxml"` can be collected like so:

    >>> from openforcefield.typing.engines.smirnoff import ForceField
    >>> tdentries = get_torsiondrives_matching_smarts(smarts, dataset, client)
    >>> ff = ForceField('openff-1.0.0.offxml')
    >>> assigned = [smarts_torsions.get_assigned_torsion_param(tdentry, ff)
                    for tdentry in tdentries]
    >>> print([t.id for t in assigned])
    ['t47', 't47', 't47', 't47', ...]

    """
    attribute_key = "canonical_isomeric_explicit_hydrogen_mapped_smiles"
    offmol = Molecule.from_mapped_smiles(tdentry.attributes[attribute_key])

    if isinstance(forcefield, str):
        forcefield = ForceField(forcefield)

    # The topology holds exactly one molecule, hence the [0].
    labels = forcefield.label_molecules(Topology.from_molecules(offmol))[0]

    # Only the first dihedral is considered; 2D torsion drives would need more.
    driven = tdentry.td_keywords.dihedrals[0]

    proper_torsions = labels["ProperTorsions"]
    assigned = proper_torsions.get(driven)
    if assigned is not None:
        return assigned

    # Not stored in the forward order: fall back to the reversed tuple.
    return proper_torsions[driven[::-1]]
def paramUsage(smilesList, offxml):
    """
    Map bond/angle parameter ids to the SMILES of the molecules using them.

    Every SMILES is labelled with the given force field; each assigned
    parameter whose id contains "a" or "b" is recorded. The resulting
    dictionary is also pickled to "anglebond.p" in the working directory.

    Parameters
    ----------
    smilesList : iterable of str
        SMILES strings; parsed with ``allow_undefined_stereo=True``.
    offxml : str
        The .offxml force field the parameters are taken from.

    Returns
    -------
    anglebondDict : dict of str to set of str
        Parameter id (e.g. 'a1', 'b2') mapped to the set of SMILES that were
        assigned that parameter.
    """
    anglebondDict = dict()

    forcefield = ForceField(offxml)

    for smi in smilesList:
        molecule = Molecule.from_smiles(smi, allow_undefined_stereo=True)
        topology = Topology.from_molecules([molecule])

        for mol_forces in forcefield.label_molecules(topology):
            for force_dict in mol_forces.values():
                for parameter in force_dict.values():
                    pid = parameter.id
                    # NOTE(review): substring test, so any id containing the
                    # letter "a" or "b" is collected, not only ids that
                    # start with it -- confirm this is intended.
                    if "a" in pid or "b" in pid:
                        anglebondDict.setdefault(pid, set()).add(smi)

    # Context manager guarantees the pickle file is flushed and closed.
    with open("anglebond.p", "wb") as handle:
        pickle.dump(anglebondDict, handle)

    return anglebondDict
def checkTorsion(molList, ff_name):
    """
    Tag each molecule with the id of the torsion parameter that was driven.

    For every molecule, the mapped SMILES stored under the "cmiles" data tag
    is labelled with the force field. Each "ProperTorsions" entry whose atom
    indices equal the molecule's "TDindices" data (forward or reversed) sets
    the "IDMatch" data tag to that parameter's id and adds the molecule to
    the returned list.

    Parameters
    ----------
    molList : list of objects
        Molecules exposing ``GetData``/``SetData`` with "cmiles" and
        "TDindices" data tags.
    ff_name : str
        Force field file name handed to ``ForceField``.

    Returns
    -------
    mols : list of objects
        The molecules that received an "IDMatch" data tag; a molecule is
        appended once per matching torsion.
    """
    count = 0
    mols = []

    # The force field does not depend on the molecule: build it once.
    forcefield = ForceField(ff_name)

    for mol in molList:
        molecule = Molecule.from_mapped_smiles(mol.GetData("cmiles"))
        topology = Topology.from_molecules(molecule)
        molecule_force_list = forcefield.label_molecules(topology)

        for mol_forces in molecule_force_list:
            for force_tag, force_dict in mol_forces.items():
                if force_tag != "ProperTorsions":
                    continue
                for atom_indices, parameter in force_dict.items():
                    td_indices = mol.GetData("TDindices")
                    if (atom_indices == td_indices
                            or tuple(reversed(atom_indices)) == td_indices):
                        count += 1
                        mol.SetData("IDMatch", parameter.id)
                        mols.append(mol)

    print(
        "Out of "
        + str(len(molList))
        + " molecules, "
        + str(count)
        + " were processed with checkTorsion()"
    )

    return mols
def get_dihedral_parameters(self):
    """
    Fetch the OpenFF toolkit parameters of the tracked dihedral.

    The molecule held by this object is labelled with the force field named
    by ``self.forcefield`` (with '.offxml' appended) and the
    'ProperTorsions' entry stored under ``self.atom_indexes`` is returned.

    Returns
    -------
    parameters : dict
        The dictionary with the parameters
    """
    from openforcefield.topology import Topology
    from openforcefield.typing.engines.smirnoff import ForceField

    off_molecule = self.molecule.off_molecule
    single_molecule_topology = Topology.from_molecules([off_molecule])

    force_field = ForceField(self.forcefield + '.offxml')
    labels = force_field.label_molecules(single_molecule_topology)[0]

    # Copied into a plain dict before indexing by the tracked atom indexes.
    proper_torsions = dict(labels['ProperTorsions'])
    return proper_torsions[self.atom_indexes]
def __init__(self, dihedral_benchmark):
    """
    Set up an OpenFFEnergeticProfile object.

    After the parent initializer has run, the benchmark's molecule is
    labelled with its own force field and the 'ProperTorsions' labels are
    cached as a plain dict in ``self._parameters``.

    Parameters
    ----------
    dihedral_benchmark : an peleffybenchmarktools.dihedrals.DihedralBenchmark
                         object
        The DihedralBenchmark object that will be used to obtain the
        energetic profile
    """
    super().__init__(dihedral_benchmark)

    from openforcefield.topology import Topology
    from openforcefield.typing.engines.smirnoff import ForceField

    benchmark_molecule = self.dihedral_benchmark.molecule
    single_molecule_topology = Topology.from_molecules(
        [benchmark_molecule.off_molecule])

    force_field = ForceField(benchmark_molecule.forcefield + '.offxml')
    labels = force_field.label_molecules(single_molecule_topology)[0]

    self._parameters = dict(labels['ProperTorsions'])
def checkParam(cmiles, ff2):
    """
    Return ``cmiles`` if the force field assigns bond parameter 'b1' to it.

    The molecule is built from the mapped SMILES and labelled with ``ff2``
    (cosmetic attributes allowed). Every force tag is printed as it is
    visited; on the first bond labelled 'b1' the text 'match' is printed and
    the input SMILES is returned. Without such a bond the function falls
    through and returns ``None``.
    """
    offmol = Molecule.from_mapped_smiles(cmiles)
    topology = Topology.from_molecules([offmol])

    labeller = ForceField(ff2, allow_cosmetic_attributes=True)

    for mol_forces in labeller.label_molecules(topology):
        for force_tag, force_dict in mol_forces.items():
            print(force_tag)
            if force_tag != 'Bonds':
                continue
            for parameter in force_dict.values():
                if parameter.id == 'b1':
                    print('match')
                    return cmiles
    return None
def test_coverage_filter():
    """
    Make sure the coverage filter removes the correct molecules.

    The filter is configured to allow "b83" and to filter "b87". Every
    molecule that passes must be labelled with "b83" by the reference force
    field and must not carry a "dihedrals" property. Running the filter a
    second time on its own output must remove nothing.
    """
    from openforcefield.typing.engines.smirnoff import ForceField

    coverage_filter = workflow_components.CoverageFilter()
    coverage_filter.allowed_ids = ["b83"]
    coverage_filter.filtered_ids = ["b87"]

    mols = get_stereoisomers()

    # we have to remove duplicated records
    molecule_container = get_container(mols)
    result = coverage_filter.apply(molecule_container.molecules, processors=1)

    forcefield = ForceField("openff_unconstrained-1.0.0.offxml")

    # group the SMILES of the surviving molecules by assigned parameter id
    parameters_by_id = {}
    for molecule in result.molecules:
        labels = forcefield.label_molecules(molecule.to_topology())[0]
        covered_types = {
            label.id for types in labels.values() for label in types.values()
        }
        for parameter in covered_types:
            parameters_by_id.setdefault(parameter, []).append(
                molecule.to_smiles())

    expected = parameters_by_id["b83"]
    for molecule in result.molecules:
        assert molecule.to_smiles() in expected
        assert "dihedrals" not in molecule.properties

    # the molecules that passed must contain only allowed parameters, so a
    # second pass over them has to be a no-op
    result2 = coverage_filter.apply(result.molecules, processors=1)
    assert result2.n_filtered == 0
    # compare the second pass against the first (previously self-compared)
    assert result2.n_molecules == result.n_molecules
def get_parameters_from_forcefield(self, forcefield, molecule):
    """
    It returns the parameters that are obtained with the supplied
    forcefield for a certain offpele's molecule.

    Parameters
    ----------
    forcefield : str or an openforcefield.typing.engines.smirnoff.ForceField
                 object
        The forcefield from which the parameters will be obtained. A string
        is passed to the ``ForceField`` constructor.
    molecule : an offpele.topology.Molecule
        The offpele's Molecule object; its ``off_molecule`` attribute is
        the molecule that gets labelled.

    Returns
    -------
    openforcefield_parameters : an OpenForceFieldParameters object
        The OpenForceFieldParameters object

    Raises
    ------
    TypeError
        If ``forcefield`` is neither a string nor a ``ForceField``.
    """
    from openforcefield.typing.engines.smirnoff import ForceField
    from openforcefield.topology import Topology

    off_molecule = molecule.off_molecule
    topology = Topology.from_molecules([off_molecule])

    if isinstance(forcefield, str):
        forcefield = ForceField(forcefield)
    elif not isinstance(forcefield, ForceField):
        # TypeError is still caught by callers using ``except Exception``.
        raise TypeError('Invalid forcefield type')

    molecule_parameters_list = forcefield.label_molecules(topology)

    # The topology was built from exactly one molecule.
    assert len(molecule_parameters_list) == 1, 'A single molecule is ' \
        'expected'

    return self.OpenForceFieldParameters(molecule_parameters_list[0])
class Run(object):
    """
    Create FEP files from a common substructure for a given set of ligands
    """

    def __init__(self, lig, FF, merge, *args, **kwargs):
        """
        Store the run settings and initialise empty bookkeeping containers.

        Parameters
        ----------
        lig : str
            Base name of the ligand files; extensions such as '.sdf',
            '.mol2', '.lib', '.prm' and '.pdb' are appended to it.
        FF : str
            Name of the force field the ligand parameters are merged with.
        merge : bool
            Selects which parameter template ``write_prm_Q`` uses.
        """
        self.lig = lig
        self.FF = FF
        self.merge = merge
        self.ff_list = []
        # atom counter -> [index, name, type, charge, x, y, z] (all strings)
        self.mapping = {}
        self.total_charge = 0
        # masses are kept as strings because they are written with '{:>10s}'
        self.masses = {
            "H": "1.0080",
            "C": "12.0110",
            "N": "14.0070",
            "O": "15.9994",
            "F": "19.0000",
            "P": "30.9700",
            "S": "32.0600",
            "Cl": "35.0000",
            "Br": "79.9000",
            "I": "126.90",
            "DUM": "0.0000"
        }

    @staticmethod
    def _magnitude(quantity):
        """Return the leading number of a quantity's string form as a float."""
        # This is a bit hacky, check how to get the float out directly
        return float('{}'.format(quantity).split()[0])

    def openff(self):
        """
        Load ``<lig>.sdf`` and label it with the openff-1.0.0 force field.

        Sets ``self.molecule``, ``self.topology``, ``self.forcefield`` and
        ``self.parameters`` (the labels of the single molecule).
        """
        # Load the molecule from the sdf file
        molecule = Molecule.from_file(self.lig + '.sdf')
        topology = Topology.from_molecules([molecule])
        self.molecule = molecule
        self.topology = topology

        # Label using the openff-1.0.0 force field
        self.forcefield = ForceField('openff-1.0.0.offxml')
        self.parameters = self.forcefield.label_molecules(topology)[0]

    def read_mol2(self):
        """
        This is basically to get the charge, will later be deprecated when
        charges are transferable in openff

        Every line of ``<lig>.mol2`` that splits into exactly nine fields is
        taken as an atom record and stored in ``self.mapping`` under a
        running zero-based counter; its charge is added to
        ``self.total_charge``. A warning is printed when the total differs
        from zero.
        """
        with open(self.lig + '.mol2') as infile:
            cnt = -1
            for line in infile:
                line = line.split()
                if len(line) != 9:
                    continue
                cnt += 1
                self.mapping[cnt] = [
                    line[0],  # at idex
                    line[1],  # atname
                    line[5].split('.')[0],  # attype
                    line[8],  # charge
                    line[2],  # X coordinate
                    line[3],  # Y coordinte
                    line[4]  # Z coordinate
                ]
                self.total_charge += float(line[8])

        if self.total_charge != 0.0:
            print('WARNING: residual charge {} check your mol2 file!'.format(
                self.total_charge))

    def write_lib_Q(self):
        """
        Write ``<lig>.lib`` with the atoms, bonds and impropers sections.

        Atom names and charges come from ``self.mapping``; the bond and
        improper atom tuples are the keys of ``self.parameters['Bonds']``
        and ``self.parameters['ImproperTorsions']``.
        """
        with open(self.lig + '.lib', 'w') as outfile:
            outfile.write('{} ! atoms no {} total charge {} \n\n'.format(
                '{LIG}', len(self.mapping), self.total_charge))

            outfile.write("[info] \n SYBYLtype RESIDUE \n\n")

            # atom and charge block:
            outfile.write("[atoms] \n")
            for at in self.mapping:
                outfile.write('{:>4s} {:10}{:11}{:>10s}\n'.format(
                    self.mapping[at][0], self.mapping[at][1],
                    self.mapping[at][1].lower(), self.mapping[at][3]))

            # bonded block
            outfile.write("\n[bonds]\n")
            for bond in self.parameters['Bonds']:
                ai = self.mapping[bond[0]][1]
                aj = self.mapping[bond[1]][1]
                outfile.write('{:10s}{:}\n'.format(ai, aj))

            # improper block
            outfile.write("\n[impropers]\n")
            for torsion in self.parameters['ImproperTorsions']:
                ai = self.mapping[torsion[0]][1]
                aj = self.mapping[torsion[1]][1]
                ak = self.mapping[torsion[2]][1]
                al = self.mapping[torsion[3]][1]
                outfile.write('{:10}{:10}{:10}{}\n'.format(ai, aj, ak, al))

    def _atom_names(self, atom_indices):
        """Return the lower-cased mapped atom names for a tuple of indices."""
        return [self.mapping[idx][1].lower() for idx in atom_indices]

    def _write_vdw(self, outfile):
        """Write one vdW line per atom in ``self.parameters['vdW']``."""
        for atom_indices, parameter in self.parameters['vdW'].items():
            ai = atom_indices[0]
            ai_name = self.mapping[ai][1].lower()
            epsilon = self._magnitude(parameter.epsilon)
            # TO DO: CHECK IF THIS IS CORRECT!
            epsilon23 = epsilon / 2
            Rmin = self._magnitude(parameter.rmin_half)
            mass = self.masses[self.mapping[ai][2]]
            outfile.write(
                '{:6}{: 8.3f}{: 10.3f}{: 10.3f}{: 10.3f}{: 10.3f}{:>10s}\n'
                .format(ai_name, Rmin, 0.00, epsilon, Rmin, epsilon23, mass))

    def _write_bonds(self, outfile):
        """Write one line per bond in ``self.parameters['Bonds']``."""
        for atom_indices, parameter in self.parameters['Bonds'].items():
            ai_name, aj_name = self._atom_names(atom_indices[:2])
            fc = self._magnitude(parameter.k)
            length = self._magnitude(parameter.length)
            outfile.write('{:10}{:10}{:10.1f}{:>10.3f}\n'.format(
                ai_name, aj_name, fc, length))

    def _write_angles(self, outfile):
        """Write one line per angle in ``self.parameters['Angles']``."""
        for atom_indices, parameter in self.parameters['Angles'].items():
            ai_name, aj_name, ak_name = self._atom_names(atom_indices[:3])
            fc = self._magnitude(parameter.k)
            angle = self._magnitude(parameter.angle)
            outfile.write('{:10}{:10}{:10}{: 8.2f}{:>12.3f}\n'.format(
                ai_name, aj_name, ak_name, fc, angle))

    def _write_torsions(self, outfile):
        """Write one line per term of every proper torsion."""
        torsions = self.parameters['ProperTorsions']
        for atom_indices, parameter in torsions.items():
            ai_name, aj_name, ak_name, al_name = self._atom_names(
                atom_indices[:4])

            # Collect all terms first, then write them.
            forces = []
            max_phase = len(parameter.phase)
            for i in range(max_phase):
                fc = self._magnitude(parameter.k[i])
                phase = self._magnitude(parameter.phase[i])
                paths = int(parameter.idivf[i])
                minimum = float(parameter.periodicity[i])
                # Every term except the last gets a negated periodicity.
                # NOTE(review): presumably the target format's marker for
                # "more terms follow" -- confirm.
                if i != max_phase - 1:
                    minimum = minimum * -1
                forces.append((fc, minimum, phase, paths))

            for force in forces:
                outfile.write(
                    '{:10}{:10}{:10}{:10}{:>10.3f}{:>10.3f}{:>10.3f}{:>5d}\n'
                    .format(ai_name, aj_name, ak_name, al_name, force[0],
                            force[1], force[2], force[3]))

    def _write_impropers(self, outfile):
        """Write one line per improper, using only its first term."""
        impropers = self.parameters['ImproperTorsions']
        for atom_indices, parameter in impropers.items():
            ai_name, aj_name, ak_name, al_name = self._atom_names(
                atom_indices[:4])
            fc = self._magnitude(parameter.k[0])
            phase = self._magnitude(parameter.phase[0])
            outfile.write('{:10}{:10}{:10}{:10}{:10.3f}{:10.3f}\n'.format(
                ai_name, aj_name, ak_name, al_name, fc, phase))

    def write_prm_Q(self):
        """
        Copy a parameter template and insert the ligand parameters.

        The template is copied line by line; directly after each
        "! Ligand ... parameters" marker line the matching block from
        ``self.parameters`` is written.

        Raises
        ------
        ValueError
            If no template is defined for the ``FF``/``merge`` combination
            (only 'AMBER14sb' with merge, or any FF without merge, are
            handled).
        """
        if self.FF == 'AMBER14sb' and self.merge == True:
            prm_file = os.path.join(s.FF_DIR, 'AMBER14sb.prm')
            prm_file_out = self.FF + '_' + self.lig + '.prm'
        elif self.merge == False:
            prm_file = os.path.join(s.FF_DIR, 'NOMERGE.prm')
            prm_file_out = self.lig + '.prm'
        else:
            # Previously this fell through and failed later with an
            # UnboundLocalError on prm_file.
            raise ValueError(
                'No parameter template for force field {!r} with '
                'merge={!r}'.format(self.FF, self.merge))

        # marker line in the template -> writer for the block that follows
        writers = {
            "! Ligand vdW parameters\n": self._write_vdw,
            "! Ligand bond parameters\n": self._write_bonds,
            "! Ligand angle parameters\n": self._write_angles,
            "! Ligand torsion parameters\n": self._write_torsions,
            "! Ligand improper parameters\n": self._write_impropers,
        }

        with open(prm_file) as infile, open(prm_file_out, 'w') as outfile:
            for line in infile:
                outfile.write(line)
                writer = writers.get(line)
                if writer is not None:
                    writer(outfile)

    def write_PDB(self):
        """Write ``<lig>.pdb`` with one HETATM record per mapped atom."""
        with open(self.lig + '.pdb', 'w') as outfile:
            for atom in self.mapping:
                ai = atom + 1
                ai_name = self.mapping[atom][1]
                a_el = self.mapping[atom][2]
                ax = float(self.mapping[atom][4])
                ay = float(self.mapping[atom][5])
                az = float(self.mapping[atom][6])
                at_entry = [
                    'HETATM',  # 0 ATOM/HETATM
                    ai,  # 1 ATOM serial number
                    ai_name,  # 2 ATOM name
                    '',  # 3 Alternate location indicator
                    'LIG',  # 4 Residue name
                    '',  # 5 Chain identifier
                    1,  # 6 Residue sequence number
                    '',  # 7 Code for insertion of residue
                    ax,  # 8 Orthogonal coordinates for X
                    ay,  # 9 Orthogonal coordinates for Y
                    az,  # 10 Orthogonal coordinates for Z
                    0.0,  # 11 Occupancy
                    0.0,  # 12 Temperature factor
                    a_el,  # 13 Element symbol
                    ''  # 14 Charge on atom
                ]
                outfile.write(IO.pdb_parse_out(at_entry) + '\n')

    def report_missing_parameters(self):
        """
        Analyze ``self.molecule`` using ``self.forcefield``, generating a
        report of any chemical groups in the molecule that are lacking
        parameters. Both are deep-copied, so the attributes themselves are
        left untouched.

        The report is printed; when nothing is missing a success message is
        printed instead.

        Returns
        -------
        missing_parameters : dict[tagname: list[dict[tagged_smiles:string, image:PIL.Image, atom indices:list[int]]]]
            A hierarchical dictionary, with first level keys indicating
            ForceField tag names (eg. "Bonds"), and first-level values which
            are lists of dictionaries. Each dictionary in this list reflects
            one missing parameter, and contains the following key:value
            pairs :
            * "image": PIL.Image
                * shows a 2D drawing, highlighting the feature that could
                  not be parametrized
            * "tagged_smiles": string
                * SMILES of the whole molecule, tagging the atom indices
                  which could not be parametrized
            * "atom_indices": tuple(int)
                * The indices of atoms which could not be parametrized
        """
        highlight_color = (0.75, 0.75, 0.75)

        # Make deepcopies of both inputs, since we may modify them in this
        # function
        forcefield = deepcopy(self.forcefield)
        molecule = deepcopy(self.molecule)

        # Set partial charges to placeholder values so that we can skip
        # AM1-BCC during parameterization
        molecule.partial_charges = (np.zeros(molecule.n_atoms) +
                                    0.1) * unit.elementary_charge

        # Prepare dictionary to catch parameterization failure info
        success = False
        missing_params = {}

        while not success:
            # Try to parameterize the system, catching the exception if
            # there is one.
            try:
                forcefield.create_openmm_system(
                    molecule.to_topology(), charge_from_molecules=[molecule])
                success = True
            except UnassignedValenceParameterException as e:
                success = False

                # Ensure that there is a list initialized for missing
                # parameters under this tagname
                handler_tagname = e.handler_class._TAGNAME
                missing_params.setdefault(handler_tagname, [])

                # Create a shortcut to the topology atom tuples attached to
                # the parametrization error
                top_atom_tuples = e.unassigned_topology_atom_tuples

                # Make a summary of the missing parameters from this attempt
                # and add it to the missing_params dict
                rdmol = molecule.to_rdkit()
                for top_atom_tuple in top_atom_tuples:
                    orig_atom_indices = [
                        i.topology_atom_index for i in top_atom_tuple
                    ]
                    # Make a copy of the input RDMol so that we don't modify
                    # the original
                    this_rdmol = deepcopy(rdmol)

                    # Attach tags to relevant atoms so that a tagged SMILES
                    # can be written
                    orig_rdatoms = []
                    for tag_idx, atom_idx in enumerate(orig_atom_indices):
                        rdatom = this_rdmol.GetAtomWithIdx(atom_idx)
                        rdatom.SetAtomMapNum(tag_idx + 1)
                        orig_rdatoms.append(rdatom)

                    tagged_smiles = Chem.MolToSmiles(this_rdmol)

                    # Make tagged hydrogens into deuteriums so that RemoveHs
                    # doesn't get rid of them
                    for rdatom in orig_rdatoms:
                        if rdatom.GetAtomicNum() == 1:
                            rdatom.SetIsotope(2)

                    # Remove hydrogens, since they clutter up the 2D drawing
                    # (tagged Hs are not removed, since they were converted
                    # to deuterium)
                    h_less_rdmol = Chem.RemoveHs(this_rdmol)

                    # Generate 2D coords, since drawing from 3D can look
                    # really weird
                    Draw.rdDepictor.Compute2DCoords(h_less_rdmol)

                    # Search over the molecule to find the indices of the
                    # tagged atoms after hydrogen removal
                    h_less_atom_indices = [None for i in orig_atom_indices]
                    for rdatom in h_less_rdmol.GetAtoms():
                        # Convert deuteriums back into hydrogens
                        if rdatom.GetAtomicNum() == 1:
                            rdatom.SetIsotope(1)

                        atom_map_num = rdatom.GetAtomMapNum()
                        if atom_map_num == 0:
                            continue
                        h_less_atom_indices[atom_map_num -
                                            1] = rdatom.GetIdx()

                    # Once the new atom indices are found, use them to find
                    # the H-less bond indices
                    h_less_rdbonds = []
                    for i in range(len(h_less_atom_indices) - 1):
                        rdbond = h_less_rdmol.GetBondBetweenAtoms(
                            h_less_atom_indices[i],
                            h_less_atom_indices[i + 1])
                        h_less_rdbonds.append(rdbond)
                    h_less_bond_indices = [
                        bd.GetIdx() for bd in h_less_rdbonds
                    ]

                    # Create a 2D drawing of the molecule, highlighting the
                    # parameterization failure
                    highlight_atom_colors = {
                        idx: highlight_color
                        for idx in h_less_atom_indices
                    }
                    highlight_bond_colors = {
                        idx: highlight_color
                        for idx in h_less_bond_indices
                    }
                    image = Draw.MolsToGridImage(
                        [h_less_rdmol],
                        highlightAtomLists=[h_less_atom_indices],
                        highlightBondLists=[h_less_bond_indices],
                        molsPerRow=1,
                        highlightAtomColors=[highlight_atom_colors],
                        highlightBondColors=[highlight_bond_colors],
                        subImgSize=(600, 600))

                    # Structure and append the relevant info to the
                    # missing_params dictionary
                    param_description = {
                        'atom_indices': orig_atom_indices,
                        'image': image,
                        'tagged_smiles': tagged_smiles
                    }
                    missing_params[handler_tagname].append(param_description)

                # Add a "super generic" parameter to the top of this
                # handler's ParameterList, which will make it always find
                # parameters for each term. This will prevent the same
                # parameterization exception from being raised in the next
                # attempt.
                param_list = forcefield.get_parameter_handler(
                    handler_tagname).parameters
                param_list.insert(0, super_generics[handler_tagname])

        # `success` is always True once the loop ends (the super-generic
        # parameters guarantee it), so it cannot tell the two outcomes
        # apart; the collected dictionary can.
        if missing_params:
            print(missing_params)
        else:
            print('Parameters succesfully assigned')

        return missing_params
class LegacyForceField:
    """ Class to hold legacy forcefield for typing and parameter assignment.

    Parameters
    ----------
    forcefield : string
        name and version of the forcefield. Names containing "gaff",
        "smirnoff" or "openff" are recognised (checked in that order); any
        other name raises ``NotImplementedError``.

    """

    def __init__(self, forcefield="gaff-1.81"):
        self.forcefield = forcefield
        self._prepare_forcefield()

    @staticmethod
    def _convert_to_off(mol):
        """Convert a supported molecule representation to an OpenFF molecule.

        Falls through (returning ``None``) for unrecognised inputs.
        """
        import openforcefield

        if isinstance(mol, esp.Graph):
            return mol.mol

        elif isinstance(mol, openforcefield.topology.molecule.Molecule):
            return mol

        elif isinstance(mol, rdkit.Chem.rdchem.Mol):
            return Molecule.from_rdkit(mol)

        elif "openeye" in str(
                type(mol)):  # because we don't want to depend on OE
            return Molecule.from_openeye(mol)

    def _prepare_forcefield(self):
        """Dispatch to the preparation routine matching the force field name."""
        if "gaff" in self.forcefield:
            self._prepare_gaff()

        elif "smirnoff" in self.forcefield:
            # do nothing for now
            self._prepare_smirnoff()

        elif "openff" in self.forcefield:
            self._prepare_openff()

        else:
            raise NotImplementedError

    def _prepare_openff(self):
        """Load ``<forcefield>.offxml`` into ``self.FF``."""
        from openforcefield.typing.engines.smirnoff import ForceField

        self.FF = ForceField("%s.offxml" % self.forcefield)

    def _prepare_smirnoff(self):
        """Load ``test_forcefields/<forcefield>.offxml`` into ``self.FF``."""
        from openforcefield.typing.engines.smirnoff import ForceField

        self.FF = ForceField("test_forcefields/%s.offxml" % self.forcefield)

    def _prepare_gaff(self):
        """Build the atom-type <-> index translation tables for GAFF.

        Atom types are read from the ``Atom`` entries of the last child of
        the GAFF ffxml root; the redundant types listed in
        ``REDUNDANT_TYPES`` are removed and mapped onto the index of their
        replacement. Results are stored in ``self._str_2_idx`` and
        ``self._idx_2_str``.
        """
        import os
        import xml.etree.ElementTree as ET

        import openmmforcefields

        # get the openforcefields path
        openmmforcefields_path = os.path.dirname(openmmforcefields.__file__)

        # get the xml path
        ffxml_path = (openmmforcefields_path + "/ffxml/amber/gaff/ffxml/" +
                      self.forcefield + ".xml")

        # parse xml
        tree = ET.parse(ffxml_path)
        root = tree.getroot()
        # Element.getchildren() was removed in Python 3.9; list(root) is the
        # documented replacement.
        nonbonded = list(root)[-1]
        atom_types = [atom.get("type") for atom in nonbonded.findall("Atom")]

        # remove redundant types
        for bad_type in REDUNDANT_TYPES:
            atom_types.remove(bad_type)

        # compose the translation dictionaries
        str_2_idx = {name: idx for idx, name in enumerate(atom_types)}
        idx_2_str = dict(enumerate(atom_types))

        # provide mapping for redundant types
        for bad_type, good_type in REDUNDANT_TYPES.items():
            str_2_idx[bad_type] = str_2_idx[good_type]

        # make translation dictionaries attributes of self
        self._str_2_idx = str_2_idx
        self._idx_2_str = idx_2_str

    def _type_gaff(self, g):
        """ Type a molecular graph using gaff force fields.

        Antechamber is run on an SDF dump of ``g.mol`` inside a temporary
        directory; the resulting GAFF atom types are translated to indices
        and stored under ``g.nodes["n1"].data["legacy_typing"]``.
        """
        # assert the forcefield is indeed of gaff family
        assert "gaff" in self.forcefield

        # the graph carries the molecule in OpenForceField format
        mol = g.mol

        # import template generator
        from openmmforcefields.generators import GAFFTemplateGenerator

        gaff = GAFFTemplateGenerator(molecules=mol, forcefield=self.forcefield)

        import os
        import tempfile

        # The context manager removes the scratch directory even when
        # antechamber fails.
        with tempfile.TemporaryDirectory() as tempdir:
            prefix = "molecule"
            input_sdf_filename = os.path.join(tempdir, prefix + ".sdf")
            gaff_mol2_filename = os.path.join(tempdir, prefix + ".gaff.mol2")
            frcmod_filename = os.path.join(tempdir, prefix + ".frcmod")

            # write sdf for input
            mol.to_file(input_sdf_filename, file_format="sdf")

            # run antechamber
            gaff._run_antechamber(
                molecule_filename=input_sdf_filename,
                input_format="mdl",
                gaff_mol2_filename=gaff_mol2_filename,
                frcmod_filename=frcmod_filename,
            )

            gaff._read_gaff_atom_types_from_mol2(gaff_mol2_filename, mol)

        gaff_types = [atom.gaff_type for atom in mol.atoms]

        # put types into graph object
        g.nodes["n1"].data["legacy_typing"] = torch.tensor(
            [self._str_2_idx[atom] for atom in gaff_types])

        return g

    def _parametrize_gaff(self, mol, g=None):
        raise NotImplementedError

    def _parametrize_smirnoff(self, g):
        """Write reference parameters from ``self.FF`` onto the graph nodes.

        Bond (``n2``), angle (``n3``), vdW (``n1``) and proper torsion
        (``n4``) labels of ``g.mol`` are converted to the espaloma units and
        stored as ``*_ref`` node data. Returns ``g``.
        """
        # mol = self._convert_to_off(mol)

        forces = self.FF.label_molecules(g.mol.to_topology())[0]

        g.heterograph.apply_nodes(
            lambda node: {
                "k_ref":
                torch.Tensor([
                    forces["Bonds"][tuple(node.data["idxs"][idx].numpy())].k.
                    value_in_unit(esp.units.FORCE_CONSTANT_UNIT)
                    for idx in range(node.data["idxs"].shape[0])
                ])[:, None]
            },
            ntype="n2",
        )

        g.heterograph.apply_nodes(
            lambda node: {
                "eq_ref":
                torch.Tensor([
                    forces["Bonds"][tuple(node.data["idxs"][idx].numpy())].
                    length.value_in_unit(esp.units.DISTANCE_UNIT)
                    for idx in range(node.data["idxs"].shape[0])
                ])[:, None]
            },
            ntype="n2",
        )

        g.heterograph.apply_nodes(
            lambda node: {
                "k_ref":
                torch.Tensor([
                    forces["Angles"][tuple(node.data["idxs"][idx].numpy())].k.
                    value_in_unit(esp.units.ANGLE_FORCE_CONSTANT_UNIT)
                    for idx in range(node.data["idxs"].shape[0])
                ])[:, None]
            },
            ntype="n3",
        )

        g.heterograph.apply_nodes(
            lambda node: {
                "eq_ref":
                torch.Tensor([
                    forces["Angles"][tuple(node.data["idxs"][idx].numpy())].
                    angle.value_in_unit(esp.units.ANGLE_UNIT)
                    for idx in range(node.data["idxs"].shape[0])
                ])[:, None]
            },
            ntype="n3",
        )

        g.heterograph.apply_nodes(
            lambda node: {
                "epsilon_ref":
                torch.Tensor([
                    forces["vdW"][
                        (idx, )].epsilon.value_in_unit(esp.units.ENERGY_UNIT)
                    for idx in range(g.heterograph.number_of_nodes("n1"))
                ])[:, None]
            },
            ntype="n1",
        )

        # NOTE(review): stored as "sigma_ref" but the value read is
        # ``rmin_half`` -- confirm the intended quantity.
        g.heterograph.apply_nodes(
            lambda node: {
                "sigma_ref":
                torch.Tensor([
                    forces["vdW"][(idx, )].rmin_half.value_in_unit(
                        esp.units.DISTANCE_UNIT)
                    for idx in range(g.heterograph.number_of_nodes("n1"))
                ])[:, None]
            },
            ntype="n1",
        )

        def apply_torsion(node, n_max_phases=6):
            n_torsions = g.heterograph.number_of_nodes("n4")
            phases = torch.zeros(
                n_torsions,
                n_max_phases,
            )

            periodicity = torch.zeros(
                n_torsions,
                n_max_phases,
            )

            k = torch.zeros(
                n_torsions,
                n_max_phases,
            )

            force = forces["ProperTorsions"]

            for idx in range(n_torsions):
                idxs = tuple(node.data["idxs"][idx].numpy())
                if idxs in force:
                    _force = force[idxs]
                    # NOTE(review): sub_idx starts at 0 and is used both as
                    # the attribute suffix (k0, k1, ...) and as the column.
                    # If the per-term attributes are 1-based, column 0 stays
                    # zero and the last term is never read -- confirm.
                    for sub_idx in range(len(_force.periodicity)):
                        if hasattr(_force, "k%s" % sub_idx):
                            k[idx, sub_idx] = getattr(
                                _force, "k%s" % sub_idx).value_in_unit(
                                    esp.units.ENERGY_UNIT)

                            phases[idx, sub_idx] = getattr(
                                _force, "phase%s" % sub_idx).value_in_unit(
                                    esp.units.ANGLE_UNIT)

                            periodicity[idx, sub_idx] = getattr(
                                _force, "periodicity%s" % sub_idx)

            return {
                "k_ref": k,
                "periodicity_ref": periodicity,
                "phases_ref": phases,
            }

        g.heterograph.apply_nodes(apply_torsion, ntype="n4")

        return g

    def _multi_typing_smirnoff(self, g):
        """Store the numeric part of each assigned parameter id on the graph.

        For bonds (``n2``), angles (``n3``) and vdW (``n1``) the id with its
        first character stripped is converted to an integer and written to
        the ``legacy_typing`` node data. Returns ``g``.
        """
        # mol = self._convert_to_off(mol)

        forces = self.FF.label_molecules(g.mol.to_topology())[0]

        g.heterograph.apply_nodes(
            lambda node: {
                "legacy_typing":
                torch.Tensor([
                    int(forces["Bonds"][tuple(node.data["idxs"][idx].numpy())].
                        id[1:]) for idx in range(node.data["idxs"].shape[0])
                ]).long()
            },
            ntype="n2",
        )

        g.heterograph.apply_nodes(
            lambda node: {
                "legacy_typing":
                torch.Tensor([
                    int(forces["Angles"][tuple(node.data["idxs"][idx].numpy())]
                        .id[1:]) for idx in range(node.data["idxs"].shape[0])
                ]).long()
            },
            ntype="n3",
        )

        g.heterograph.apply_nodes(
            lambda node: {
                "legacy_typing":
                torch.Tensor([
                    int(forces["vdW"][(idx, )].id[1:])
                    for idx in range(g.heterograph.number_of_nodes("n1"))
                ]).long()
            },
            ntype="n1",
        )

        return g

    def parametrize(self, g):
        """ Parametrize a molecular graph.

        Only implemented for "smirnoff" and "openff" force fields.
        """
        if "smirnoff" in self.forcefield or "openff" in self.forcefield:
            return self._parametrize_smirnoff(g)

        else:
            raise NotImplementedError

    def typing(self, g):
        """ Type a molecular graph.

        Only implemented for "gaff" force fields.
        """
        if "gaff" in self.forcefield:
            return self._type_gaff(g)

        else:
            raise NotImplementedError

    def multi_typing(self, g):
        """ Type a molecular graph for hetero nodes.

        Only implemented for "smirnoff" force fields.
        """
        if "smirnoff" in self.forcefield:
            return self._multi_typing_smirnoff(g)

        else:
            raise NotImplementedError

    def __call__(self, *args, **kwargs):
        return self.typing(*args, **kwargs)
class Run(object):
    """
    Create FEP files from a common substructure for a given set of
    ligands.

    ``openff()`` fills ``self.parameters`` and ``read_mol2()`` fills
    ``self.mapping``; the ``write_*`` methods read both and emit the
    ``.lib``, ``.prm`` and ``.pdb`` files.
    """

    # Sums of decimal partial charges are rarely exactly 0.0 in binary
    # floating point; anything below this magnitude is treated as zero.
    CHARGE_TOLERANCE = 1e-6

    def __init__(self, lig, FF, merge, *args, **kwargs):
        """
        Store the run settings and initialise the empty containers.

        Parameters
        ----------
        lig : str
            Path prefix of the ligand; ``lig + '.mol2'`` is read and
            ``lig + '.lib'`` / ``lig + '.pdb'`` are written.
        FF : str
            Force field name (only ``'AMBER14sb'`` is recognised when
            merging).
        merge : bool
            Whether ligand parameters are merged into the force field
            parameter file.
        """
        self.lig = lig
        self.FF = FF
        self.merge = merge
        self.ff_list = []
        # atom index (0-based) -> [id, name, element, charge, x, y, z]
        self.mapping = {}
        self.total_charge = 0
        self.masses = {
            "H": "1.0080",
            "C": "12.0110",
            "N": "14.0070",
            "O": "15.9994",
            "F": "19.0000",
            "P": "30.9700",
            "S": "32.0600",
            "Cl": "35.0000",
            "Br": "79.9000",
            "I": "126.90",
            "DUM": "0.0000"
        }

    @staticmethod
    def _magnitude(quantity):
        """Return the leading number of ``str(quantity)`` as a float.

        This is a bit hacky (kept from the original code): the value is
        formatted to text and the first whitespace-separated token parsed.
        """
        return float('{}'.format(quantity).split()[0])

    def _type_name(self, atom_index):
        """Return the lower-cased atom name stored for *atom_index*."""
        return self.mapping[atom_index][1].lower()

    def openff(self):
        """Label the ligand with smirnoff99Frosst and keep the result."""
        # Load the molecule (for now mol2, until charges are saved on sdf)
        molecule = Molecule.from_file(self.lig + '.mol2')
        topology = Topology.from_molecules([molecule])

        # Label using the smirnoff99Frosst force field
        self.forcefield = ForceField('smirnoff99Frosst.offxml')
        self.parameters = self.forcefield.label_molecules(topology)[0]

    def read_mol2(self):
        """
        This is basically to get the charge, will later be deprecated
        when charges are transferable in openff.

        Every line of ``<lig>.mol2`` with exactly nine whitespace-separated
        fields is taken as an atom record and stored in ``self.mapping``;
        its last field is added to ``self.total_charge``.
        """
        with open(self.lig + '.mol2') as infile:
            cnt = -1
            for line in infile:
                line = line.split()
                if len(line) == 9:
                    cnt += 1
                    self.mapping[cnt] = [
                        line[0],                # at idex
                        line[1],                # atname
                        line[5].split('.')[0],  # attype
                        line[8],                # charge
                        line[2],                # X coordinate
                        line[3],                # Y coordinte
                        line[4]                 # Z coordinate
                    ]
                    self.total_charge += float(line[8])

        # Compare with a tolerance: an exact ``!= 0.0`` test fires on pure
        # floating-point noise (e.g. 0.1 + 0.2 - 0.3 == 5.5e-17).
        if abs(self.total_charge) > self.CHARGE_TOLERANCE:
            print('WARNING: residual charge {} check your mol2 file!'.format(
                self.total_charge))

    def write_lib_Q(self):
        """Write ``<lig>.lib`` with the atoms, bonds and impropers blocks."""
        with open(self.lig + '.lib', 'w') as outfile:
            outfile.write('{} ! atoms no {} total charge {} \n\n'.format(
                '{LIG}', len(self.mapping), self.total_charge))

            outfile.write("[info] \n SYBYLtype RESIDUE \n\n")

            #atom and charge block:
            outfile.write("[atoms] \n")
            for at in self.mapping:
                outfile.write('{:>4s} {:10}{:11}{:>10s}\n'.format(
                    self.mapping[at][0],
                    self.mapping[at][1],
                    self.mapping[at][1].lower(),
                    self.mapping[at][3]))

            # bonded block
            outfile.write("\n[bonds]\n")
            for bond in self.parameters['Bonds']:
                ai = self.mapping[bond[0]][1]
                aj = self.mapping[bond[1]][1]
                outfile.write('{:10s}{:}\n'.format(ai, aj))

            # improper block
            outfile.write("\n[impropers]\n")
            for torsion in self.parameters['ImproperTorsions']:
                ai = self.mapping[torsion[0]][1]
                aj = self.mapping[torsion[1]][1]
                ak = self.mapping[torsion[2]][1]
                al = self.mapping[torsion[3]][1]
                outfile.write('{:10}{:10}{:10}{}\n'.format(ai, aj, ak, al))

            # Disabled charge-group section, kept for reference:
            #outfile.write("\n[charge_groups]")
            #for i, atom in enumerate(self.mapping):
            #    if self.mapping[atom][2] != 'H':
            #        outfile.write('\n{}'.format(self.mapping[atom][1]))
            #    for j, bond in enumerate(self.parameters['Bonds']):
            #        if bond[0] == i:
            #            if self.mapping[bond[1]][2] == 'H':
            #                outfile.write(' {}'.format(self.mapping[bond[1]][1]))

    def write_prm_Q(self):
        """
        Copy the template parameter file and append the ligand parameters
        directly after each ``! Ligand ... parameters`` marker line.

        Raises
        ------
        ValueError
            If the ``FF``/``merge`` combination has no template file.
            (Previously this surfaced as an ``UnboundLocalError``.)
        """
        if self.FF == 'AMBER14sb' and self.merge == True:
            prm_file = os.path.join(s.FF_DIR, 'AMBER14sb.prm')
            prm_file_out = self.FF + '_' + self.lig + '.prm'
        elif self.merge == False:
            prm_file = os.path.join(s.FF_DIR, 'NOMERGE.prm')
            prm_file_out = self.lig + '.prm'
        else:
            raise ValueError(
                'No parameter template for FF={!r} with merge={!r}'.format(
                    self.FF, self.merge))

        # Marker line in the template -> writer of the block that follows it.
        section_writers = {
            "! Ligand vdW parameters\n": self._write_vdw_block,
            "! Ligand bond parameters\n": self._write_bond_block,
            "! Ligand angle parameters\n": self._write_angle_block,
            "! Ligand torsion parameters\n": self._write_torsion_block,
            "! Ligand improper parameters\n": self._write_improper_block,
        }

        with open(prm_file) as infile, open(prm_file_out, 'w') as outfile:
            for line in infile:
                outfile.write(line)
                writer = section_writers.get(line)
                if writer is not None:
                    writer(outfile)

    def _write_vdw_block(self, outfile):
        """Write one vdW line per labelled atom."""
        for atom_indices, parameter in self.parameters['vdW'].items():
            ai = atom_indices[0]
            ai_name = self._type_name(ai)
            epsilon = self._magnitude(parameter.epsilon)
            epsilon23 = epsilon / 2
            # TO DO: CHECK IF THIS IS CORRECT!!
            Rmin = self._magnitude(parameter.sigma) / 2
            mass = self.masses[self.mapping[ai][2]]
            outfile.write(
                """{:6}{: 8.3f}{: 10.3f}{: 10.3f}{: 10.3f}{: 10.3f}{:>10s}\n"""
                .format(ai_name, Rmin, 0.00, epsilon, Rmin, epsilon23, mass))

    def _write_bond_block(self, outfile):
        """Write one line per labelled bond (force constant, length)."""
        for atom_indices, parameter in self.parameters['Bonds'].items():
            ai_name = self._type_name(atom_indices[0])
            aj_name = self._type_name(atom_indices[1])
            fc = self._magnitude(parameter.k)
            l = self._magnitude(parameter.length)
            outfile.write('{:10}{:10}{:10.1f}{:>10.3f}\n'.format(
                ai_name, aj_name, fc, l))

    def _write_angle_block(self, outfile):
        """Write one line per labelled angle (force constant, angle)."""
        for atom_indices, parameter in self.parameters['Angles'].items():
            ai_name = self._type_name(atom_indices[0])
            aj_name = self._type_name(atom_indices[1])
            ak_name = self._type_name(atom_indices[2])
            fc = self._magnitude(parameter.k)
            angle = self._magnitude(parameter.angle)
            outfile.write(
                """{:10}{:10}{:10}{: 8.2f}{:>12.3f}\n""".format(
                    ai_name, aj_name, ak_name, fc, angle))

    def _write_torsion_block(self, outfile):
        """Write one line per term of every labelled proper torsion."""
        for atom_indices, parameter in (
                self.parameters['ProperTorsions'].items()):
            ai_name = self._type_name(atom_indices[0])
            aj_name = self._type_name(atom_indices[1])
            ak_name = self._type_name(atom_indices[2])
            al_name = self._type_name(atom_indices[3])
            max_phase = len(parameter.phase)

            # Now check if there are multiple minima
            forces = []
            for i in range(max_phase):
                fc = self._magnitude(parameter.k[i])
                phase = self._magnitude(parameter.phase[i])
                paths = int(parameter.idivf[i])
                minimum = float(parameter.periodicity[i])
                # Every term but the last gets a negated periodicity
                # (presumably the file format's "more terms follow"
                # marker -- confirm against the Q documentation).
                if i != max_phase - 1:
                    minimum = minimum * -1
                forces.append((fc, minimum, phase, paths))

            for force in forces:
                outfile.write(
                    """{:10}{:10}{:10}{:10}{:>10.3f}{:>10.3f}{:>10.3f}{:>5d}\n"""
                    .format(ai_name, aj_name, ak_name, al_name,
                            force[0], force[1], force[2], force[3]))

    def _write_improper_block(self, outfile):
        """Write one line per labelled improper (first term only)."""
        for atom_indices, parameter in (
                self.parameters['ImproperTorsions'].items()):
            ai_name = self._type_name(atom_indices[0])
            aj_name = self._type_name(atom_indices[1])
            ak_name = self._type_name(atom_indices[2])
            al_name = self._type_name(atom_indices[3])
            fc = self._magnitude(parameter.k[0])
            phase = self._magnitude(parameter.phase[0])
            outfile.write(
                """{:10}{:10}{:10}{:10}{:10.3f}{:10.3f}\n""".format(
                    ai_name, aj_name, ak_name, al_name, fc, phase))

    def write_PDB(self):
        """Write ``<lig>.pdb`` with one HETATM record per mapped atom."""
        with open(self.lig + '.pdb', 'w') as outfile:
            for atom in self.mapping:
                ai = atom + 1
                ai_name = self.mapping[atom][1]
                a_el = self.mapping[atom][2]
                ax = float(self.mapping[atom][4])
                ay = float(self.mapping[atom][5])
                az = float(self.mapping[atom][6])
                at_entry = [
                    'HETATM',  # 0 ATOM/HETATM
                    ai,        # 1 ATOM serial number
                    ai_name,   # 2 ATOM name
                    '',        # 3 Alternate location indicator
                    'LIG',     # 4 Residue name
                    '',        # 5 Chain identifier
                    1,         # 6 Residue sequence number
                    '',        # 7 Code for insertion of residue
                    ax,        # 8 Orthogonal coordinates for X
                    ay,        # 9 Orthogonal coordinates for Y
                    az,        # 10 Orthogonal coordinates for Z
                    0.0,       # 11 Occupancy
                    0.0,       # 12 Temperature factor
                    a_el,      # 13 Element symbol
                    ''         # 14 Charge on atom
                ]
                outfile.write(IO.pdb_parse_out(at_entry) + '\n')
def find_smirks_parameters(parameter_tag='vdW', *smiles_patterns):
    """Finds those force field parameters with a given tag which
    would be assigned to a specified set of molecules defined by
    their smiles patterns.

    Labelling results are memoised in the module-level
    ``cached_smirks_parameters`` mapping (smiles -> tag -> list of smirks).

    Parameters
    ----------
    parameter_tag: str
        The tag of the force field parameters to find.
    smiles_patterns: str
        The smiles patterns to assign the force field parameters to.

    Returns
    -------
    dict of str and set of str
        A dictionary with keys of parameter smirks patterns, and
        values of sets of smiles patterns which would utilize
        those parameters. Every smirks of the handler is present as a
        key, so unused parameters map to an empty set.
    """
    stdout_ = sys.stdout  # Keep track of the previous value.
    stderr_ = sys.stderr  # Keep track of the previous value.

    # Swallow whatever is printed while the force field loads. The
    # ``finally`` guarantees the real streams come back even when loading
    # fails; otherwise all later output of the process would vanish.
    stream = StringIO()
    sys.stdout = stream
    sys.stderr = stream
    try:
        force_field = ForceField('smirnoff99Frosst-1.1.0.offxml')
    finally:
        sys.stdout = stdout_  # restore the previous stdout.
        sys.stderr = stderr_

    parameter_handler = force_field.get_parameter_handler(parameter_tag)

    # Initialize the mapping with all possible smirks patterns
    # to make it easier to identify which are missing.
    smiles_by_parameter_smirks = {
        parameter.smirks: set() for parameter in parameter_handler.parameters
    }

    # Populate the dictionary using the open force field toolkit.
    for smiles in smiles_patterns:

        cached_tags = cached_smirks_parameters.get(smiles)

        if cached_tags is None or parameter_tag not in cached_tags:

            try:
                molecule = Molecule.from_smiles(smiles)
            except UndefinedStereochemistryError:
                # Skip molecules with undefined stereochemistry.
                continue

            topology = Topology.from_molecules([molecule])
            labels = force_field.label_molecules(topology)[0][parameter_tag]

            # Only touch the cache once labelling has succeeded, so a
            # failure cannot leave behind an empty (wrong) cached entry.
            cached_smirks_parameters.setdefault(smiles, {})[parameter_tag] = [
                parameter.smirks for parameter in labels.values()
            ]

        parameters_with_tag = cached_smirks_parameters[smiles][parameter_tag]

        for smirks in parameters_with_tag:
            smiles_by_parameter_smirks[smirks].add(smiles)

    return smiles_by_parameter_smirks
def probe_by_parameter(probe_param, ffxml, subdir, all_probe_mols, inpickle):
    """
    For a single force field parameter, (1) find all molecules that use
    this parameter, and (2) save them to a mol2 file labeled with the
    parameter ID.

    Parameters
    ----------
    probe_param : string
        Name of the parameter to investigate
    ffxml : string
        Name of the FFXML force field file
    subdir : string
        Name of subdirectory in which to save mol2 files for each parameter
    all_probe_mols : dict
        key is string of a parameter id to be probed;
        value is an empty list
    inpickle : string
        Name of the pickle file from output of tailed_parameters.py

    Returns
    -------
    all_probe_mols : dict
        key is string of a parameter id to be probed;
        value is a list of oegraphmols with this parameter id
    """
    prefix_dict = {'a': 'Angles',
                   'b': 'Bonds',
                   'i': 'ImproperTorsions',
                   'n': 'vdW',
                   't': 'ProperTorsions'}

    # load parameter dictionaries from pickle
    # NOTE(security): unpickling can execute arbitrary code; only pass
    # pickle files produced by a trusted run.
    with open(inpickle, 'rb') as f:
        _, data_out = pickle.load(f)
    params_id_out = data_out['params_id']

    # find the first mol in outlier set with given param
    mols_with_probe = list(params_id_out[probe_param])
    probe_mol = Molecule.from_smiles(mols_with_probe[0],
                                     allow_undefined_stereo=True)
    topology = Topology.from_molecules([probe_mol])

    # load in force field
    ff = ForceField(ffxml)

    # run molecule labeling
    molecule_force_list = ff.label_molecules(topology)

    # get the smirks pattern associated with param
    prefix = probe_param[0]
    force_dict = molecule_force_list[0][prefix_dict[prefix]]
    # The smirks is only used for the banner below; fall back to None
    # instead of failing on an unbound name when no label carries this id.
    probe_smirks = next(
        (v.smirks for v in force_dict.values() if v.id == probe_param),
        None)

    print(f"\n=====\n{probe_param}: {probe_smirks}\n=====")

    # find all molecules with this parameter and save to file.
    # conformers are not considered here so these smiles refer to
    # an arbitrary conformer assigned in dict after zip
    # (since duplicate keys are removed in dict)
    outfile = f'{subdir}/param_{probe_param}.mol2'
    ofs = oechem.oemolostream()
    if not ofs.open(outfile):
        oechem.OEThrow.Fatal("Unable to open %s for writing" % outfile)

    try:
        for m in mols_with_probe:
            key = data_out['smi_dict'][m]
            print(f"writing out {key}")
            mymol = data_out['mols_dict'][key]['structure']
            oechem.OEWriteConstMolecule(ofs, mymol)

            # save to write full pdf later on
            all_probe_mols[probe_param].append(oechem.OEGraphMol(mymol))
    finally:
        # close explicitly so the output is complete even if a write fails
        ofs.close()

    return all_probe_mols
class ForceFieldEditor:
    def __init__(self, forcefield_name: str):
        """
        Load the forcefield that is going to be manipulated.

        Parameters
        ----------
        forcefield_name: str
            The string of the target forcefield path.

        Notes
        ------
            The constraints parameter handler is always removed when
            present, as the FF should be unconstrained for fitting.
        """
        self.forcefield = ForceField(forcefield_name,
                                     allow_cosmetic_attributes=True)
        # drop the constraint handler; nothing to do when there is none
        handlers = self.forcefield._parameter_handlers
        try:
            del handlers["Constraints"]
        except KeyError:
            pass

    def add_smirks(
        self,
        smirks: List[Union[AtomSmirks, AngleSmirks, BondSmirks, TorsionSmirks]],
        parameterize: bool = True,
    ) -> None:
        """
        Add each smirks to the handler matching its type. A smirks pattern
        that is already in the forcefield has its parameter re-initialised
        with the new values (keeping its id); otherwise a new parameter
        with a fresh id is appended.
        """
        # handler type -> toolkit parameter class used for new entries
        off_types = {
            SmirksType.Bonds: BondHandler.BondType,
            SmirksType.Angles: AngleHandler.AngleType,
            SmirksType.ProperTorsions: ProperTorsionHandler.ProperTorsionType,
            SmirksType.Vdw: vdWHandler.vdWType,
        }
        # handler type -> prefix of generated parameter ids
        id_prefixes = {
            SmirksType.Bonds: "b",
            SmirksType.Angles: "a",
            SmirksType.ProperTorsions: "t",
            SmirksType.Vdw: "n",
        }

        # bucket the incoming smirks per handler type, without duplicates
        by_type = {}
        for entry in smirks:
            bucket = by_type.setdefault(entry.type, [])
            if entry not in bucket:
                bucket.append(entry)

        for handler_type, entries in by_type.items():
            existing = self.forcefield.get_parameter_handler(
                handler_type).parameters
            n_existing = len(existing)
            for offset, entry in enumerate(entries, start=2):
                data = entry.to_off_smirks()
                if not parameterize:
                    del data["parameterize"]
                # an IndexError from the lookup means the pattern is new
                try:
                    match = existing[entry.smirks]
                    data["id"] = match.id
                    # re-run the init to get around conditional assignment
                    match.__init__(**data)
                except IndexError:
                    data["id"] = f"{id_prefixes[handler_type]}{n_existing + offset}"
                    existing.append(off_types[handler_type](**data))

    def label_molecule(self, molecule: off.Molecule) -> Dict[str, str]:
        """
        Type the molecule with the forcefield.

        Parameters
        ----------
        molecule: off.Molecule
            The openforcefield.topology.Molecule that should be labeled by
            the forcefield.

        Returns
        -------
        Dict[str, str]
            The parameters assigned to the molecule, organised by
            parameter handler type.
        """
        topology = molecule.to_topology()
        return self.forcefield.label_molecules(topology)[0]

    def get_smirks_parameters(
        self, molecule: off.Molecule, atoms: List[Tuple[int, ...]]
    ) -> List[Union[AtomSmirks, AngleSmirks, BondSmirks, TorsionSmirks]]:
        """
        Label the molecule and collect the smirks patterns and parameters
        that cover the requested atom tuples.
        """
        # the tuple length tells which kind of parameter is requested
        type_by_size = {
            1: SmirksType.Vdw,
            2: SmirksType.Bonds,
            3: SmirksType.Angles,
            4: SmirksType.ProperTorsions,
        }
        collected = []
        labels = self.label_molecule(molecule=molecule)
        for atom_ids in atoms:
            smirk_type = type_by_size[len(atom_ids)]
            assigned = labels[smirk_type.value][atom_ids]
            candidate = smirks_from_off(off_smirks=assigned)
            candidate.atoms.add(atom_ids)
            if candidate in collected:
                # already known: only extend the atoms it covers
                collected[collected.index(candidate)].atoms.add(atom_ids)
            else:
                collected.append(candidate)
        return collected

    def update_smirks_parameters(
        self,
        smirks: Iterable[Union[AtomSmirks, AngleSmirks, BondSmirks,
                               TorsionSmirks]],
    ) -> None:
        """
        Refresh, in place, the values of the given smirks parameters from
        this forcefield.

        Parameters
        ----------
        smirks : Iterable[Union[AtomSmirks, AngleSmirks, BondSmirks, TorsionSmirks]]
            An iterable containing smirks schemas that are to be updated.
        """
        for smirk in smirks:
            handler = self.forcefield.get_parameter_handler(smirk.type)
            # copy the current forcefield values onto the schema
            smirk.update_parameters(off_smirk=handler.parameters[smirk.smirks])

    def get_initial_parameters(
        self,
        molecule: off.Molecule,
        smirks: List[Union[AtomSmirks, AngleSmirks, BondSmirks, TorsionSmirks]],
        clear_existing: bool = True,
    ) -> List[Union[AtomSmirks, AngleSmirks, BondSmirks, TorsionSmirks]]:
        """
        Look up the parameters the forcefield assigns to the atoms of each
        smirks pattern and copy those values onto the smirks.
        """
        labels = self.label_molecule(molecule=molecule)
        for smirk in smirks:
            assigned = labels[smirk.type]
            is_torsion = smirk.type == SmirksType.ProperTorsions
            if not is_torsion:
                first_atoms = list(smirk.atoms)[0]
                smirk.update_parameters(off_smirk=assigned[first_atoms],
                                        clear_existing=True)
                continue

            # torsions can combine multiple parameter types
            # TODO is this needed?
            matched = [assigned[atoms] for atoms in smirk.atoms]
            distinct_ids = {param.id for param in matched}
            smirk.update_parameters(off_smirk=matched[0],
                                    clear_existing=clear_existing)
            # more than one type: expand the k terms with the others
            if len(distinct_ids) > 1:
                for extra in matched[1:]:
                    smirk.update_parameters(extra, clear_existing=False)
        return smirks
class SMIRNOFF(OpenMM):

    """ Derived from Engine object for carrying out OpenMM calculations that use the SMIRNOFF force field. """

    def __init__(self, name="openmm", **kwargs):
        # keywords accepted by this engine
        self.valkwd = ['ffxml', 'pdb', 'mol2', 'platname', 'precision', 'mmopts', 'vsite_bonds', 'implicit_solvent', 'restrain_k', 'freeze_atoms']
        super(SMIRNOFF, self).__init__(name=name, **kwargs)

    def readsrc(self, **kwargs):
        """
        SMIRNOFF simulations always require the following passed in via kwargs:

        Parameters
        ----------
        pdb : string
            Name of a .pdb file containing the topology of the system
        mol2 : list
            A list of .mol2 file names containing the molecule/residue templates of the system

        Also provide 1 of the following, containing the coordinates to be used:
        mol : Molecule
            forcebalance.Molecule object
        coords : string
            Name of a file (readable by forcebalance.Molecule)
            This could be the same as the pdb argument from above.

        Raises
        ------
        RuntimeError
            If a required argument is missing or a named file does not exist.
        """

        pdbfnm = kwargs.get('pdb')
        # Determine the PDB file name.
        if not pdbfnm:
            raise RuntimeError('Name of PDB file not provided.')
        elif not os.path.exists(pdbfnm):
            logger.error("%s specified but doesn't exist\n" % pdbfnm)
            raise RuntimeError

        if 'mol' in kwargs:
            self.mol = kwargs['mol']
        elif 'coords' in kwargs:
            if not os.path.exists(kwargs['coords']):
                logger.error("%s specified but doesn't exist\n" % kwargs['coords'])
                raise RuntimeError
            self.mol = Molecule(kwargs['coords'])
        else:
            logger.error('Must provide either a molecule object or coordinate file.\n')
            raise RuntimeError

        # Here we cannot distinguish the .mol2 files linked by the target
        # vs. the .mol2 files to be provided by the force field.
        # But we can assume that these files should exist when this function is called.
        self.mol2 = kwargs.get('mol2')
        if self.mol2:
            for fnm in self.mol2:
                if not os.path.exists(fnm):
                    # files the force field itself provides may not exist yet
                    if hasattr(self, 'FF') and fnm in self.FF.fnms:
                        continue
                    logger.error("%s doesn't exist" % fnm)
                    raise RuntimeError
        else:
            logger.error("Must provide a list of .mol2 files.\n")
            # Fail here like every other missing-input branch; prepare()
            # iterates self.mol2 and cannot proceed without it.
            raise RuntimeError

        self.abspdb = os.path.abspath(pdbfnm)
        mpdb = Molecule(pdbfnm)
        # copy the topology-related fields of the PDB onto the molecule
        for i in ["chain", "atomname", "resid", "resname", "elem"]:
            self.mol.Data[i] = mpdb.Data[i]

        # Store a separate copy of the molecule for reference restraint positions.
        self.ref_mol = deepcopy(self.mol)

    def prepare(self, pbc=False, mmopts={}, **kwargs):
        """
        Prepare the calculation.  Note that we don't create the
        Simulation object yet, because that may depend on MD
        integrator parameters, thermostat, barostat etc.

        This is mostly copied and modified from openmmio.py's OpenMM.prepare(),
        but we are calling ForceField() from the OpenFF toolkit and ignoring
        AMOEBA stuff.

        ``mmopts`` is only copied, never mutated, so the shared default
        dict is harmless here.
        """
        self.pdb = PDBFile(self.abspdb)

        # Create the OpenFF ForceField object.
        if hasattr(self, 'FF'):
            self.offxml = [self.FF.offxml]
            self.forcefield = self.FF.openff_forcefield
        else:
            self.offxml = listfiles(kwargs.get('offxml'), 'offxml', err=True)
            self.forcefield = OpenFF_ForceField(*self.offxml)

        ## Load mol2 files for smirnoff topology
        openff_mols = []
        for fnm in self.mol2:
            try:
                mol = OffMolecule.from_file(fnm)
            except Exception:
                logger.error("Error when loading %s" % fnm)
                raise
            openff_mols.append(mol)
        self.off_topology = OffTopology.from_openmm(self.pdb.topology, unique_molecules=openff_mols)

        # used in create_simulation()
        self.mod = Modeller(self.pdb.topology, self.pdb.positions)

        ## OpenMM options for setting up the System.
        self.mmopts = dict(mmopts)

        ## Specify frozen atoms and restraint force constant
        if 'restrain_k' in kwargs:
            self.restrain_k = kwargs['restrain_k']
        if 'freeze_atoms' in kwargs:
            self.freeze_atoms = kwargs['freeze_atoms'][:]

        ## Set system options from ForceBalance force field options.
        fftmp = False
        if hasattr(self, 'FF'):
            self.mmopts['rigidWater'] = self.FF.rigid_water
            if not all([os.path.exists(f) for f in self.FF.fnms]):
                # If the parameter files don't already exist, create them for the purpose of
                # preparing the engine, but then delete them afterward.
                fftmp = True
                self.FF.make(np.zeros(self.FF.np))

        try:
            ## Set system options from periodic boundary conditions.
            self.pbc = pbc
            ## print warning for 'nonbonded_cutoff' keywords
            if 'nonbonded_cutoff' in kwargs:
                logger.warning("nonbonded_cutoff keyword ignored because it's set in the offxml file\n")

            ## Generate OpenMM-compatible positions
            self.xyz_omms = []
            for I in range(len(self.mol)):
                position = self.mol.xyzs[I] * angstrom
                # xyz_omm = [Vec3(i[0],i[1],i[2]) for i in xyz]*angstrom
                # An extra step with adding virtual particles
                # mod = Modeller(self.pdb.topology, xyz_omm)
                # LPW commenting out because we don't have virtual sites yet.
                # mod.addExtraParticles(self.forcefield)
                if self.pbc:
                    # Obtain the periodic box
                    box = self.mol.boxes[I]
                    if box.alpha != 90.0 or box.beta != 90.0 or box.gamma != 90.0:
                        logger.error('OpenMM cannot handle nonorthogonal boxes.\n')
                        raise RuntimeError
                    box_omm = np.diag([box.a, box.b, box.c]) * angstrom
                else:
                    box_omm = None
                # Finally append it to list.
                self.xyz_omms.append((position, box_omm))

            ## Build a topology and atom lists.
            Top = self.pdb.topology
            Atoms = list(Top.atoms())
            # vss = [(i, [system.getVirtualSite(i).getParticle(j) for j in range(system.getVirtualSite(i).getNumParticles())]) \
            #            for i in range(system.getNumParticles()) if system.isVirtualSite(i)]
            self.AtomLists = defaultdict(list)
            self.AtomLists['Mass'] = [a.element.mass.value_in_unit(dalton) if a.element is not None else 0 for a in Atoms]
            # particles lighter than 1.0 are flagged 'D', all others 'A'
            self.AtomLists['ParticleType'] = ['A' if m >= 1.0 else 'D' for m in self.AtomLists['Mass']]
            self.AtomLists['ResidueNumber'] = [a.residue.index for a in Atoms]
            self.AtomMask = [a == 'A' for a in self.AtomLists['ParticleType']]
            self.realAtomIdxs = [i for i, a in enumerate(self.AtomMask) if a is True]
        finally:
            # Remove the temporary parameter files even when preparation
            # fails part-way, so they never outlive this call.
            if fftmp:
                for f in self.FF.fnms:
                    os.unlink(f)

    def update_simulation(self, **kwargs):
        """
        Create the simulation object, or update the force field
        parameters in the existing simulation object.  This should be
        run when we write a new force field XML file.
        """
        if len(kwargs) > 0:
            self.simkwargs = kwargs
        # Because self.forcefield is being updated in forcebalance.forcefield.FF.make()
        # there is no longer a need to create a new force field object here.
        try:
            self.system = self.forcefield.create_openmm_system(self.off_topology)
        except Exception:
            logger.error("Error when creating system for %s" % self.mol2)
            raise
        # Commenting out all virtual site stuff for now.
        # self.vsinfo = PrepareVirtualSites(self.system)
        self.nbcharges = np.zeros(self.system.getNumParticles())

        #----
        # If the virtual site parameters have changed,
        # the simulation object must be remade.
        #----
        # vsprm = GetVirtualSiteParameters(self.system)
        # if hasattr(self,'vsprm') and len(self.vsprm) > 0 and np.max(np.abs(vsprm - self.vsprm)) != 0.0:
        #     if hasattr(self, 'simulation'):
        #         delattr(self, 'simulation')
        # self.vsprm = vsprm.copy()

        if hasattr(self, 'simulation'):
            UpdateSimulationParameters(self.system, self.simulation)
        else:
            self.create_simulation(**self.simkwargs)

    def optimize(self, shot=0, align=True, crit=1e-4):
        """Delegate to the parent optimizer with virtual sites disabled."""
        return super(SMIRNOFF, self).optimize(shot=shot, align=align, crit=crit, disable_vsite=True)

    def interaction_energy(self, fraga, fragb):
        """
        Calculate the interaction energy for two fragments.
        Because this creates two new objects and requires passing in the mol2 argument,
        the codes are copied and modified from the OpenMM class.

        Returns ``(E(dimer) - E(A) - E(B)) / 4.184``.
        """

        self.update_simulation()

        if self.name == 'A' or self.name == 'B':
            logger.error("Don't name the engine A or B!\n")
            raise RuntimeError

        # Create two subengines (once; they are cached as self.A / self.B).
        if not (hasattr(self, 'A') and hasattr(self, 'B')):
            if hasattr(self, 'target'):
                sub_kwargs = dict(mol2=self.mol2, target=self.target)
            else:
                sub_kwargs = dict(mol2=self.mol2, platname=self.platname,
                                  precision=self.precision, offxml=self.offxml, mmopts=self.mmopts)
            for label, fragment in (("A", fraga), ("B", fragb)):
                if not hasattr(self, label):
                    setattr(self, label, SMIRNOFF(name=label, mol=self.mol.atom_select(fragment), **sub_kwargs))

        # Interaction energy needs to be in kcal/mol.
        D = self.energy()
        A = self.A.energy()
        B = self.B.energy()

        return (D - A - B) / 4.184

    def get_smirks_counter(self):
        """Get a counter for the time of appreance of each SMIRKS"""
        smirks_counter = Counter()
        for mol_forces in self.forcefield.label_molecules(self.off_topology):
            # one dict of applied parameters per force tag, e.g. 'Bonds'
            for force_dict in mol_forces.values():
                smirks_counter.update(parameter.smirks for parameter in force_dict.values())
        return smirks_counter
import time

print("Starting simulation")
# NOTE: time.process_time() counts CPU time of this process, not
# wall-clock time, so the figure printed below excludes any waiting.
start = time.process_time()

# Run the simulation
simulation.step(num_steps)

end = time.process_time()
print("Elapsed time %.2f seconds" % (end - start))
print("Hurra!")


# In[273]:


ff_applied_parameters = forcefield.label_molecules(off_topology)[0]
ff_values = []

# Dump one "<atom indices>,<bond parameter>" line per labelled bond.
# The context manager closes the file even if a write fails.
with open('ff_valuefile.txt', 'w+') as ff_valuefile:
    for atoms, bonds in ff_applied_parameters['Bonds'].items():
        ff_valuefile.write(F'{atoms},{bonds}\n')


# In[274]:


import numpy

# Re-opened for reading; left open here because nothing in this cell
# closes it.
ff_valuefile = open('ff_valuefile.txt', 'r')