def test_atom_map(self):
    """Test get atom map

    For each case, a molecule built from plain SMILES and a molecule read
    from a mol2 file are relabelled through the atom map; both must then
    produce the same canonical SMILES string.
    """
    from openeye import oechem

    # (tagged SMILES, untagged SMILES, reference mol2 file)
    cases = (
        ('[H:5][C:1]#[N+:4][C:3]([H:9])([H:10])[C:2]([H:6])([H:7])[H:8]',
         'CC[N+]#C',
         'ethylmethylidyneamonium.mol2'),
        # Test aromatic molecule
        ('[H:10][c:4]1[c:3]([c:2]([c:1]([c:6]([c:5]1[H:11])[H:12])[C:7]([H:13])([H:14])[H:15])[H:8])[H:9]',
         'Cc1ccccc1',
         'toluene.mol2'),
    )

    for tagged_smiles, plain_smiles, mol2_name in cases:
        mol_1 = openeye.smiles_to_oemol(plain_smiles)
        ifs = oechem.oemolistream(get_fn(mol2_name))
        mol_2 = oechem.OEMol()
        oechem.OEReadMolecule(ifs, mol_2)

        atom_map = utils.get_atom_map(tagged_smiles, mol_1)

        # Overwrite the atomic number of paired atoms with a shared running
        # label: mol_1 is addressed through the map, mol_2 by (tag - 1).
        for label, tag in enumerate(atom_map, start=1):
            mol_1.GetAtom(oechem.OEHasAtomIdx(atom_map[tag])).SetAtomicNum(label)
            mol_2.GetAtom(oechem.OEHasAtomIdx(tag - 1)).SetAtomicNum(label)

        self.assertEqual(oechem.OECreateCanSmiString(mol_1),
                         oechem.OECreateCanSmiString(mol_2))
def SmilesToFragments(smiles, fgroup_smarts, bondOrderThreshold=1.2, chargesMol=True):
    """
    Fragment molecule at bonds below Bond Order Threshold

    Parameters
    ----------
    smiles: str
        smiles string of molecule to fragment
    fgroup_smarts:
        functional group definitions, forwarded unchanged to ``_tag_fgroups``
        as its ``fgroups_smarts`` argument
    bondOrderThreshold: float, optional, default 1.2
        threshold forwarded to ``FragGraph``
    chargesMol: bool, optional, default True
        if True, the charged molecule is returned together with the fragments

    Returns
    -------
    frags: list of OE AtomBondSets
        one entry per subgraph returned by ``FragGraph``
    charged:
        the charged molecule the fragments refer to; only returned when
        ``chargesMol`` is True (the result is then the tuple ``(frags, charged)``)
    """
    # Charge molecule
    oemol = openeye.smiles_to_oemol(smiles)
    charged = openeye.get_charges(oemol, keep_confs=1)

    # Tag functional groups
    _tag_fgroups(charged, fgroups_smarts=fgroup_smarts)

    # Generate fragments
    G = OeMolToGraph(charged)
    subgraphs = FragGraph(G, bondOrderThreshold=bondOrderThreshold)
    frags = [subgraphToAtomBondSet(G, subgraph, charged) for subgraph in subgraphs]

    if chargesMol:
        return frags, charged
    return frags
def enumerate_conformations(name, smiles):
    """Generate geometry and run epik.

    Parameters
    ----------
    name : str
        Molecule name. Used for the output subfolder, as the prefix of every
        output file, and (letters only, upper-cased, first three characters)
        as the residue name written to the mol2 file.
    smiles : str
        SMILES string the molecule is built from.

    Returns
    -------
    None
        Results are written under ``output_dir/<name>/``. If charging raises
        ``RuntimeError`` the traceback is printed and the molecule is skipped.
    """
    # Generate molecule geometry with OpenEye
    # NOTE: print is called with a single argument so that the output is the
    # same under both Python 2 (print statement) and Python 3 (print function).
    print("Generating molecule {}".format(name))
    oe_molecule = openeye.smiles_to_oemol(smiles)
    try:
        oe_molecule = openeye.get_charges(oe_molecule, keep_confs=1)
    except RuntimeError:
        traceback.print_exc()
        print("Skipping molecule " + name)
        return

    # Create output subfolder
    output_basepath = os.path.join(output_dir, name)
    if not os.path.isdir(output_basepath):
        os.mkdir(output_basepath)
    # From here on output_basepath is a file prefix: <output_dir>/<name>/<name>
    output_basepath = os.path.join(output_basepath, name)

    # Save mol2 file with residue name = first three uppercase letters
    print("Running epik on molecule {}".format(name))
    mol2_file_path = output_basepath + '-input.mol2'
    residue_name = re.sub('[^A-Za-z]+', '', name.upper())[:3]
    openeye.molecule_to_mol2(oe_molecule, mol2_file_path, residue_name=residue_name)

    # Run epik on mol2 file
    mae_file_path = output_basepath + '-epik.mae'
    schrodinger.run_epik(mol2_file_path, mae_file_path, tautomerize=True,
                         max_structures=32, ph_tolerance=10.0)

    # Convert maestro file to sdf and mol2
    schrodinger.run_structconvert(mae_file_path, output_basepath + '-epik.sdf')
    schrodinger.run_structconvert(mae_file_path, output_basepath + '-epik.mol2')
def test_atom_map_order(self):
    """Test atom map

    The molecule is built from the tagged SMILES itself, so the atom that the
    map returns for a tag must report that same tag as its map index.
    """
    from openeye import oechem

    tagged_smiles = '[H:5][C:1]#[N+:4][C:3]([H:9])([H:10])[C:2]([H:6])([H:7])[H:8]'
    mol_from_tagged_smiles = openeye.smiles_to_oemol(tagged_smiles)
    atom_map = utils.get_atom_map(tagged_smiles, mol_from_tagged_smiles)

    # Compare atom map to tag
    n_tags = len(atom_map)
    for offset in range(n_tags):
        tag = offset + 1
        mapped_atom = mol_from_tagged_smiles.GetAtom(oechem.OEHasAtomIdx(atom_map[tag]))
        self.assertEqual(tag, mapped_atom.GetMapIdx())
def test_atom_map_order(self):
    """Test atom map

    Tags are checked in ascending order from 1 to ``len(atom_map)``: the atom
    looked up through the map must carry the tag as its map index.
    """
    from openeye import oechem

    tagged_smiles = '[H:5][C:1]#[N+:4][C:3]([H:9])([H:10])[C:2]([H:6])([H:7])[H:8]'
    mol_from_tagged_smiles = openeye.smiles_to_oemol(tagged_smiles)
    atom_map = utils.get_atom_map(tagged_smiles, mol_from_tagged_smiles)

    # Compare atom map to tag
    last_tag = len(atom_map)
    tag = 1
    while tag <= last_tag:
        has_idx = oechem.OEHasAtomIdx(atom_map[tag])
        tagged_atom = mol_from_tagged_smiles.GetAtom(has_idx)
        self.assertEqual(tag, tagged_atom.GetMapIdx())
        tag += 1
def test_atom_map(self):
    """Test get atom map

    Runs the same relabel-and-compare check on an aliphatic and an aromatic
    molecule.
    """
    from openeye import oechem

    def relabel_and_compare(tagged_smiles, plain_smiles, mol2_name):
        # Build one molecule from SMILES and read the other from mol2.
        mol_1 = openeye.smiles_to_oemol(plain_smiles)
        inf = get_fn(mol2_name)
        ifs = oechem.oemolistream(inf)
        mol_2 = oechem.OEMol()
        oechem.OEReadMolecule(ifs, mol_2)

        atom_map = utils.get_atom_map(tagged_smiles, mol_1)

        # Give paired atoms the same atomic number so that the canonical
        # SMILES of both molecules only agree if the pairing is consistent.
        label = 0
        for tag in atom_map:
            label += 1
            from_smiles = mol_1.GetAtom(oechem.OEHasAtomIdx(atom_map[tag]))
            from_smiles.SetAtomicNum(label)
            from_mol2 = mol_2.GetAtom(oechem.OEHasAtomIdx(tag - 1))
            from_mol2.SetAtomicNum(label)

        self.assertEqual(oechem.OECreateCanSmiString(mol_1),
                         oechem.OECreateCanSmiString(mol_2))

    relabel_and_compare(
        '[H:5][C:1]#[N+:4][C:3]([H:9])([H:10])[C:2]([H:6])([H:7])[H:8]',
        'CC[N+]#C',
        'ethylmethylidyneamonium.mol2')

    # Test aromatic molecule
    relabel_and_compare(
        '[H:10][c:4]1[c:3]([c:2]([c:1]([c:6]([c:5]1[H:11])[H:12])[C:7]([H:13])([H:14])[H:15])[H:8])[H:9]',
        'Cc1ccccc1',
        'toluene.mol2')
def get_atom_map(tagged_smiles, molecule=None):
    """
    Returns a dictionary that maps tag on SMILES to atom index in molecule.

    Parameters
    ----------
    tagged_smiles: str
        index-tagged explicit hydrogen SMILES string
    molecule: OEMol, optional
        molecule to generate map for. If None, a new OEMol is generated from
        the tagged SMILES and the map refers to that molecule.

    Returns
    -------
    atom_map: dict or False
        a dictionary that maps tag to atom index {tag: idx}, or False when the
        substructure search finds no match.

    Notes
    -----
    The molecule generated for ``molecule=None`` is NOT returned. Earlier
    revisions documented a ``(molecule, atom_map)`` return for that case, but
    the branch could never run because ``molecule`` had already been rebound;
    the return value has therefore always been the map alone, and that is kept
    so existing callers are unaffected.
    NOTE(review): callers that need the generated molecule should build it
    themselves and pass it in.
    """
    if molecule is None:
        molecule = openeye.smiles_to_oemol(tagged_smiles)

    ss = oechem.OESubSearch(tagged_smiles)
    oechem.OEPrepareSearch(molecule, ss)
    # Only the first match is requested.
    ss.SetMaxMatches(1)

    atom_map = {}
    t1 = time.time()
    matches = list(ss.Match(molecule))
    t2 = time.time()
    seconds = t2 - t1
    logger().info("Substructure search took {} seconds".format(seconds))

    if not matches:
        logger().info("MCSS failed for {}, smiles: {}".format(molecule.GetTitle(), tagged_smiles))
        return False

    for match in matches:
        for ma in match.GetAtoms():
            atom_map[ma.pattern.GetMapIdx()] = ma.target.GetIdx()

    # sanity check: log the SMILES of the matched subset (uses the last match)
    mol = oechem.OEGraphMol()
    oechem.OESubsetMol(mol, match, True)
    logger().info("Match SMILES: {}".format(oechem.OEMolToSmiles(mol)))

    return atom_map
def get_atom_map(tagged_smiles, molecule=None):
    """
    Returns a dictionary that maps tag on SMILES to atom index in molecule.

    Parameters
    ----------
    tagged_smiles: str
        index-tagged explicit hydrogen SMILES string
    molecule: OEMol, optional
        molecule to generate map for. If None, a new OEMol is generated from
        the tagged SMILES and the map refers to that molecule.

    Returns
    -------
    atom_map: dict or False
        a dictionary that maps tag to atom index {tag: idx}, or False when the
        substructure search finds no match.

    Notes
    -----
    The molecule generated for ``molecule=None`` is NOT returned. The former
    ``if molecule is None: return molecule, atom_map`` tail was unreachable
    (``molecule`` is always bound by then), so the effective return value has
    always been the map alone; that behaviour is preserved here.
    NOTE(review): pass an explicit molecule if the indices must be resolved
    against a molecule the caller holds.
    """
    if molecule is None:
        molecule = openeye.smiles_to_oemol(tagged_smiles)

    ss = oechem.OESubSearch(tagged_smiles)
    oechem.OEPrepareSearch(molecule, ss)
    # Only the first match is requested.
    ss.SetMaxMatches(1)

    atom_map = {}
    t1 = time.time()
    matches = list(ss.Match(molecule))
    t2 = time.time()
    seconds = t2 - t1
    logger().info("Substructure search took {} seconds".format(seconds))

    if not matches:
        logger().info("MCSS failed for {}, smiles: {}".format(molecule.GetTitle(), tagged_smiles))
        return False

    for match in matches:
        for ma in match.GetAtoms():
            atom_map[ma.pattern.GetMapIdx()] = ma.target.GetIdx()

    # sanity check: log the SMILES of the matched subset (uses the last match)
    mol = oechem.OEGraphMol()
    oechem.OESubsetMol(mol, match, True)
    logger().info("Match SMILES: {}".format(oechem.OEMolToSmiles(mol)))

    return atom_map
def __init__(self, cas_or_aa, min_atoms=6):
    """
    Initialize using cas numbers OR amino acid name

    Requires openmoltools.openeye and cirpy

    Arguments
        cas_or_aa (list of strings) either cas number or name of amino acid

    Optional Arguments
        min_atoms (int) - a minimum number of atoms for substructure match (default: 6)

    Creates class variables:
        self.cas_or_aa (list of strings) input representing molecules to be combined
        self.smiles_strings (list of strings) smiles representation of molecules to be combined
        self.ligands (list of OEMol) openeye molecule representation of molecules to be combined
        self.title (string) used as an identifier for input group of molecules
        self.min_atoms (int) minimum number of common atoms to constitute a substructure match (default: 6)

    The remaining attributes (common_substructure, dual_topology,
    pdb_filename, ffxml_filename, each_molecule_N, mapping_dictionaries) are
    created empty / None here.
    """
    self.cas_or_aa = cas_or_aa
    self.smiles_strings = []
    self.ligands = []

    # Resolve every identifier to SMILES, then build and charge the molecule.
    for identifier in cas_or_aa:
        resolved_smiles = cirpy.resolve(identifier, 'smiles')
        self.smiles_strings.append(resolved_smiles)
        charged_ligand = openeye.get_charges(
            openeye.smiles_to_oemol(resolved_smiles), strictStereo=False)
        self.ligands.append(charged_ligand)

    # The first identifier names the whole group.
    first_identifier = self.cas_or_aa[0]
    self.title = first_identifier + "_and_analogs"
    self.min_atoms = min_atoms

    # Placeholders, initialised empty.
    self.common_substructure = None
    self.dual_topology = None
    self.each_molecule_N = []
    self.mapping_dictionaries = []
    self.pdb_filename = None
    self.ffxml_filename = None
def __init__(self, cas_or_aa, min_atoms=6):
    """
    Initialize using cas numbers OR amino acid name

    Requires openmoltools.openeye and cirpy

    Arguments
        cas_or_aa (list of strings) either cas number or name of amino acid

    Optional Arguments
        min_atoms (int) - a minimum number of atoms for substructure match (default: 6)

    Creates class variables:
        self.cas_or_aa (list of strings) input representing molecules to be combined
        self.smiles_strings (list of strings) smiles representation of molecules to be combined
        self.ligands (list of OEMol) openeye molecule representation of molecules to be combined
        self.title (string) used as an identifier for input group of molecules
        self.min_atoms (int) minimum number of common atoms to constitute a substructure match (default: 6)

    All other attributes set here start out as None or as empty lists.
    """
    self.cas_or_aa = cas_or_aa
    self.smiles_strings = []
    self.ligands = []

    for entry in cas_or_aa:
        # Look up the SMILES for this entry and record it before building
        # the molecule, so smiles_strings and ligands stay index-aligned.
        entry_smiles = cirpy.resolve(entry, 'smiles')
        self.smiles_strings.append(entry_smiles)

        oemol = openeye.smiles_to_oemol(entry_smiles)
        oemol = openeye.get_charges(oemol, strictStereo=False)
        self.ligands.append(oemol)

    self.title = self.cas_or_aa[0] + "_and_analogs"
    self.min_atoms = min_atoms

    self.common_substructure = None
    self.dual_topology = None
    self.each_molecule_N = []
    self.mapping_dictionaries = []
    self.pdb_filename = None
    self.ffxml_filename = None
def test_mapped_xyz(self):
    """Test writing out mapped xyz

    Writes mapped xyz text for a SMILES-built toluene (through the computed
    atom map) and for the mol2 toluene (through an identity-style map), then
    compares the third space-separated field of every line pair.
    """
    from openeye import oechem, oeomega

    tagged_smiles = '[H:10][c:4]1[c:3]([c:2]([c:1]([c:6]([c:5]1[H:11])[H:12])[C:7]([H:13])([H:14])[H:15])[H:8])[H:9]'
    mol_1 = openeye.smiles_to_oemol('Cc1ccccc1')
    ifs = oechem.oemolistream(get_fn('toluene.mol2'))
    mol_2 = oechem.OEMol()
    oechem.OEReadMolecule(ifs, mol_2)

    atom_map = utils.get_atom_map(tagged_smiles, mol_1)
    # Relabel paired atoms with a shared running atomic number.
    for label, tag in enumerate(atom_map, start=1):
        mol_1.GetAtom(oechem.OEHasAtomIdx(atom_map[tag])).SetAtomicNum(label)
        mol_2.GetAtom(oechem.OEHasAtomIdx(tag - 1)).SetAtomicNum(label)

    xyz_1 = utils.to_mapped_xyz(mol_1, atom_map)

    # molecule generated from mol2 should be in the right order.
    atom_map_mol2 = {tag: tag - 1 for tag in range(1, 16)}
    xyz_2 = utils.to_mapped_xyz(mol_2, atom_map_mol2)

    # The element after the final newline is dropped from both outputs.
    rows_1 = xyz_1.split('\n')[:-1]
    rows_2 = xyz_2.split('\n')[:-1]
    for row_1, row_2 in zip(rows_1, rows_2):
        self.assertEqual(row_1.split(' ')[2], row_2.split(' ')[2])
def test_mapped_xyz(self):
    """Test writing out mapped xyz

    The mapped xyz output of a SMILES-built molecule must agree, line by line
    in the third space-separated field, with the output of the same molecule
    read from mol2.
    """
    from openeye import oechem, oeomega

    tagged_smiles = '[H:10][c:4]1[c:3]([c:2]([c:1]([c:6]([c:5]1[H:11])[H:12])[C:7]([H:13])([H:14])[H:15])[H:8])[H:9]'
    mol_1 = openeye.smiles_to_oemol('Cc1ccccc1')

    mol2_path = get_fn('toluene.mol2')
    stream = oechem.oemolistream(mol2_path)
    mol_2 = oechem.OEMol()
    oechem.OEReadMolecule(stream, mol_2)

    atom_map = utils.get_atom_map(tagged_smiles, mol_1)
    counter = 0
    for tag in atom_map:
        counter += 1
        smiles_atom = mol_1.GetAtom(oechem.OEHasAtomIdx(atom_map[tag]))
        smiles_atom.SetAtomicNum(counter)
        mol2_atom = mol_2.GetAtom(oechem.OEHasAtomIdx(tag - 1))
        mol2_atom.SetAtomicNum(counter)

    xyz_1 = utils.to_mapped_xyz(mol_1, atom_map)

    # molecule generated from mol2 should be in the right order.
    atom_map_mol2 = dict((tag, tag - 1) for tag in range(1, 16))
    xyz_2 = utils.to_mapped_xyz(mol_2, atom_map_mol2)

    line_pairs = zip(xyz_1.split('\n')[:-1], xyz_2.split('\n')[:-1])
    for line_1, line_2 in line_pairs:
        field_1 = line_1.split(' ')[2]
        field_2 = line_2.split(' ')[2]
        self.assertEqual(field_1, field_2)
def enumerate_conformations(name, smiles=None, pdbname=None):
    """Run Epik to get protonation states using PDB residue templates for naming.

    Parameters
    ----------
    name : str
       Common name of molecule (used to create subdirectory)
    smiles : str
       Isomeric SMILES string
    pdbname : str
       Three-letter PDB code (e.g. 'DB8'). Takes precedence over ``smiles``
       when both are given. If it contains spaces only the first entry is used.

    Returns
    -------
    None
        All results are written to files prefixed ``<output_dir>/<name>/<name>``.
        When the SMILES route fails to charge the molecule with a
        ``RuntimeError`` the traceback is printed and the function returns early.

    Raises
    ------
    ValueError
        If neither ``smiles`` nor ``pdbname`` is provided.
    """
    # Create output subfolder
    output_basepath = os.path.join(output_dir, name)
    if not os.path.isdir(output_basepath):
        os.mkdir(output_basepath)
    # From here on output_basepath is a file prefix, not a directory.
    output_basepath = os.path.join(output_basepath, name)

    if pdbname:
        # Make sure to only use one entry if there are multiple
        if ' ' in pdbname:
            pdbnames = pdbname.split(' ')
            print("Splitting '%s' into first entry only: '%s'" % (pdbname, pdbnames[0]))
            pdbname = pdbnames[0]

        # Retrieve PDB (for atom names)
        url = 'http://ligand-expo.rcsb.org/reports/%s/%s/%s_model.pdb' % (pdbname[0], pdbname, pdbname)
        pdb_filename = output_basepath + '-input.pdb'
        retrieve_url(url, pdb_filename)
        pdb_molecule = read_molecule(pdb_filename)

        # Retrieve SDF (for everything else)
        url = 'http://ligand-expo.rcsb.org/reports/%s/%s/%s_model.sdf' % (pdbname[0], pdbname, pdbname)
        sdf_filename = output_basepath + '-input.sdf'
        retrieve_url(url, sdf_filename)
        sdf_molecule = read_molecule(sdf_filename)

        # Replace atom names in SDF
        # NOTE(review): atoms are paired positionally; this presumes both
        # files list atoms in the same order -- confirm.
        for (sdf_atom, pdb_atom) in zip(sdf_molecule.GetAtoms(), pdb_molecule.GetAtoms()):
            sdf_atom.SetName(pdb_atom.GetName())
        # Assign Tripos atom types
        oechem.OETriposAtomTypeNames(sdf_molecule)
        oechem.OETriposBondTypeNames(sdf_molecule)

        oe_molecule = sdf_molecule

        # We already know the residue name
        residue_name = pdbname
    elif smiles:
        # Generate molecule geometry with OpenEye
        print("Generating molecule {}".format(name))
        oe_molecule = openeye.smiles_to_oemol(smiles)
        # Assign Tripos atom types
        oechem.OETriposAtomTypeNames(oe_molecule)
        oechem.OETriposBondTypeNames(oe_molecule)
        try:
            oe_molecule = openeye.get_charges(oe_molecule, keep_confs=1)
        except RuntimeError:
            traceback.print_exc()
            print("Skipping molecule " + name)
            return
        # Residue name = first three letters of the upper-cased name.
        residue_name = re.sub('[^A-Za-z]+', '', name.upper())[:3]
    else:
        # ValueError is a subclass of Exception, so callers that caught the
        # previous bare Exception keep working.
        raise ValueError('Must provide SMILES string or pdbname')

    # Save mol2 file, preserving atom names
    print("Running epik on molecule {}".format(name))
    mol2_file_path = output_basepath + '-input.mol2'
    write_mol2_preserving_atomnames(mol2_file_path, oe_molecule, residue_name)

    # Run epik on mol2 file
    mae_file_path = output_basepath + '-epik.mae'
    schrodinger.run_epik(mol2_file_path, mae_file_path, tautomerize=False,
                         max_structures=100,
                         min_probability=np.exp(-MAX_ENERGY_PENALTY), ph=7.4)

    # Convert maestro file to sdf and mol2
    output_sdf_filename = output_basepath + '-epik.sdf'
    output_mol2_filename = output_basepath + '-epik.mol2'
    schrodinger.run_structconvert(mae_file_path, output_sdf_filename)
    schrodinger.run_structconvert(mae_file_path, output_mol2_filename)

    # Read SDF file.
    ifs_sdf = oechem.oemolistream()
    ifs_sdf.SetFormat(oechem.OEFormat_SDF)
    ifs_sdf.open(output_sdf_filename)
    sdf_molecule = oechem.OEGraphMol()

    # Read MOL2 file.
    ifs_mol2 = oechem.oemolistream()
    ifs_mol2.open(output_mol2_filename)
    mol2_molecule = oechem.OEMol()

    # Assign charges.
    charged_molecules = list()
    index = 0
    try:
        # Both files are read in lockstep: the SDF record supplies the Epik
        # tags, the mol2 record supplies the molecule that gets charged.
        while oechem.OEReadMolecule(ifs_sdf, sdf_molecule):
            oechem.OEReadMolecule(ifs_mol2, mol2_molecule)

            index += 1
            print("Charging molecule %d" % (index))
            try:
                # Charge molecule.
                charged_molecule = openeye.get_charges(mol2_molecule, max_confs=800,
                                                       strictStereo=False, normalize=True,
                                                       keep_confs=None)
                # Assign Tripos types
                oechem.OETriposAtomTypeNames(charged_molecule)
                oechem.OETriposBondTypeNames(charged_molecule)
                # Store tags.
                oechem.OECopySDData(charged_molecule, sdf_molecule)
                # Store molecule
                charged_molecules.append(charged_molecule)
            except Exception as e:
                # Deliberate best-effort: a state that cannot be charged is
                # reported and dropped, the remaining states are still processed.
                print(e)
                print("Skipping protomer/tautomer because of failed charging.")
    finally:
        # Clean up, even if reading fails part-way through.
        ifs_sdf.close()
        ifs_mol2.close()

    # Write state penalties, one per successfully charged molecule.
    with open(output_basepath + '-state-penalties.out', 'w') as outfile:
        for charged_molecule in charged_molecules:
            # Get Epik data.
            epik_State_Penalty = float(oechem.OEGetSDData(charged_molecule, "r_epik_State_Penalty"))
            outfile.write('%16.8f\n' % epik_State_Penalty)

    # Write as PDB
    charged_pdb_filename = output_basepath + '-epik-charged.pdb'
    ofs = oechem.oemolostream(charged_pdb_filename)
    try:
        flavor = (oechem.OEOFlavor_PDB_CurrentResidues
                  | oechem.OEOFlavor_PDB_ELEMENT
                  | oechem.OEOFlavor_PDB_BONDS
                  | oechem.OEOFlavor_PDB_HETBONDS
                  | oechem.OEOFlavor_PDB_BOTH)
        ofs.SetFlavor(oechem.OEFormat_PDB, flavor)
        for charged_molecule in charged_molecules:
            # Fix residue names
            for atom in charged_molecule.GetAtoms():
                residue = oechem.OEAtomGetResidue(atom)
                residue.SetName(residue_name)
                oechem.OEAtomSetResidue(atom, residue)
            oechem.OEWriteMolecule(ofs, charged_molecule)
    finally:
        ofs.close()

    # Write molecules as mol2.
    charged_mol2_filename = output_basepath + '-epik-charged.mol2'
    write_mol2_preserving_atomnames(charged_mol2_filename, charged_molecules, residue_name)
eMolID_can_iso_smiles_dict = eMolID_smiles_dict
# Persist the SMILES dictionary. The context manager guarantees the pickle
# file is flushed and closed even if pickling fails.
with open("eMolID_can_iso_smiles_dict.pickle", "wb") as pickle_file:
    pickle.dump(eMolID_can_iso_smiles_dict, pickle_file)
print("Finished converting eMolecules SMILES to canonical isomeric SMILES.")
print("\n")

##### CONVERT SMILES TO OEMOL #####

print("Converting SMILES to OEMol...")
# Maps each key of eMolID_smiles_dict to the OEMol built from its SMILES.
eMolID_oemol_dict = {}
for key, value in eMolID_smiles_dict.items():
    # Create an OEMol from a SMILES string.
    oemol_molecule = omtoe.smiles_to_oemol(smiles=value)
    eMolID_oemol_dict[key] = oemol_molecule
print("\n")

##### GENERATE CHARGED CONFORMERS AND SAVE AS MOL2 FILE #####

# Output directory for the mol2 files; created on first run only.
mol2_directory_path = "./mol2_files"
if not os.path.exists(mol2_directory_path):
    os.makedirs(mol2_directory_path)
    print("{} directory created.".format(mol2_directory_path))

print("Generating charged OEMol molecules...")
""" Test fragmentation """ __author__ = 'Chaya D. Stern' from torsionfit.tests.utils import get_fn, has_openeye, FileIOTestCase import unittest # TODO should I move this to SetUp? if has_openeye: from openmoltools.openeye import get_charges, smiles_to_oemol import openeye.oechem as oechem from torsionfit.qmscan import fragment mol = smiles_to_oemol( 'CN(C)C/C=C/C(=O)NC1=C(C=C2C(=C1)C(=NC=N2)NC3=CC(=C(C=C3)F)Cl)O[C@H]4CCOC4' ) charged = get_charges(mol, keep_confs=1) class TestFragments(FileIOTestCase): @unittest.skipUnless(has_openeye, "Cannot test without OpenEye") def test_tag_funcgroup(self): """ Test tag functional groups """ tagged_funcgroups = fragment._tag_fgroups(charged) self.assertEquals(len(tagged_funcgroups), 3) atom_idx = tagged_funcgroups['amide_0'][0].pop() atom = charged.GetAtom(oechem.OEHasAtomIdx(atom_idx)) fgroup = atom.GetData('fgroup') self.assertEquals('amide_0', fgroup) @unittest.skipUnless(has_openeye, "Cannot test without OpenEye") def test_tag_rings(self):
# Create output directory if not os.path.exists(LIGANDS_DIR_PATH): os.makedirs(LIGANDS_DIR_PATH) # Parse SMILES file and generate ligand smiles_file = open(SMILES_FILE_PATH, 'r') for line in smiles_file: # Get SMILES representation from CVS file ligand_name, smiles_str = line.strip().split(',') print "Generating inhibitor", ligand_name # Convert to OEMol molecule mol_oemol = smiles_to_oemol(smiles_str) mol_oemol.SetTitle(ligand_name) # Use OpenEye Omega toolkit to generate lowest energy structure omega = oeomega.OEOmega() omega.SetCanonOrder(False) omega.SetMaxConfs(OMEGA_MAX_CONFS) omega(mol_oemol) # Generate protonation/tautomeric states with epik and charge molecule valid_structure = False with working_directory(temp_dir): for i in range(mol_oemol.GetMaxConfIdx()): try: confomer = mol_oemol.GetConf(oechem.OEHasConfIdx(i)) mk_single_conformer_epik(ligand_name, confomer, pH=PH)
def __init__(self, molecules: List[str], output_filename: str, ncmc_switching_times: Dict[str, int],
             equilibrium_steps: Dict[str, int], timestep: unit.Quantity, initial_molecule: str=None,
             geometry_options: Dict=None):
    """Set up explicit-solvent and vacuum SAMS samplers for a set of small molecules.

    Parameters
    ----------
    molecules : list of str
        SMILES strings of the molecules; each is canonicalized with
        ``SmallMoleculeSetProposalEngine.canonicalize_smiles``.
    output_filename : str
        Filename passed to ``NetCDFStorage``.
    ncmc_switching_times : dict of str: int
        Indexed by environment name ('explicit', 'vacuum'); used both as the
        ``nsteps`` option and as the NCMC write interval of each sampler.
    equilibrium_steps : dict of str: int
        Stored as ``self._n_equilibrium_steps``; not otherwise used here.
    timestep : unit.Quantity
        Timestep passed to ``LangevinSplittingDynamicsMove``.
    initial_molecule : str, optional
        SMILES of the molecule to start from. If not given, one entry of
        ``molecules`` is drawn at random.
    geometry_options : dict, optional
        If given, must provide ``'use_sterics'`` and ``'n_torsion_divisions'``
        (the legacy misspelling ``'n_torsion_divsions'`` is still accepted),
        each indexed by environment name. Otherwise 180 torsion divisions
        without sterics are used.
    """
    self._molecules = [SmallMoleculeSetProposalEngine.canonicalize_smiles(molecule) for molecule in molecules]
    environments = ['explicit', 'vacuum']
    temperature = 298.15 * unit.kelvin
    pressure = 1.0 * unit.atmospheres
    constraints = app.HBonds
    self._storage = NetCDFStorage(output_filename)
    self._ncmc_switching_times = ncmc_switching_times
    self._n_equilibrium_steps = equilibrium_steps
    self._geometry_options = geometry_options

    # Create a system generator for our desired forcefields.
    from perses.rjmc.topology_proposal import SystemGenerator
    system_generators = dict()
    from pkg_resources import resource_filename
    gaff_xml_filename = resource_filename('perses', 'data/gaff.xml')
    barostat = openmm.MonteCarloBarostat(pressure, temperature)
    system_generators['explicit'] = SystemGenerator(
        [gaff_xml_filename, 'tip3p.xml'],
        forcefield_kwargs={'nonbondedCutoff': 9.0 * unit.angstrom,
                           'implicitSolvent': None,
                           'constraints': constraints,
                           'ewaldErrorTolerance': 1e-5,
                           'hydrogenMass': 3.0*unit.amu},
        periodic_forcefield_kwargs={'nonbondedMethod': app.PME},
        barostat=barostat)
    system_generators['vacuum'] = SystemGenerator(
        [gaff_xml_filename],
        forcefield_kwargs={'implicitSolvent': None,
                           'constraints': constraints,
                           'hydrogenMass': 3.0*unit.amu},
        nonperiodic_forcefield_kwargs={'nonbondedMethod': app.NoCutoff})

    #
    # Create topologies and positions
    #
    topologies = dict()
    positions = dict()

    from openmoltools import forcefield_generators
    forcefield = app.ForceField(gaff_xml_filename, 'tip3p.xml')
    forcefield.registerTemplateGenerator(forcefield_generators.gaffTemplateGenerator)

    # Create molecule in vacuum.
    from perses.utils.openeye import extractPositionsFromOEMol
    from openmoltools.openeye import smiles_to_oemol, generate_conformers
    if initial_molecule:
        smiles = initial_molecule
    else:
        smiles = np.random.choice(molecules)
    molecule = smiles_to_oemol(smiles)
    molecule = generate_conformers(molecule, max_confs=1)
    topologies['vacuum'] = forcefield_generators.generateTopologyFromOEMol(molecule)
    positions['vacuum'] = extractPositionsFromOEMol(molecule)

    # Create molecule in solvent.
    modeller = app.Modeller(topologies['vacuum'], positions['vacuum'])
    modeller.addSolvent(forcefield, model='tip3p', padding=9.0 * unit.angstrom)
    topologies['explicit'] = modeller.getTopology()
    positions['explicit'] = modeller.getPositions()

    # Set up the proposal engines.
    proposal_engines = dict()
    for environment in environments:
        proposal_engines[environment] = SmallMoleculeSetProposalEngine(self._molecules,
                                                                       system_generators[environment])

    # Generate systems
    systems = dict()
    for environment in environments:
        systems[environment] = system_generators[environment].build_system(topologies[environment])

    # Define thermodynamic state of interest.
    thermodynamic_states = dict()
    thermodynamic_states['explicit'] = states.ThermodynamicState(system=systems['explicit'],
                                                                 temperature=temperature, pressure=pressure)
    thermodynamic_states['vacuum'] = states.ThermodynamicState(system=systems['vacuum'],
                                                               temperature=temperature)

    # Create SAMS samplers
    from perses.samplers.samplers import ExpandedEnsembleSampler, SAMSSampler
    mcmc_samplers = dict()
    exen_samplers = dict()
    sams_samplers = dict()
    for environment in environments:
        storage = NetCDFStorageView(self._storage, envname=environment)

        if self._geometry_options:
            # Prefer the correctly spelled key, but keep honouring the
            # historical misspelling so existing callers do not break.
            if 'n_torsion_divisions' in self._geometry_options:
                torsion_key = 'n_torsion_divisions'
            else:
                torsion_key = 'n_torsion_divsions'
            n_torsion_divisions = self._geometry_options[torsion_key][environment]
            use_sterics = self._geometry_options['use_sterics'][environment]
        else:
            n_torsion_divisions = 180
            use_sterics = False

        geometry_engine = geometry.FFAllAngleGeometryEngine(storage=storage,
                                                            n_torsion_divisions=n_torsion_divisions,
                                                            use_sterics=use_sterics)
        move = mcmc.LangevinSplittingDynamicsMove(timestep=timestep, splitting="V R O R V",
                                                  n_restart_attempts=10)
        chemical_state_key = proposal_engines[environment].compute_state_key(topologies[environment])
        if environment == 'explicit':
            # Only the explicit-solvent state is given box vectors.
            sampler_state = states.SamplerState(
                positions=positions[environment],
                box_vectors=systems[environment].getDefaultPeriodicBoxVectors())
        else:
            sampler_state = states.SamplerState(positions=positions[environment])
        mcmc_samplers[environment] = mcmc.MCMCSampler(thermodynamic_states[environment], sampler_state, move)

        exen_samplers[environment] = ExpandedEnsembleSampler(
            mcmc_samplers[environment], topologies[environment], chemical_state_key,
            proposal_engines[environment], geometry_engine,
            options={'nsteps': self._ncmc_switching_times[environment]},
            storage=storage,
            ncmc_write_interval=self._ncmc_switching_times[environment])
        exen_samplers[environment].verbose = True
        sams_samplers[environment] = SAMSSampler(exen_samplers[environment], storage=storage)
        sams_samplers[environment].verbose = True

    # Create test MultiTargetDesign sampler.
    # The explicit sampler is weighted +1.0 and the vacuum sampler -1.0.
    from perses.samplers.samplers import MultiTargetDesign
    target_samplers = {sams_samplers['explicit']: 1.0, sams_samplers['vacuum']: -1.0}
    designer = MultiTargetDesign(target_samplers, storage=self._storage)

    # Store things.
    self.molecules = molecules
    self.environments = environments
    self.topologies = topologies
    self.positions = positions
    self.system_generators = system_generators
    self.proposal_engines = proposal_engines
    self.thermodynamic_states = thermodynamic_states
    self.mcmc_samplers = mcmc_samplers
    self.exen_samplers = exen_samplers
    self.sams_samplers = sams_samplers
    self.designer = designer
def generate_vacuum_hostguest_proposal(current_mol_name="B2", proposed_mol_name="MOL"):
    """
    Generate a test vacuum topology proposal, current positions, and new
    positions triplet for the guest of the ``HostGuestVacuum`` test system.

    The current molecule is taken from the second residue of the host-guest
    topology; the proposed molecule is built from a fixed SMILES string.

    Parameters
    ----------
    current_mol_name : str, optional
        residue name passed to ``SmallMoleculeSetProposalEngine``
    proposed_mol_name : str, optional
        accepted for interface compatibility; not used by this function

    Returns
    -------
    topology_proposal : perses.rjmc.topology_proposal
        The topology proposal representing the transformation
    old_positions : np.array, unit-bearing
        The positions of the initial system
    new_positions : np.array, unit-bearing
        The positions of the new system
    """
    from openmoltools import forcefield_generators
    from openmmtools import testsystems

    from perses.utils.openeye import smiles_to_oemol
    from perses.utils.data import get_data_filename

    host_guest = testsystems.HostGuestVacuum()
    old_positions, top_old = host_guest.positions, host_guest.topology

    residues = list(top_old.residues())
    # guest is second residue in topology
    current_mol = forcefield_generators.generateOEMolFromTopologyResidue(residues[1])
    proposed_mol = smiles_to_oemol('C1CC2(CCC1(CC2)C)C')

    initial_smiles = oechem.OEMolToSmiles(current_mol)
    final_smiles = oechem.OEMolToSmiles(proposed_mol)

    # Resolved once and shared by the ForceField and the SystemGenerator.
    gaff_xml_filename = get_data_filename("data/gaff.xml")
    forcefield = app.ForceField(gaff_xml_filename, 'tip3p.xml')
    forcefield.registerTemplateGenerator(forcefield_generators.gaffTemplateGenerator)

    # System for the unmodified host-guest topology (no solvent is added here).
    old_system = forcefield.createSystem(top_old, removeCMMotion=False)

    system_generator = SystemGenerator(
        [gaff_xml_filename, 'amber99sbildn.xml', 'tip3p.xml'],
        forcefield_kwargs={'removeCMMotion': False,
                           'nonbondedMethod': app.NoCutoff})
    geometry_engine = geometry.FFAllAngleGeometryEngine()
    proposal_engine = SmallMoleculeSetProposalEngine(
        [initial_smiles, final_smiles], system_generator,
        residue_name=current_mol_name)

    # generate topology proposal
    topology_proposal = proposal_engine.propose(old_system, top_old,
                                                current_mol=current_mol,
                                                proposed_mol=proposed_mol)

    # generate new positions with geometry engine; the second return value
    # of propose() is discarded.
    new_positions, _ = geometry_engine.propose(topology_proposal, old_positions, beta)

    return topology_proposal, old_positions, new_positions
def generate_solvated_hybrid_test_topology(current_mol_name="naphthalene",
                                           proposed_mol_name="benzene",
                                           current_mol_smiles=None,
                                           proposed_mol_smiles=None,
                                           vacuum=False,
                                           render_atom_mapping=False):
    """
    Generate a topology proposal, old positions, and new positions (from a
    geometry proposal) for a small-molecule transformation, either in vacuum
    or solvated.

    The IUPAC names are tried first. Only if at least one of them is None are
    the SMILES arguments used. An atom-mapping image (PNG) is written when
    requested.

    Parameters
    ----------
    current_mol_name : str, optional
        name of the first molecule
    proposed_mol_name : str, optional
        name of the second molecule
    current_mol_smiles : str (default None)
        current mol smiles
    proposed_mol_smiles : str (default None)
        proposed mol smiles
    vacuum : bool (default False)
        if True, build the proposal in vacuum; otherwise solvate first
    render_atom_mapping : bool (default False)
        whether to print the new-to-old atom map and render it to
        ``f"{old_smiles}to{new_smiles}.png"``

    Returns
    -------
    topology_proposal : perses.rjmc.topology_proposal
        The topology proposal representing the transformation
    current_positions : np.array, unit-bearing
        The positions of the initial system (solvated unless ``vacuum``)
    new_positions : np.array, unit-bearing
        The positions of the new system

    Raises
    ------
    Exception
        If the molecules cannot be built from the given names/SMILES, or if
        neither a complete pair of names nor a complete pair of SMILES is
        supplied.
    """
    import simtk.openmm.app as app
    from openmoltools import forcefield_generators
    from openeye import oechem
    from openmoltools.openeye import iupac_to_oemol, smiles_to_oemol
    import perses.utils.openeye as openeye
    from perses.utils.data import get_data_filename
    from perses.rjmc.topology_proposal import SystemGenerator, SmallMoleculeSetProposalEngine
    import simtk.unit as unit
    from perses.rjmc.geometry import FFAllAngleGeometryEngine

    def _smiles_with_hydrogens(oemol):
        # SMILES with the default flags plus explicit hydrogens.
        return oechem.OECreateSmiString(
            oemol,
            oechem.OESMILESFlag_DEFAULT | oechem.OESMILESFlag_Hydrogens)

    # Resolve both molecules to SMILES; names take precedence over SMILES input.
    if current_mol_name is not None and proposed_mol_name is not None:
        try:
            old_oemol = iupac_to_oemol(current_mol_name)
            new_oemol = iupac_to_oemol(proposed_mol_name)
            old_smiles = _smiles_with_hydrogens(old_oemol)
            new_smiles = _smiles_with_hydrogens(new_oemol)
        except Exception as err:
            # Chain the cause so the underlying toolkit error stays visible.
            raise Exception(
                f"either {current_mol_name} or {proposed_mol_name} is not compatible with 'iupac_to_oemol' function!"
            ) from err
    elif current_mol_smiles is not None and proposed_mol_smiles is not None:
        try:
            old_oemol = smiles_to_oemol(current_mol_smiles)
            new_oemol = smiles_to_oemol(proposed_mol_smiles)
            old_smiles = _smiles_with_hydrogens(old_oemol)
            new_smiles = _smiles_with_hydrogens(new_oemol)
        except Exception as err:
            raise Exception("the variables are not compatible") from err
    else:
        raise Exception(
            "either current_mol_name and proposed_mol_name must be specified as iupacs OR current_mol_smiles and proposed_mol_smiles must be specified as smiles strings."
        )

    # Rebuild both molecules from their SMILES; only the OEMols are kept.
    old_oemol, _, _, _ = openeye.createSystemFromSMILES(old_smiles, title="MOL")

    # correct the old positions
    old_positions = openeye.extractPositionsFromOEMol(old_oemol)
    old_positions = old_positions.in_units_of(unit.nanometers)

    new_oemol, _, _, _ = openeye.createSystemFromSMILES(new_smiles, title="NEW")

    ffxml = forcefield_generators.generateForceFieldFromMolecules(
        [old_oemol, new_oemol])

    # Both molecules are titled 'MOL' before their topologies are generated.
    old_oemol.SetTitle('MOL')
    new_oemol.SetTitle('MOL')

    old_topology = forcefield_generators.generateTopologyFromOEMol(old_oemol)
    new_topology = forcefield_generators.generateTopologyFromOEMol(new_oemol)

    if not vacuum:
        nonbonded_method = app.PME
        barostat = openmm.MonteCarloBarostat(1.0 * unit.atmosphere,
                                             300.0 * unit.kelvin, 50)
    else:
        nonbonded_method = app.NoCutoff
        barostat = None

    gaff_xml_filename = get_data_filename("data/gaff.xml")
    system_generator = SystemGenerator(
        [gaff_xml_filename, 'amber99sbildn.xml', 'tip3p.xml'],
        barostat=barostat,
        forcefield_kwargs={
            'removeCMMotion': False,
            'nonbondedMethod': nonbonded_method,
            'constraints': app.HBonds,
            'hydrogenMass': 4.0 * unit.amu
        })
    # Add the parameters generated for these two molecules to the force field.
    system_generator._forcefield.loadFile(StringIO(ffxml))

    proposal_engine = SmallMoleculeSetProposalEngine([old_smiles, new_smiles],
                                                     system_generator,
                                                     residue_name='MOL')
    geometry_engine = FFAllAngleGeometryEngine(metadata=None,
                                               use_sterics=False,
                                               n_bond_divisions=1000,
                                               n_angle_divisions=180,
                                               n_torsion_divisions=360,
                                               verbose=True,
                                               storage=None,
                                               bond_softening_constant=1.0,
                                               angle_softening_constant=1.0,
                                               neglect_angles=False)

    if vacuum:
        current_topology = old_topology
        current_positions = old_positions
    else:
        # now to solvate
        modeller = app.Modeller(old_topology, old_positions)
        # Hydrogens outside the ligand residues are deleted and re-added.
        hs = [
            atom for atom in modeller.topology.atoms()
            if atom.element.symbol == 'H'
            and atom.residue.name not in ['MOL', 'OLD', 'NEW']
        ]
        modeller.delete(hs)
        modeller.addHydrogens(forcefield=system_generator._forcefield)
        modeller.addSolvent(system_generator._forcefield,
                            model='tip3p',
                            padding=9.0 * unit.angstroms)
        current_topology = modeller.getTopology()
        solvated_positions = modeller.getPositions()
        # Repack the modeller positions into a numpy-backed Quantity in nm.
        current_positions = unit.quantity.Quantity(
            value=np.array([
                list(atom_pos) for atom_pos in
                solvated_positions.value_in_unit_system(unit.md_unit_system)
            ]),
            unit=unit.nanometers)

    # now to create proposal
    current_system = system_generator.build_system(current_topology)
    top_proposal = proposal_engine.propose(current_system=current_system,
                                           current_topology=current_topology,
                                           current_mol=old_oemol,
                                           proposed_mol=new_oemol)
    new_positions, _ = geometry_engine.propose(top_proposal, current_positions,
                                               beta)

    if render_atom_mapping:
        # Imported under an alias so the boolean parameter is not rebound.
        from perses.utils.smallmolecules import render_atom_mapping as _render_atom_mapping
        # The solvated path reports the engine's non-offset map; the vacuum
        # path reports the proposal's own map.
        if vacuum:
            new_to_old = top_proposal._new_to_old_atom_map
        else:
            new_to_old = proposal_engine.non_offset_new_to_old_atom_map
        print(f"new_to_old: {new_to_old}")
        _render_atom_mapping(f"{old_smiles}to{new_smiles}.png", old_oemol,
                             new_oemol, new_to_old)

    return top_proposal, current_positions, new_positions
""" Test fragmentation """ __author__ = 'Chaya D. Stern' from torsionfit.tests.utils import get_fun, has_openeye import unittest if has_openeye: from openmoltools.openeye import get_charges, smiles_to_oemol import openeye.oechem as oechem from torsionfit.qmscan import fragment mol = smiles_to_oemol('CN(C)C/C=C/C(=O)NC1=C(C=C2C(=C1)C(=NC=N2)NC3=CC(=C(C=C3)F)Cl)O[C@H]4CCOC4') charged = get_charges(mol, keep_confs=1) class TestFragments(unittest.TestCase): @unittest.skipUnless(has_openeye, "Cannot test without OpenEye") def test_tag_funcgroup(self): """ Test tag functional groups """ tagged_funcgroups = fragment._tag_fgroups(charged) self.assertEquals(len(tagged_funcgroups), 3) atom_idx = tagged_funcgroups['amide_0'][0].pop() atom = charged.GetAtom(oechem.OEHasAtomIdx(atom_idx)) fgroup = atom.GetData('fgroup') self.assertEquals('amide_0', fgroup) @unittest.skipUnless(has_openeye, "Cannot test without OpenEye") def test_tag_rings(self): """ Test tag rings""" tagged_rings = fragment._tag_rings(charged) self.assertEquals(len(tagged_rings), 3)