def run_oemol_test_suite(iupac='ethane'):
    """
    Runs all of the oemol related tests for perses.utils.openeye

    The molecule is built from its IUPAC name, its positions are extracted,
    shifted and written back, and the SMILES created from the result is
    converted back into a molecule whose atom count must match the original.

    Parameters
    ---------
    iupac : str, default 'ethane'
        Name handed to ``iupac_to_oemol``.

    Raises
    ------
    AssertionError
        If the IUPAC-derived and SMILES-derived molecules differ in atom count.
    """
    from openmoltools.openeye import iupac_to_oemol
    import copy
    import numpy as np
    import simtk.unit as unit
    from openeye import oechem

    oemol = iupac_to_oemol(iupac)
    positions = test_extractPositionsFromOEMol(oemol)

    # shifting all of the positions by 1. A
    one_angstrom_shift = [1., 1., 1.] * unit.angstrom
    shifted_positions = np.zeros(np.shape(positions))
    for atom_index in range(oemol.NumAtoms()):
        original_row = copy.deepcopy(positions[atom_index])
        shifted_positions[atom_index] = original_row + one_angstrom_shift
    shifted_positions *= unit.angstrom

    molecule = test_giveOpenmmPositionsToOEMol(shifted_positions, oemol)

    smiles_flags = oechem.OESMILESFlag_DEFAULT | oechem.OESMILESFlag_Hydrogens
    smiles = oechem.OECreateSmiString(molecule, smiles_flags)
    # NOTE(review): smiles_to_oemol is not imported in this function;
    # presumably it is available at module level -- confirm.
    smiles_oemol = smiles_to_oemol(smiles)

    # check that the two systems have the same numbers of atoms
    n_atoms_from_iupac = oemol.NumAtoms()
    n_atoms_from_smiles = smiles_oemol.NumAtoms()
    assert (n_atoms_from_iupac == n_atoms_from_smiles), "Discrepancy between molecule generated from IUPAC and SMILES"
def LibGen(libgen, ofs, unique, isomeric):
    """
    Write the products of a library generator to an output stream.

    :param libgen: object whose ``GetProducts()`` yields product molecules
    :param ofs: output molecule stream passed to ``oechem.OEWriteMolecule``
    :param unique: if true, a product whose SMILES was already written is skipped
    :param isomeric: if true, the isomeric SMILES flag is added when comparing products
    """
    smiflag = oechem.OESMILESFlag_DEFAULT  # Canonical|AtomMaps|Rgroup
    if isomeric:
        smiflag |= oechem.OESMILESFlag_ISOMERIC

    # A set gives O(1) duplicate detection; the SMILES is only needed when
    # uniqueness is requested.
    seen_smiles = set()
    for mol in libgen.GetProducts():
        if unique:
            smiles = oechem.OECreateSmiString(mol, smiflag)
            if smiles in seen_smiles:
                continue
            seen_smiles.add(smiles)
        oechem.OEWriteMolecule(ofs, mol)
def get_smarts(prefix, atom_idxs):
    """Get the SMARTS corresponding to a list of atom indices

    The molecule is read from ``<prefix>.mol2``. The atoms listed in
    `atom_idxs` are tagged with map indices (1-based position in `atom_idxs`)
    and, together with their direct neighbours, copied into a subset molecule
    that is written out with ``OECreateSmiString``.

    Parameters
    ----------
    prefix : str
        File prefix; ``<prefix>.mol2`` is always read and ``<prefix>.prmtop``
        is loaded once and cached in the module-level ``prefix2pmd_struct``.
    atom_idxs : sequence of int
        Atom indices to tag. If an index occurs more than once, the first
        occurrence determines its map index.

    Returns
    -------
    str
        The string produced by ``OECreateSmiString`` for the subset molecule.
    """
    offmol = Molecule.from_file(prefix + '.mol2')
    fix_carboxylate_bond_orders(offmol)

    if prefix in prefix2pmd_struct:
        pmd_struct = prefix2pmd_struct[prefix]
    else:
        pmd_struct = ParmEd.load_file(prefix + '.prmtop')
        prefix2pmd_struct[prefix] = pmd_struct

    oemol = offmol.to_openeye()

    # Build the index -> map-index lookup once instead of scanning
    # atom_idxs for every atom; setdefault keeps the first occurrence,
    # matching list.index().
    map_index_by_atom = {}
    for position, tagged_idx in enumerate(atom_idxs, start=1):
        map_index_by_atom.setdefault(tagged_idx, position)

    atom_indices_of_interest = set()
    # The zip with the ParmEd atoms is kept so iteration still stops at the
    # shorter of the two atom lists, as before.
    for oeatom, _pmd_atom in zip(oemol.GetAtoms(), pmd_struct.atoms):
        atom_idx = oeatom.GetIdx()
        # Assign tags to atoms of interest
        if atom_idx in map_index_by_atom:
            oeatom.SetMapIdx(map_index_by_atom[atom_idx])
            atom_indices_of_interest.add(atom_idx)
            for neighbor in oeatom.GetAtoms():
                atom_indices_of_interest.add(neighbor.GetIdx())

    # Make a "Subset" molecule, so that we don't get weird charges
    # around where we cleave the residues
    subsetmol = OEChem.OEGraphMol()
    oepred = OEChem.PyAtomPredicate(
        lambda x: x.GetIdx() in atom_indices_of_interest)
    OEChem.OESubsetMol(subsetmol, oemol, oepred)

    smiles_options = (OEChem.OESMILESFlag_Canonical
                      | OEChem.OESMILESFlag_Isotopes
                      | OEChem.OESMILESFlag_RGroups)
    # Add the atom and bond stereo flags
    smiles_options |= OEChem.OESMILESFlag_AtomStereo | OEChem.OESMILESFlag_BondStereo
    # Add the hydrogen flag
    smiles_options |= OEChem.OESMILESFlag_Hydrogens
    # Keep the map indices assigned above in the output
    smiles_options |= OEChem.OESMILESFlag_AtomMaps

    return OEChem.OECreateSmiString(subsetmol, smiles_options)
def CanSmi(mol, isomeric, kekule):
    """
    Build a canonical SMILES string for `mol`.

    `mol` is modified in place: ring and aromaticity perception is run on it
    and, when `kekule` is set, it is reordered and kekulized.

    :param mol: molecule handed to the oechem perception and SMILES functions
    :param isomeric: if true, add the ISOMERIC flag to the SMILES flags
    :param kekule: if true, clear aromaticity and kekulize before writing
    :return: the string returned by ``oechem.OECreateSmiString``
    """
    oechem.OEFindRingAtomsAndBonds(mol)
    oechem.OEAssignAromaticFlags(mol, oechem.OEAroModel_OpenEye)

    flags = oechem.OESMILESFlag_Canonical
    if isomeric:
        flags = flags | oechem.OESMILESFlag_ISOMERIC

    if kekule:
        # Tag every aromatic bond with integer type 5 before the aromatic
        # flags are cleared below.
        aromatic_bonds = mol.GetBonds(oechem.OEIsAromaticBond())
        for aromatic_bond in aromatic_bonds:
            aromatic_bond.SetIntType(5)
        oechem.OECanonicalOrderAtoms(mol)
        oechem.OECanonicalOrderBonds(mol)
        oechem.OEClearAromaticFlags(mol)
        oechem.OEKekulize(mol)

    return oechem.OECreateSmiString(mol, flags)
def to_oechem_can(smi):
    """
    Return an OEChem canonical SMILES for `smi` with explicit H counts.

    Note that some SMILES such as C=N=O cannot be recognized correctly
    by openbabel. With OEChem, it is interpreted as C=[NH]=O, as was desired.

    Entries of the module-level ``element_cans`` bypass OEChem and are
    returned as ``[<smi>H<n>]`` with ``n = nves[smi]``.

    Parameters
    ----------
    smi : str
        Input SMILES string.

    Returns
    -------
    str
        SMILES written with the Isotopes, Canonical and ImpHCount flags.

    Raises
    ------
    AssertionError
        If OEChem fails to parse `smi` or to add explicit hydrogens.
    """
    from openeye import oechem

    if smi in element_cans:
        return '[' + smi + 'H%d]' % (nves[smi])

    m = oechem.OEGraphMol()
    # These calls must not live inside `assert`: under `python -O` asserts
    # are stripped and the molecule would never be parsed. AssertionError is
    # raised explicitly so callers see the same exception type as before.
    if not oechem.OESmilesToMol(m, smi):
        raise AssertionError("OESmilesToMol failed for SMILES %r" % (smi,))
    if not oechem.OEAddExplicitHydrogens(m):
        raise AssertionError("OEAddExplicitHydrogens failed for SMILES %r" % (smi,))

    # "OESMILESFlag_ImpHCount" is indispensable!!
    # Otherwise, B=C won't be processed correctly by openbabel, i.e., somehow obabel
    # tries to add two H's (instead of one) to B. While things are ok with [BH]=C
    #
    # ISOMERIC .eq. Isotopes | AtomStereo | BondStereo | Canonical | AtomMaps | RGroups
    flavor = (oechem.OESMILESFlag_Isotopes
              | oechem.OESMILESFlag_Canonical
              | oechem.OESMILESFlag_ImpHCount)
    return oechem.OECreateSmiString(m, flavor)
def CanSmi(mol, isomeric, kekule):
    """
    Returns the canonical SMILES from the OEMol provided

    The molecule is modified in place by the perception (and optional
    kekulization) steps.

    :param mol: OEMolBase object
    :param isomeric: add the isomeric flag to the SMILES output
    :param kekule: use kekule cleaning
    :return: string of OESmiles
    """
    oechem.OEFindRingAtomsAndBonds(mol)
    oechem.OEAssignAromaticFlags(mol, oechem.OEAroModel_OpenEye)

    if isomeric:
        output_flags = oechem.OESMILESFlag_Canonical | oechem.OESMILESFlag_ISOMERIC
    else:
        output_flags = oechem.OESMILESFlag_Canonical

    if not kekule:
        return oechem.OECreateSmiString(mol, output_flags)

    # Kekule cleaning: mark aromatic bonds with integer type 5, put atoms
    # and bonds in canonical order, then drop aromaticity and kekulize.
    for bond in mol.GetBonds(oechem.OEIsAromaticBond()):
        bond.SetIntType(5)
    oechem.OECanonicalOrderAtoms(mol)
    oechem.OECanonicalOrderBonds(mol)
    oechem.OEClearAromaticFlags(mol)
    oechem.OEKekulize(mol)
    return oechem.OECreateSmiString(mol, output_flags)
def sanitizeSMILES(smiles_list, mode='drop', verbose=False):
    """
    Sanitize set of SMILES strings by ensuring all are canonical isomeric SMILES.
    Duplicates are also removed.

    Parameters
    ----------
    smiles_list : iterable of str
        The set of SMILES strings to sanitize.
    mode : str, optional, default='drop'
        When a SMILES string that does not correspond to canonical isomeric SMILES is found, select the action to be performed.
        'exception' : raise an `Exception`
        'drop' : drop the SMILES string
        'expand' : expand all stereocenters into multiple molecules
        Any other value silently drops the molecule.
    verbose : bool, optional, default=False
        If True, print verbose output.

    Returns
    -------
    sanitized_smiles_list : list of str
        Sanitized list of canonical isomeric SMILES strings.
        The list is built from a set, so its order is not defined.

    Examples
    --------
    Sanitize a simple list.
    >>> smiles_list = ['CC', 'CCC', '[H][C@]1(NC[C@@H](CC1CO[C@H]2CC[C@@H](CC2)O)N)[H]']

    Throw an exception if undefined stereochemistry is present.
    >>> sanitized_smiles_list = sanitizeSMILES(smiles_list, mode='exception')
    Traceback (most recent call last):
      ...
    Exception: Molecule '[H][C@]1(NC[C@@H](CC1CO[C@H]2CC[C@@H](CC2)O)N)[H]' has undefined stereocenters

    Drop molecules with undefined stereochemistry.
    >>> sanitized_smiles_list = sanitizeSMILES(smiles_list, mode='drop')
    >>> len(sanitized_smiles_list)
    2

    Expand molecules with undefined stereochemistry.
    >>> sanitized_smiles_list = sanitizeSMILES(smiles_list, mode='expand')
    >>> len(sanitized_smiles_list)
    4
    """
    from openeye import oechem
    from openeye.oechem import OEGraphMol, OESmilesToMol
    from perses.tests.utils import has_undefined_stereocenters, enumerate_undefined_stereocenters

    sanitized_smiles_set = set()
    OESMILES_OPTIONS = (oechem.OESMILESFlag_DEFAULT
                        | oechem.OESMILESFlag_ISOMERIC
                        | oechem.OESMILESFlag_Hydrogens)

    for smiles in smiles_list:
        molecule = OEGraphMol()
        OESmilesToMol(molecule, smiles)
        oechem.OEAddExplicitHydrogens(molecule)

        if verbose:
            molecule.SetTitle(smiles)
            oechem.OETriposAtomNames(molecule)

        if has_undefined_stereocenters(molecule, verbose=verbose):
            if mode == 'drop':
                if verbose:
                    print("Dropping '%s' due to undefined stereocenters." % smiles)
                continue
            elif mode == 'exception':
                raise Exception("Molecule '%s' has undefined stereocenters" % smiles)
            elif mode == 'expand':
                if verbose:
                    print('Expanding stereochemistry:')
                    # Format the value into the message instead of passing it
                    # as a second print() argument.
                    print('original: %s' % smiles)
                expanded_molecules = enumerate_undefined_stereocenters(molecule, verbose=verbose)
                for expanded_molecule in expanded_molecules:
                    smiles_string = oechem.OECreateSmiString(expanded_molecule, OESMILES_OPTIONS)
                    sanitized_smiles_set.add(smiles_string)
                    if verbose:
                        print('expanded: %s' % smiles_string)
        else:
            # Convert to OpenEye's canonical isomeric SMILES.
            smiles_string = oechem.OECreateSmiString(molecule, OESMILES_OPTIONS)
            sanitized_smiles_set.add(smiles_string)

    sanitized_smiles_list = list(sanitized_smiles_set)
    return sanitized_smiles_list
create_systems(topologies, positions, output_directory, project_prefix, solvate=solvate) #generate atom maps for all pairs: ifs = oechem.oemolistream() ifs.open(ligand_filename) # get the list of molecules mol_list = [oechem.OEMol(mol) for mol in ifs.GetOEMols()] smiles_list = [] for idx, mol in enumerate(mol_list): mol.SetTitle("MOL{}".format(idx)) oechem.OETriposAtomNames(mol) smiles_list.append(oechem.OECreateSmiString(mol, OESMILES_OPTIONS)) #smiles_list = [oechem.OECreateSmiString(mol, OESMILES_OPTIONS)] atom_mapper = AtomMapper(mol_list) atom_mapper.map_all_molecules() atom_mapper.generate_and_check_proposal_matrix() atom_mapper_filename = os.path.join( output_directory, "{}_atom_mapper.json".format(project_prefix)) with open(atom_mapper_filename, 'w') as map_outfile: map_outfile.write(atom_mapper.to_json())
def createSMILESfromOEMol(molecule):
    """Return the SMILES of `molecule` using the DEFAULT and Hydrogens flags."""
    explicit_h_flags = oechem.OESMILESFlag_DEFAULT | oechem.OESMILESFlag_Hydrogens
    return oechem.OECreateSmiString(molecule, explicit_h_flags)
def __init__(self,
             protein_pdb_filename,
             ligand_file,
             old_ligand_index,
             new_ligand_index,
             forcefield_files,
             pressure=1.0 * unit.atmosphere,
             temperature=300.0 * unit.kelvin,
             solvent_padding=9.0 * unit.angstroms):
    """
    Initialize a NonequilibriumFEPSetup object

    Parameters
    ----------
    protein_pdb_filename : str
        The name of the protein pdb file
    ligand_file : str
        the name of the ligand file (any openeye supported format)
    old_ligand_index : int
        Passed as ``index`` to ``self.load_sdf`` to select the old ligand
    new_ligand_index : int
        Passed as ``index`` to ``self.load_sdf`` to select the new ligand
    forcefield_files : list of str
        The list of ffxml files that contain the forcefields that will be used
    pressure : Quantity, units of pressure
        Pressure to use in the barostat. If None, no barostat is created.
    temperature : Quantity, units of temperature
        Temperature to use for the Langevin integrator
    solvent_padding : Quantity, units of length
        The amount of padding to use when adding solvent
    """
    self._protein_pdb_filename = protein_pdb_filename
    self._pressure = pressure
    self._temperature = temperature
    self._barostat_period = 50
    self._padding = solvent_padding

    # Load both ligands from the same file and take the old ligand's
    # coordinates as the starting ligand positions.
    self._ligand_file = ligand_file
    self._old_ligand_index = old_ligand_index
    self._new_ligand_index = new_ligand_index
    self._old_ligand_oemol = self.load_sdf(self._ligand_file, index=self._old_ligand_index)
    self._new_ligand_oemol = self.load_sdf(self._ligand_file, index=self._new_ligand_index)
    self._old_ligand_positions = extractPositionsFromOEMOL(
        self._old_ligand_oemol)

    ffxml = forcefield_generators.generateForceFieldFromMolecules(
        [self._old_ligand_oemol, self._new_ligand_oemol])

    # Both ligands get the same title, matching residue_name="MOL" below.
    self._old_ligand_oemol.SetTitle("MOL")
    self._new_ligand_oemol.SetTitle("MOL")

    smiles_flags = oechem.OESMILESFlag_DEFAULT | oechem.OESMILESFlag_Hydrogens
    self._new_ligand_smiles = oechem.OECreateSmiString(
        self._new_ligand_oemol, smiles_flags)
    self._old_ligand_smiles = oechem.OECreateSmiString(
        self._old_ligand_oemol, smiles_flags)

    print(self._new_ligand_smiles)
    print(self._old_ligand_smiles)

    self._old_ligand_topology = forcefield_generators.generateTopologyFromOEMol(
        self._old_ligand_oemol)
    self._old_ligand_md_topology = md.Topology.from_openmm(
        self._old_ligand_topology)
    self._new_ligand_topology = forcefield_generators.generateTopologyFromOEMol(
        self._new_ligand_oemol)
    # NOTE(review): attribute name is misspelled ("liands"); it is kept
    # unchanged because other code may read it under this name.
    self._new_liands_md_topology = md.Topology.from_openmm(
        self._new_ligand_topology)

    # The context manager closes the file even if PDB parsing raises.
    with open(self._protein_pdb_filename, 'r') as protein_pdbfile:
        pdb_file = app.PDBFile(protein_pdbfile)

    self._protein_topology_old = pdb_file.topology
    self._protein_md_topology_old = md.Topology.from_openmm(
        self._protein_topology_old)
    self._protein_positions_old = pdb_file.positions

    self._forcefield = app.ForceField(*forcefield_files)
    self._forcefield.loadFile(StringIO(ffxml))

    print("Generated forcefield")

    # Complex = protein followed by the old ligand, in that atom order.
    self._complex_md_topology_old = self._protein_md_topology_old.join(
        self._old_ligand_md_topology)
    self._complex_topology_old = self._complex_md_topology_old.to_openmm()

    n_atoms_complex_old = self._complex_topology_old.getNumAtoms()
    n_atoms_protein_old = self._protein_topology_old.getNumAtoms()

    self._complex_positions_old = unit.Quantity(np.zeros(
        [n_atoms_complex_old, 3]), unit=unit.nanometers)
    self._complex_positions_old[:n_atoms_protein_old, :] = self._protein_positions_old
    self._complex_positions_old[n_atoms_protein_old:, :] = self._old_ligand_positions

    if pressure is not None:
        barostat = openmm.MonteCarloBarostat(self._pressure,
                                             self._temperature,
                                             self._barostat_period)
        self._system_generator = SystemGenerator(
            forcefield_files,
            barostat=barostat,
            forcefield_kwargs={'nonbondedMethod': app.PME})
    else:
        self._system_generator = SystemGenerator(forcefield_files)

    self._complex_proposal_engine = TwoMoleculeSetProposalEngine(
        self._old_ligand_oemol,
        self._new_ligand_oemol,
        self._system_generator,
        residue_name="MOL")
    self._geometry_engine = FFAllAngleGeometryEngine()

    (self._complex_topology_old_solvated,
     self._complex_positions_old_solvated,
     self._complex_system_old_solvated) = self._solvate_system(
        self._complex_topology_old, self._complex_positions_old)
    self._complex_md_topology_old_solvated = md.Topology.from_openmm(
        self._complex_topology_old_solvated)

    print(self._complex_proposal_engine._smiles_list)

    beta = 1.0 / (kB * temperature)

    self._complex_topology_proposal = self._complex_proposal_engine.propose(
        self._complex_system_old_solvated,
        self._complex_topology_old_solvated)
    self._complex_positions_new_solvated, _ = self._geometry_engine.propose(
        self._complex_topology_proposal,
        self._complex_positions_old_solvated, beta)

    #now generate the equivalent objects for the solvent phase. First, generate the ligand-only topologies and atom map
    self._solvent_topology_proposal, self._old_solvent_positions = self._generate_ligand_only_topologies(
        self._complex_positions_old_solvated,
        self._complex_positions_new_solvated)
    self._new_solvent_positions, _ = self._geometry_engine.propose(
        self._solvent_topology_proposal, self._old_solvent_positions, beta)
def process(self, record, port):
    """
    Combine the incoming ligand record with the stored MD components.

    For records arriving on the 'intake' port the ligand is validated,
    set on ``self.md_components``, clashing atoms of the other components
    are removed, and a new record carrying the complex is emitted on
    ``self.success``. Any exception is logged and the original record is
    emitted on ``self.failure``.

    :param record: incoming record holding the ligand as primary molecule
    :param port: name of the port the record arrived on
    :return: None
    """
    try:
        if port == 'intake':

            if not record.has_value(Fields.primary_molecule):
                raise ValueError(
                    "Missing the ligand primary molecule field")

            ligand = record.get_value(Fields.primary_molecule)

            if ligand.NumConfs() > 1:
                # Use the same accessor as the guard above for the message.
                raise ValueError(
                    "The ligand {} has multiple conformers: {}".format(
                        ligand.GetTitle(), ligand.NumConfs()))

            if not record.has_value(Fields.title):
                self.log.warn(
                    "Missing title field '{}' field; improvising".format(
                        Fields.title.get_name()))
                ligand_title = ligand.GetTitle()[0:12]
            else:
                ligand_title = record.get_value(Fields.title)

            protein = self.md_components.get_protein

            self.md_components.set_ligand(ligand)

            # Check if the ligand is inside the binding site. Cutoff distance 3A
            if not oeommutils.check_shell(ligand, protein, 3):
                raise ValueError(
                    "The Ligand is probably outside the Protein binding site"
                )

            # Remove Steric Clashes between the ligand and the other System components
            for comp_name, comp in self.md_components.get_components.items():

                # Skip clashes between the ligand itself and the protein
                if comp_name in ['ligand', 'protein']:
                    continue

                # Remove Metal clashes if the distance between the metal and the ligand
                # is less than 1A
                elif comp_name == 'metals':
                    metal_del = oeommutils.delete_shell(ligand, comp, 1.0, in_out='in')

                    if metal_del.NumAtoms() != comp.NumAtoms():
                        self.opt['Logger'].info(
                            "Detected steric-clashes between the ligand {} and metals"
                            .format(ligand_title))

                        self.md_components.set_metals(metal_del)

                # Remove clashes if the distance between the selected component and the ligand
                # is less than 1.5A
                else:
                    comp_del = oeommutils.delete_shell(ligand, comp, 1.5, in_out='in')

                    if comp_del.NumAtoms() != comp.NumAtoms():
                        self.opt['Logger'].info(
                            "Detected steric-clashes between the ligand {} and component {}"
                            .format(ligand_title, comp_name))

                        self.md_components.set_component_by_name(
                            comp_name, comp_del)

            complex_title = 'p' + self.md_components.get_title + '_l' + ligand_title

            mdcomp = self.md_components.copy
            mdcomp.set_title(complex_title)

            # Check Ligand: the ligand stored in the components must give
            # the same SMILES as the incoming ligand.
            lig_check = mdcomp.get_ligand
            smi_lig_check = oechem.OECreateSmiString(lig_check)
            smi_ligand = oechem.OECreateSmiString(ligand)

            if smi_ligand != smi_lig_check:
                raise ValueError(
                    "Ligand IsoSmiles String check failure: {} vs {}".
                    format(smi_lig_check, smi_ligand))

            # the ligand is the primary molecule
            new_record = OERecord(record)

            new_record.set_value(Fields.title, complex_title)
            new_record.set_value(Fields.ligand, ligand)
            new_record.set_value(Fields.protein, protein)

            # Check Protein Name
            if protein.GetTitle():
                protein_name = protein.GetTitle()
            else:
                protein_name = "prot"

            new_record.set_value(Fields.protein_name, protein_name)
            new_record.set_value(Fields.md_components, mdcomp)

            self.success.emit(new_record)

    except Exception as e:
        # Cube-level boundary: report the failure and route the record to
        # the failure port instead of crashing.
        print("Failed to complete", str(e), flush=True)
        self.opt['Logger'].info('Exception {} {}'.format(
            str(e), self.title))
        self.log.error(traceback.format_exc())
        self.failure.emit(record)

    return
def getscores(self,
              actions,
              gsmis,
              prot,
              lig,
              num_returns=10,
              return_docked_pose=False,
              refmol=None):
    """
    Align, dock and score a random subset of candidate actions.

    A random subset of `actions` (without replacement) is aligned in a
    multiprocessing pool, each aligned molecule is docked/scored with the
    available docking objects, and the resulting tuples are returned sorted
    according to ``self.sort``.

    :param actions: candidate actions; indexed in parallel with `gsmis`
    :param gsmis: SMILES strings matching `actions`
    :param prot: protein, copied into an ``oechem.OEMol``
    :param lig: ligand passed to ``oedocking.OEMakeReceptor``
    :param num_returns: number of candidates to sample; values <= 0 mean
        ``len(actions) - 1``
    :param return_docked_pose: if true (and a receptor was built), the
        docked pose replaces the aligned molecule in the output
    :param refmol: reference molecule deep-copied once and passed to the aligner
    :return: list of ``(new_mol, new_mol2, gs, action)`` tuples in sorted order
    :raises AssertionError: if ``self.sort`` is not a known sort method
    """
    with self.logger("getscores") as logger:
        if num_returns <= 0:
            num_returns = len(actions) - 1
        logger.log("Action space is ", len(actions))
        idxs = list(
            np.random.choice(len(actions),
                             min(num_returns,
                                 len(actions) - 1),
                             replace=False).flatten())
        actions = [actions[idx] for idx in idxs]
        gsmis = [gsmis[idx] for idx in idxs]

        protein = oechem.OEMol(prot)
        receptor = oechem.OEGraphMol()

        if not (self.sort == 'iscores' and self.optimize):
            logger.log(
                "Creating receptor from recent pdb, this might take awhile"
            )
            oedocking.OEMakeReceptor(receptor, protein, lig)
            dockobj = oedocking.OEDock(self.dockmethod)
            dockobj.Initialize(receptor)
            assert (dockobj.IsInitialized())
            logger.log("done")
        else:
            dockobj = None
            logger.log(
                "Skipping receptor building as optimize is set and sort method is iscore."
            )

        pscores = []
        dscores = []
        ds_old_scores = []
        ds_start_scores = []

        data = []

        with multiprocessing.Pool() as pool:
            imapiter = pool.imap(
                self.env.action.aligner.__class__.call_static,
                zip(actions, gsmis,
                    [copy.deepcopy(refmol)] * len(actions)))

            for idx, res in enumerate(imapiter):
                try:
                    if res is None:
                        logger.error(
                            "Alignment failed and returned none for ",
                            gsmis[idx])
                        continue
                    ps, ds, ds_start, ds_old = None, None, None, []
                    new_mol, new_mol2, gs, action = res

                    # Score against the receptor built in this call.
                    if dockobj is not None:
                        dockedpose = oechem.OEMol()
                        newmol2 = oechem.OEMol(new_mol)
                        dockobj.DockMultiConformerMolecule(
                            dockedpose, newmol2, 1)
                        ds = dockedpose.GetEnergy()
                        ps = dockobj.ScoreLigand(new_mol)
                        dscores.append(ds)
                        pscores.append(ps)
                        if return_docked_pose:
                            new_mol_ = oechem.OEMol(dockedpose)

                    # Score against the starting docking object, if any.
                    if self.start_dobj is not None:
                        dockedpose2 = oechem.OEMol()
                        newmol2 = oechem.OEMol(new_mol)
                        self.start_dobj.DockMultiConformerMolecule(
                            dockedpose2, newmol2, 1)
                        ds_start = dockedpose2.GetEnergy()
                        ds_start_scores.append(ds_start)

                    # Score against every previously stored docking object.
                    # NOTE(review): one score list per molecule is assumed
                    # here (the 'hscores' sort reduces each entry of
                    # ds_old_scores to a single number) -- confirm nesting.
                    if self.track_hscores:
                        for olddobj in self.past_dockobjs:
                            dockedpose2 = oechem.OEMol()
                            newmol2 = oechem.OEMol(new_mol)
                            olddobj.DockMultiConformerMolecule(
                                dockedpose2, newmol2, 1)
                            ds_old.append(dockedpose2.GetEnergy())
                        ds_old_scores.append(ds_old)

                    if dockobj is not None and return_docked_pose:
                        new_mol = new_mol_
                    oechem.OEAssignAromaticFlags(new_mol)
                    oechem.OEAddExplicitHydrogens(new_mol)
                    oechem.OE3DToInternalStereo(new_mol)
                    new_mol2 = oechem.OEMol(new_mol)
                    gs = oechem.OECreateSmiString(
                        new_mol, oechem.OESMILESFlag_DEFAULT
                        | oechem.OESMILESFlag_Hydrogens
                        | oechem.OESMILESFlag_Isotopes
                        | oechem.OESMILESFlag_BondStereo
                        | oechem.OESMILESFlag_AtomStereo)
                    # The counter is interpolated; previously the literal
                    # text "idx" was logged.
                    logger.log(
                        f"({idx} / {len(idxs)}): Pose Score {ps}, Dock Score {ds}, Init Score {ds_start}"
                    )

                    data.append((new_mol, new_mol2, gs, action))
                except Exception as err:
                    # Best effort per candidate: log and move on. The name
                    # no longer collides with the pool variable.
                    logger.error(err)
                    traceback.print_tb(err.__traceback__)
                    continue

        self.past_dockobjs.append(dockobj)
        self.past_receptors.append(receptor)
        logger.log("Sorting on", self.sort)
        if self.sort == 'dscores':
            order = np.argsort(dscores)
            logger.log([dscores[i] for i in order])
        elif self.sort == 'pscores':
            order = np.argsort(pscores)
            logger.log([pscores[i] for i in order])
        elif self.sort == 'iscores':
            order = np.argsort(ds_start_scores)
            logger.log([ds_start_scores[i] for i in order])
        elif self.sort == 'hscores':
            hscores = [
                np.quantile(np.clip(scoreset, None, 0), 0.)
                for scoreset in ds_old_scores
            ]
            order = np.argsort(hscores)
            logger.log([hscores[i] for i in order])
        else:
            # Raised explicitly so the check survives `python -O`.
            raise AssertionError(
                "Unknown sort method: {!r}".format(self.sort))

        self.env.data['dscores'].append(dscores)
        self.env.data['pscores'].append(pscores)
        self.env.data['iscores'].append(ds_start_scores)
        self.env.data['hscores'].append(ds_old_scores)
        data = [data[i] for i in order]
        return data
def getoutput(self, smi):
    """Parse `smi` and write it back out using the module-level ``flags``.

    Raises AssertionError if ``OEParseSmiles`` reports failure.
    """
    parsed = oe.OEGraphMol()
    parse_succeeded = oe.OEParseSmiles(parsed, smi)
    assert parse_succeeded
    result = oe.OECreateSmiString(parsed, flags)
    return result
def getoutput(self, smi):
    """Parse `smi`, assign aromatic flags, and write it with flag value 0.

    Raises AssertionError if ``OEParseSmiles`` reports failure.
    """
    parsed = oe.OEGraphMol()
    parse_succeeded = oe.OEParseSmiles(parsed, smi)
    assert parse_succeeded
    oe.OEAssignAromaticFlags(parsed)
    no_flags = 0
    return oe.OECreateSmiString(parsed, no_flags)
def mol_to_smiles(molecule, isomeric=True, explicit_hydrogen=True, mapped=True):
    """
    Generate canonical SMILES with OpenEye.

    The input is copied first, so the caller's molecule is not modified.
    Existing atom maps on the copy are removed before the SMILES is written.

    Parameters
    ----------
    molecule: oechem.OEMol
    isomeric: bool
        If True, SMILES will include chirality and stereo bonds
    explicit_hydrogen: bool
        If True, SMILES will include explicit hydrogen
    mapped: bool
        If True, will include map indices (atom index + 1, assigned after
        ``canonical_order_atoms`` unless the molecule carries
        'json_geometry' data)

    Returns
    -------
    smiles str

    Raises
    ------
    ValueError
        If `isomeric` is set and the molecule has undefined stereochemistry.
    Warning
        If `mapped` is set without `explicit_hydrogen` or without `isomeric`.
    """
    molecule = oechem.OEMol(molecule)
    if has_atom_map(molecule):
        remove_atom_map(molecule)
    if explicit_hydrogen and not has_explicit_hydrogen(molecule):
        oechem.OEAddExplicitHydrogens(molecule)

    # First check if geometry from JSON exists
    try:
        JSON_geometry = molecule.GetData('json_geometry')
    except ValueError:
        JSON_geometry = False

    if isomeric and not has_stereo_defined(molecule):
        raise ValueError("Smiles must have stereochemistry defined.")

    if not mapped:
        # Unmapped output: choose the writer from the remaining two switches.
        if not explicit_hydrogen:
            if isomeric:
                return oechem.OEMolToSmiles(molecule)
            return oechem.OECreateSmiString(
                molecule,
                oechem.OESMILESFlag_Canonical | oechem.OESMILESFlag_RGroups)
        if isomeric:
            return oechem.OECreateSmiString(
                molecule,
                oechem.OESMILESFlag_Hydrogens | oechem.OESMILESFlag_ISOMERIC)
        return oechem.OECreateSmiString(
            molecule,
            oechem.OESMILESFlag_Hydrogens | oechem.OESMILESFlag_Canonical
            | oechem.OESMILESFlag_RGroups)

    # Mapped output from here on.
    if not JSON_geometry:
        # canonical order of atoms if input was SMILES or permute_xyz is true
        canonical_order_atoms(molecule)

    for atom in molecule.GetAtoms():
        atom.SetMapIdx(atom.GetIdx() + 1)

    if not explicit_hydrogen:
        raise Warning("Tagged SMILES must include hydrogens to retain order")
    if not isomeric:
        raise Warning("Tagged SMILES must include stereochemistry ")

    return oechem.OEMolToSmiles(molecule)
def generate_solvated_hybrid_test_topology(current_mol_name="naphthalene",
                                           proposed_mol_name="benzene",
                                           current_mol_smiles=None,
                                           proposed_mol_smiles=None,
                                           vacuum=False,
                                           render_atom_mapping=False):
    """
    This function will generate a topology proposal, old positions, and new positions with a geometry proposal (either vacuum or solvated) given a set of input iupacs or smiles.
    The function will (by default) read the iupac names first.  If they are set to None, then it will attempt to read a set of current and new smiles.
    An atom mapping image will be generated if specified.

    Arguments
    ----------
    current_mol_name : str, optional
        name of the first molecule
    proposed_mol_name : str, optional
        name of the second molecule
    current_mol_smiles : str (default None)
        current mol smiles
    proposed_mol_smiles : str (default None)
        proposed mol smiles
    vacuum: bool (default False)
        whether to render a vacuum or solvated topology_proposal
    render_atom_mapping : bool (default False)
        whether to render the atom map of the current_mol_name and proposed_mol_name

    Returns
    -------
    topology_proposal : perses.rjmc.topology_proposal
        The topology proposal representing the transformation
    current_positions : np.array, unit-bearing
        The positions of the initial system
    new_positions : np.array, unit-bearing
        The positions of the new system

    Raises
    ------
    Exception
        If the molecules cannot be built from the given names/SMILES, or if
        neither a pair of names nor a pair of SMILES is supplied.
    """
    import simtk.openmm.app as app
    from openmoltools import forcefield_generators
    from openeye import oechem
    from openmoltools.openeye import iupac_to_oemol, generate_conformers, smiles_to_oemol
    import perses.utils.openeye as openeye
    from perses.utils.data import get_data_filename
    from perses.rjmc.topology_proposal import TopologyProposal, SystemGenerator, SmallMoleculeSetProposalEngine
    import simtk.unit as unit
    from perses.rjmc.geometry import FFAllAngleGeometryEngine

    # NOTE(review): `openmm`, `np`, `StringIO` and `beta` are used below but
    # not defined in this function; they are presumably module-level names.

    smiles_flags = oechem.OESMILESFlag_DEFAULT | oechem.OESMILESFlag_Hydrogens

    if current_mol_name is not None and proposed_mol_name is not None:
        try:
            old_oemol, new_oemol = iupac_to_oemol(
                current_mol_name), iupac_to_oemol(proposed_mol_name)
            old_smiles = oechem.OECreateSmiString(old_oemol, smiles_flags)
            new_smiles = oechem.OECreateSmiString(new_oemol, smiles_flags)
        except Exception as err:
            # `except Exception` lets KeyboardInterrupt/SystemExit through.
            raise Exception(
                f"either {current_mol_name} or {proposed_mol_name} is not compatible with 'iupac_to_oemol' function!"
            ) from err
    elif current_mol_smiles is not None and proposed_mol_smiles is not None:
        try:
            old_oemol, new_oemol = smiles_to_oemol(
                current_mol_smiles), smiles_to_oemol(proposed_mol_smiles)
            old_smiles = oechem.OECreateSmiString(old_oemol, smiles_flags)
            new_smiles = oechem.OECreateSmiString(new_oemol, smiles_flags)
        except Exception as err:
            raise Exception("the variables are not compatible") from err
    else:
        raise Exception(
            "either current_mol_name and proposed_mol_name must be specified as iupacs OR current_mol_smiles and proposed_mol_smiles must be specified as smiles strings."
        )

    old_oemol, old_system, old_positions, old_topology = openeye.createSystemFromSMILES(
        old_smiles, title="MOL")

    #correct the old positions
    old_positions = openeye.extractPositionsFromOEMol(old_oemol)
    old_positions = old_positions.in_units_of(unit.nanometers)

    new_oemol, new_system, new_positions, new_topology = openeye.createSystemFromSMILES(
        new_smiles, title="NEW")

    ffxml = forcefield_generators.generateForceFieldFromMolecules(
        [old_oemol, new_oemol])

    # Both molecules are retitled 'MOL' before their topologies are rebuilt,
    # matching residue_name='MOL' used by the proposal engine below.
    old_oemol.SetTitle('MOL')
    new_oemol.SetTitle('MOL')

    old_topology = forcefield_generators.generateTopologyFromOEMol(old_oemol)
    new_topology = forcefield_generators.generateTopologyFromOEMol(new_oemol)

    if not vacuum:
        nonbonded_method = app.PME
        barostat = openmm.MonteCarloBarostat(1.0 * unit.atmosphere,
                                             300.0 * unit.kelvin, 50)
    else:
        nonbonded_method = app.NoCutoff
        barostat = None

    gaff_xml_filename = get_data_filename("data/gaff.xml")
    system_generator = SystemGenerator(
        [gaff_xml_filename, 'amber99sbildn.xml', 'tip3p.xml'],
        barostat=barostat,
        forcefield_kwargs={
            'removeCMMotion': False,
            'nonbondedMethod': nonbonded_method,
            'constraints': app.HBonds,
            'hydrogenMass': 4.0 * unit.amu
        })
    system_generator._forcefield.loadFile(StringIO(ffxml))

    proposal_engine = SmallMoleculeSetProposalEngine([old_smiles, new_smiles],
                                                     system_generator,
                                                     residue_name='MOL')
    geometry_engine = FFAllAngleGeometryEngine(metadata=None,
                                               use_sterics=False,
                                               n_bond_divisions=1000,
                                               n_angle_divisions=180,
                                               n_torsion_divisions=360,
                                               verbose=True,
                                               storage=None,
                                               bond_softening_constant=1.0,
                                               angle_softening_constant=1.0,
                                               neglect_angles=False)

    if not vacuum:
        #now to solvate
        modeller = app.Modeller(old_topology, old_positions)
        hs = [
            atom for atom in modeller.topology.atoms()
            if atom.element.symbol in ['H']
            and atom.residue.name not in ['MOL', 'OLD', 'NEW']
        ]
        modeller.delete(hs)
        modeller.addHydrogens(forcefield=system_generator._forcefield)
        modeller.addSolvent(system_generator._forcefield,
                            model='tip3p',
                            padding=9.0 * unit.angstroms)
        solvated_topology = modeller.getTopology()
        solvated_positions = modeller.getPositions()
        # Convert the modeller positions into a numpy-backed Quantity in nm.
        solvated_positions = unit.quantity.Quantity(value=np.array([
            list(atom_pos) for atom_pos in
            solvated_positions.value_in_unit_system(unit.md_unit_system)
        ]),
                                                    unit=unit.nanometers)
        solvated_system = system_generator.build_system(solvated_topology)

        #now to create proposal
        top_proposal = proposal_engine.propose(
            current_system=solvated_system,
            current_topology=solvated_topology,
            current_mol=old_oemol,
            proposed_mol=new_oemol)
        new_positions, _ = geometry_engine.propose(top_proposal,
                                                   solvated_positions, beta)

        if render_atom_mapping:
            # Imported under an alias so the boolean parameter of the same
            # name is not rebound.
            from perses.utils.smallmolecules import render_atom_mapping as render_mapping
            print(
                f"new_to_old: {proposal_engine.non_offset_new_to_old_atom_map}"
            )
            render_mapping(f"{old_smiles}to{new_smiles}.png", old_oemol,
                           new_oemol,
                           proposal_engine.non_offset_new_to_old_atom_map)

        return top_proposal, solvated_positions, new_positions

    else:
        vacuum_system = system_generator.build_system(old_topology)
        top_proposal = proposal_engine.propose(current_system=vacuum_system,
                                               current_topology=old_topology,
                                               current_mol=old_oemol,
                                               proposed_mol=new_oemol)
        new_positions, _ = geometry_engine.propose(top_proposal,
                                                   old_positions, beta)
        if render_atom_mapping:
            from perses.utils.smallmolecules import render_atom_mapping as render_mapping
            print(f"new_to_old: {top_proposal._new_to_old_atom_map}")
            render_mapping(f"{old_smiles}to{new_smiles}.png", old_oemol,
                           new_oemol, top_proposal._new_to_old_atom_map)
        return top_proposal, old_positions, new_positions
oechem.OECanonicalOrderBonds(mol) # Assign a reasonable protomer if args.RetainProtonation: for atom in mol.GetAtoms(): atom.SetImplicitHCount(0) else: if not oequacpac.OEGetReasonableProtomer(mol): print 'Failed to get a reasonable protomer at pH 7.4' oechem.OEAssignAromaticFlags(mol, oechem.OEAroModelOpenEye) if not args.RetainProtonation: oechem.OEAddExplicitHydrogens(mol) smi = oechem.OECreateSmiString(mol, oechem.OESMILESFlag_Canonical) print 'The canonical SMILES for a reasonably protonated state is', smi # Generate conformations from openeye import oeomega mol_multiconf = oechem.OEMol(mol) oechem.OECanonicalOrderAtoms(mol_multiconf) omega = oeomega.OEOmega() # These parameters were chosen to match http://docs.eyesopen.com/toolkits/cookbook/python/modeling/am1-bcc.html omega.SetMaxConfs(800) omega.SetIncludeInput(False) omega.SetCanonOrder(False) omega.SetStrictStereo(False)
# NOTE(review): fragment of a larger Python 2 script; the usage call below is
# presumably guarded by an argument-count check that is not visible here --
# confirm.
oechem.OEThrow.Usage("%s <infile> [<outfile>]" % sys.argv[0])

# Input stream from the first command-line argument.
ims = oechem.oemolistream()
ims.open(sys.argv[1])

# The output stream is optional (second command-line argument).
if len(sys.argv) > 2:
    oms = oechem.oemolostream()
    oms.open(sys.argv[2])
else:
    oms = None

natoms = []  # atom count per molecule
nchars = []  # number of ASCII letters in each molecule's SMILES

for mol in ims.GetOEGraphMols():
    na = mol.NumAtoms()
    smi = oechem.OECreateSmiString(mol)
    # Strip everything except ASCII letters before measuring the length.
    nc = len(re.sub(r'[^a-zA-Z]', '', smi))
    natoms.append(na)
    nchars.append(nc)
    if oms:
        # Store both counts, tab-separated, in the title of the written
        # molecule.
        mol.SetTitle("%d\t%d" % (na, nc))
        oechem.OEWriteMolecule(oms, mol)

ims.close()
if oms:
    oms.close()

A = numpy.array(natoms)
C = numpy.array(nchars)

print "%s: N: %d" % (sys.argv[0], len(natoms))
def get_series(mol):
    """Assign an OpenEye molecule to the first matching chemical series.

    The molecule is converted to SMILES with OpenEye, re-parsed with RDKit,
    and tested against a fixed, ordered table of SMARTS patterns. A series
    matches when the hydrogen-expanded molecule contains the SMARTS pattern
    AND the molecule is within that series' molecular-weight, heavy-atom and
    ring-count cutoffs.

    Parameters
    ----------
    mol : OpenEye molecule
        Molecule to classify. Its SD data tags ``acrylamide`` and
        ``chloroacetamide`` are consulted to filter out covalent compounds.

    Returns
    -------
    str or None
        Name of the first matching series (table order), or ``None`` when the
        molecule is tagged as covalent, cannot be parsed by RDKit, or matches
        no series.
    """
    from rdkit import Chem
    from rdkit.Chem import Descriptors, rdMolDescriptors

    # Ordered: the first series that matches wins.
    series_SMARTS_dict = {
        # Earlier alternative kept for reference:
        # "3-aminopyridine": "[R1][C,N;R0;!$(NC(=O)CN)]C(=O)[C,N;R0;!$(NC(=O)CN)][c]1cnccc1",
        "3-aminopyridine-like": "[R1]!@[C,N]C(=O)[C,N]!@[R1]",
        "3-aminopyridine-strict": "c1ccncc1NC(=O)!@[R1]",
        "Ugi": "[c,C:1][C](=[O])[N]([c,C,#1:2])[C]([c,C,#1:3])([c,C,#1:4])[C](=[O])[NH1][c,C:5]",
        "quinolones": "NC(=O)c1cc(=O)[nH]c2ccccc12",
        "piperazine-chloroacetamide": "O=C(CCl)N1CCNCC1",
        # Earlier alternatives kept for reference:
        # 'benzotriazoles': 'c1ccc(NC(=O)[C,N]n2nnc3ccccc32)cc1',
        # 'benzotriazoles': 'a1aaa([C,N]C(=O)[C,N]a2aaa3aaaaa32)aa1',
        'benzotriazoles': 'a2aaa3aaaaa32',
    }

    # (MW cutoff, heavy-atom cutoff, ring-count cutoff)
    default_cutoffs = (550, 70, 10)
    # Series-specific overrides. The original code compared against the
    # misspelled name "3-amonipyridine-like", so this override never applied.
    series_cutoffs = {
        "3-aminopyridine-like": (410, 28, 3),
    }

    # Filter out covalent compounds
    try:
        if (oechem.OEGetSDData(mol, 'acrylamide') == 'True'
                or oechem.OEGetSDData(mol, 'chloroacetamide') == 'True'):
            return None
    except Exception as e:
        # Best-effort filter: report the problem and continue classifying.
        print(e)

    smi = oechem.OECreateSmiString(mol)

    # Parse once and compute the size descriptors once; they do not depend
    # on the series being tested.
    rdmol = Chem.MolFromSmiles(smi)
    if rdmol is None:
        # RDKit could not interpret the OpenEye SMILES; no series can match.
        return None
    mol_weight = Descriptors.MolWt(rdmol)
    num_heavy_atoms = rdmol.GetNumHeavyAtoms()
    num_rings = rdMolDescriptors.CalcNumRings(rdmol)
    # Patterns such as "Ugi" contain explicit-hydrogen atoms (#1), so the
    # match is done on the hydrogen-expanded molecule.
    rdmol_with_hs = Chem.AddHs(rdmol)

    for series, series_SMARTS in series_SMARTS_dict.items():
        mw_cutoff, atoms_cutoff, rings_cutoff = series_cutoffs.get(series, default_cutoffs)
        if mol_weight > mw_cutoff:
            continue
        if num_heavy_atoms > atoms_cutoff:
            continue
        if num_rings > rings_cutoff:
            continue
        if rdmol_with_hs.HasSubstructMatch(Chem.MolFromSmarts(series_SMARTS)):
            return series

    return None
def process(self, record, port):
    """Convert the MD trajectory attached to ``record`` into trajectory OEMols.

    The protein, ligand and water trajectories are extracted, stored in a new
    sub-record that is attached to ``record`` under
    ``Fields.Analysis.oetraj_rec``, and ``'OETraj'`` is appended to the
    record's list of completed analyses. On success the record is emitted on
    ``self.success``; on any exception the error is printed and logged and the
    record is emitted on ``self.failure``.
    """
    try:
        # Work on a private copy of the options: parallel cube processes
        # would otherwise collide on filenames.
        opt = dict(self.opt)

        # Wrap the record so the MD Record API can be used.
        mdrecord = MDDataRecord(record)

        log = opt['Logger']
        log.info(' ')
        system_title = mdrecord.get_title

        msg = '{}: Attempting MD Traj conversion into OEMols'.format(system_title)
        log.info(msg)

        traj_fn = mdrecord.get_stage_trajectory()
        traj_dir = os.path.dirname(traj_fn)

        log.info('{} Temp Directory: {}'.format(system_title, traj_dir))
        log.info('{} Trajectory filename: {}'.format(system_title, traj_fn))

        # Build multi-conformer protein and ligand OEMols from the trajectory.
        msg = '{} Generating protein and ligand trajectory OEMols'.format(system_title)
        log.info(msg)

        flask = mdrecord.get_flask
        md_components = record.get_value(Fields.md_components)

        # The ligand stored in the MD components must have the same SMILES
        # as the reference ligand on the record.
        component_ligand = md_components.get_ligand
        reference_ligand = record.get_value(Fields.ligand)

        smi_lig_comp = oechem.OECreateSmiString(component_ligand)
        smi_lig_ref = oechem.OECreateSmiString(reference_ligand)

        if not (smi_lig_ref == smi_lig_comp):
            raise ValueError(
                "Ligand Isomeric Smiles String check failure: {} vs {}".format(
                    smi_lig_comp, smi_lig_ref))

        trajectories = utl.extract_aligned_prot_lig_wat_traj(
            md_components, flask, traj_fn, opt,
            water_cutoff=opt['water_cutoff'])
        ptraj, ltraj, wtraj = trajectories

        ltraj.SetTitle(record.get_value(Fields.ligand_name))
        ptraj.SetTitle(record.get_value(Fields.protein_name))

        log.info('{} #atoms, #confs in protein traj OEMol: {}, {}'.format(
            system_title, ptraj.NumAtoms(), ptraj.NumConfs()))
        log.info('{} #atoms, #confs in ligand traj OEMol: {}, {}'.format(
            system_title, ltraj.NumAtoms(), ltraj.NumConfs()))
        log.info('{} #atoms, #confs in water traj OEMol: {}, {}'.format(
            system_title, wtraj.NumAtoms(), wtraj.NumConfs()))

        # Assemble the sub-record that carries the OETraj results.
        oetrajRecord = OERecord()
        oetrajRecord.set_value(OEField('LigTraj', Types.Chem.Mol), ltraj)

        if wtraj:
            oetrajRecord.set_value(OEField('WatTraj', Types.Chem.Mol), wtraj)

        if in_orion():
            oetrajRecord.set_value(Fields.collection, mdrecord.collection_id)

        mdrecord_traj = MDDataRecord(oetrajRecord)
        mdrecord_traj.set_protein_traj(ptraj, shard_name="ProteinTrajConfs_")

        record.set_value(Fields.Analysis.oetraj_rec, oetrajRecord)

        # Extend the existing list of completed analyses, or start one.
        if not record.has_value(Fields.Analysis.analysesDone):
            analysesDone = ['OETraj']
        else:
            analysesDone = utl.RequestOEFieldType(
                record, Fields.Analysis.analysesDone)
            analysesDone.append('OETraj')

        record.set_value(Fields.Analysis.analysesDone, analysesDone)

        log.info('{}: saved protein, ligand and water traj OEMols'.format(
            system_title))

        self.success.emit(record)

        del mdrecord, mdrecord_traj

    except Exception as e:
        print("Failed to complete", str(e), flush=True)
        self.log.error(traceback.format_exc())
        # Route the failed record to the failure port.
        self.failure.emit(record)

    return None
can = m.can except: iok = F #print(' conversion failed!')#pass else: m = Mol(o.zs, o.coords, ican=True) can = m.can else: print(f) m = Mol(o.zs, o.coords, ican=True) can = m.can if (can != 'None') and (can_fmt in ['oechem',]): from openeye import oechem oem = oechem.OEGraphMol() assert oechem.OESmilesToMol(oem, can) can = oechem.OECreateSmiString(oem, oechem.OESMILESFlag_Canonical) s1 = '' if iok else ' [ conversion failed ]' if isf: print( i+1, f, can, s1 ) else: print( i+1, f, obj, can, s1 ) if sdf: zs, coords, chgs, bom = m.blk if m.na < 100: sdf = f[:-4]+'.sdf' write_ctab(zs, chgs, bom, coords, sdf=sdf) else: pdb = f[:-4]+'.pdb' write_pdb(m.blk, pdb)
def generate_fragments(molecule, generate_visualization=False, strict_stereo=False, combinatorial=True, MAX_ROTORS=2,
                       remove_map=True, json_filename=None):
    """
    This function generates fragments from molecules. The output is a dictionary that maps SMILES of molecules to SMILES
    for fragments. The default SMILES are generated with openeye.oechem.OEMolToSmiles. These SMILES strings are
    canonical isomeric SMILES.
    The dictionary also includes a provenance field which defines how the fragments were generated.

    Parameters
    ----------
    molecule: OEMol to fragment, or an iterable of them
        If the argument cannot be iterated (``list(molecule)`` raises ``TypeError``) it is treated as a single
        molecule.
    generate_visualization: bool
        If true, visualization of the fragments will be written to pdf files. The pdf will be written in the directory
        where this function is run from.
    combinatorial: bool
        If true, find all connected fragments from fragments and add all new fragments that have less than MAX_ROTORS
    MAX_ROTORS: int
        rotor threshold for combinatorial
    strict_stereo: bool
        Note: This applies to the molecule being fragmented. Not the fragments.
        If True, omega will generate conformation with stereochemistry defined in the SMILES string for charging.
    remove_map: bool
        If True, the index tags will be removed. This will remove duplicate fragments. Default True
    json_filename: str
        filename for JSON. If provided, will save the returned dictionary to a JSON file. Default is None

    Returns
    -------
    fragments: dict
        mapping of SMILES from the parent molecule to the SMILES of the fragments
    """
    fragments = dict()

    try:
        molecules = list(molecule)
    except TypeError:
        molecules = [molecule]

    # A separate loop name keeps the `molecule` argument from being rebound.
    for mol in molecules:
        # normalize molecule
        mol = normalize_molecule(mol, mol.GetTitle())
        if remove_map:
            # Remove tags from smiles. This is done to make it easier to find duplicate fragments
            for a in mol.GetAtoms():
                a.SetMapIdx(0)

        result = _generate_fragments(mol, strict_stereo=strict_stereo)
        if not result:
            logger().warning('Skipping {}, SMILES: {}'.format(mol.GetTitle(), oechem.OECreateSmiString(mol)))
            continue

        # First element is the charged molecule, last element the fragments.
        charged = result[0]
        frags = result[-1]
        frag_list = list(frags.values())

        if combinatorial:
            smiles = smiles_with_combined(frag_list, charged, MAX_ROTORS)
        else:
            smiles = frag_to_smiles(frag_list, charged)

        parent_smiles = mol_to_smiles(mol, isomeric=True, explicit_hydrogen=False, mapped=False)
        if smiles:
            fragments[parent_smiles] = list(smiles.keys())
        else:
            # Add molecule where no fragments were found for terminal torsions and / or rings and non rotatable bonds
            fragments[parent_smiles] = [mol_to_smiles(mol, isomeric=True, explicit_hydrogen=True, mapped=False)]

        if generate_visualization:
            IUPAC = oeiupac.OECreateIUPACName(mol)
            name = mol.GetTitle()
            if IUPAC == name:
                # When the title equals the IUPAC name, derive the file name from the SMILES instead.
                name = make_python_identifier(oechem.OEMolToSmiles(mol))[0]
            oname = '{}.pdf'.format(name)
            ToPdf(charged, oname, frags)

        # Drop the per-molecule references before moving to the next molecule.
        del result, charged, frags

    if json_filename:
        # The context manager closes the file even if serialization fails.
        with open(json_filename, 'w') as f:
            json.dump(fragments, f, indent=2, sort_keys=True)

    return fragments
ionic_strength_millimolar=ionic_strength_millimolar, pH=pH, fah_projects=fah_projects) # Compounds from fah_xchem.schema import Compound, CompoundMetadata smiles_flag = oechem.OESMILESFlag_Canonical | oechem.OESMILESFlag_ISOMERIC from openeye import oechem print('Processing compounds...') compounds = dict() with oechem.oemolistream(compounds_sdf_filename) as ifs: for oemol in ifs.GetOEGraphMols(): # Set ID and SMILES compound_id = oemol.GetTitle() smiles = oechem.OECreateSmiString(oemol, smiles_flag) # Extract experimental data, if present experimental_data = dict() if oechem.OEHasSDData(oemol, 'f_avg_pIC50'): pIC50 = oechem.OEGetSDData(oemol, 'f_avg_pIC50') if pIC50 != '': pIC50 = float(pIC50) experimental_data['pIC50'] = pIC50 # Extract information about the compound compound_metadata = CompoundMetadata( compound_id=compound_id, smiles=oechem.OECreateSmiString(oemol, smiles_flag), experimental_data=experimental_data, ) # Create new compound compound = Compound(metadata=compound_metadata, microstates=list())
def generate_fragments(inputf, output_dir, pdf=False, combinatorial=True, MAX_ROTORS=2, strict_stereo=True,
                       remove_map=True):
    """
    This function generates fragment SMILES files sorted by rotatable bonds from an input molecule file.
    The output .smi files are written out to `output_dir` and named `nrotor_n.smi` where n corresponds to the number
    of rotatable bonds for all fragments in the file.

    Parameters
    ----------
    inputf: str
        absolute path to input molecule file
    output_dir: str
        absolute path to output directory
    pdf: bool
        If true, visualization of the fragments will be written to pdf files. The pdf will be written in the directory
        where this function is run from.
    combinatorial: bool
        If true, find all connected fragments from fragments and add all new fragments that have less than MAX_ROTORS
    MAX_ROTORS: int
        rotor threshold for combinatorial
    strict_stereo: bool
        Forwarded unchanged to `_generate_fragments`.
    remove_map: bool
        If True, atom map indices are reset to 0 before fragmenting, which makes duplicate fragments easier to find.
    """
    ifs = oechem.oemolistream()
    smiles_unique = set()

    mol = oechem.OEMol()
    if ifs.open(inputf):
        try:
            while oechem.OEReadMolecule(ifs, mol):
                openeye.normalize_molecule(mol)
                logger().info('fragmenting {}...'.format(mol.GetTitle()))
                if remove_map:
                    # Remove tags from smiles. This is done to make it easier to find duplicate fragments
                    for a in mol.GetAtoms():
                        a.SetMapIdx(0)

                frags = _generate_fragments(mol, strict_stereo=strict_stereo)
                if not frags:
                    logger().warning('Skipping {}, SMILES: {}'.format(
                        mol.GetTitle(), oechem.OECreateSmiString(mol)))
                    continue

                # First element is the charged molecule, last element the fragments.
                charged = frags[0]
                frags = frags[-1]

                if combinatorial:
                    smiles = smiles_with_combined(frags, charged, MAX_ROTORS=MAX_ROTORS)
                else:
                    smiles = frag_to_smiles(frags, charged)

                smiles_unique.update(smiles.keys())

                if pdf:
                    oname = '{}.pdf'.format(mol.GetTitle())
                    ToPdf(charged, oname, frags)
                del charged, frags
        finally:
            # Release the input stream whether or not fragmenting succeeded.
            ifs.close()
    else:
        # Make the failure visible; the (empty) outputs are still written below.
        logger().warning('Unable to open {} for reading'.format(inputf))

    # Generate oedatabase for all fragments
    # splitext/basename handle paths with no extension and dots in directory names.
    base = os.path.splitext(os.path.basename(inputf))[0]
    ofname = base + '_frags'
    # Sorted so the written file does not depend on set iteration order.
    utils.to_smi(sorted(smiles_unique), output_dir, ofname)

    ofname_ext = ofname + '.smi'
    oedb_name = os.path.join(output_dir, ofname_ext)
    utils.create_oedatabase_idxfile(oedb_name)

    _sort_by_rotbond(oedb_name, outdir=output_dir)