def search(self, mol0, mol1):
    """Find the largest common substructure of two molecules and deposit it.

    Both arguments are wrapper objects exposing the underlying OpenEye
    molecule as ``_struc``.  Working copies are made, hydrogens are
    suppressed, and an MCS search (approximate when ``self._is_approximate``
    is set) is run with the second molecule as pattern and the first as
    target.  Among all matches, the first one with the highest heavy-atom
    count wins.

    Returns whatever ``self.deposit_to_kbase`` returns for the winning
    match (1-based atom indices), or ``None`` when no match was found.
    """
    mol0 = mol0._struc
    mol1 = mol1._struc
    target = mol0.CreateCopy()
    pattern = mol1.CreateCopy()

    # Integer atom type: 1 for hydrogens, 2 for everything else.
    for work_mol in (target, pattern):
        for atom in work_mol.GetAtoms():
            atom.SetIntType(1 if atom.IsHydrogen() else 2)

    # Hydrogens are suppressed before the MCS search.
    oechem.OESuppressHydrogens(target)
    oechem.OESuppressHydrogens(pattern)

    search_args = [pattern, self._atom_expr, self._bond_expr]
    if self._is_approximate:
        search_args.append(oechem.OEMCSType_Approximate)
    mcss = oechem.OEMCSSearch(*search_args)

    # Accept any non-empty MCS and score matches with the
    # max-atoms / complete-cycles function.
    mcss.SetMinAtoms(1)
    mcss.SetMCSFunc(oechem.OEMCSMaxAtomsCompleteCycles(1.5))

    # Several matches may exist; keep the first one that has the largest
    # number of heavy atoms.
    best_mcs = None
    best_heavy_count = 0
    for match in mcss.Match(target, True):
        candidate = oechem.OEMol()
        oechem.OESubsetMol(candidate, match, True)
        oechem.OEFindRingAtomsAndBonds(candidate)
        heavy_count = sum(
            1 for atom in candidate.GetAtoms() if not atom.IsHydrogen())
        if heavy_count <= best_heavy_count:
            continue
        best_heavy_count = heavy_count
        best_mcs = candidate
        atom_match0 = []
        atom_match1 = []
        for pair in match.GetAtoms():
            atom_match0.append(pair.target.GetIdx() + 1)
            atom_match1.append(pair.pattern.GetIdx() + 1)

    if not best_mcs:
        return None

    # Dump the search result to the kbase.
    mol0 = struc.OeStruc(mol0)
    mol1 = struc.OeStruc(mol1)
    best_mcs = struc.OeStruc(best_mcs)
    return self.deposit_to_kbase(mol0.id(), mol1.id(), atom_match0,
                                 atom_match1)
def LigandProteinCloseContacts(prot, lig, maxgap):
    """Return the protein/ligand neighbour contacts within ``maxgap``.

    Both molecules are modified in place: their hydrogens are suppressed
    and ``DropLigandFromProtein`` is applied to ``prot`` before the
    nearest-neighbour search is built on it.  The result is the list of
    entries produced by ``OENearestNbrs.GetNbrs`` for the ligand.
    """
    for molecule in (prot, lig):
        oechem.OESuppressHydrogens(molecule)
    DropLigandFromProtein(prot, lig)

    neighbour_search = oechem.OENearestNbrs(prot, maxgap)
    contacts = neighbour_search.GetNbrs(lig)
    return list(contacts)
def _process_mol(mol: oechem.OEMol, explicit_H: Optional[str] = None): if explicit_H == 'all': oechem.OEAddExplicitHydrogens(mol) elif explicit_H == 'polar': oechem.OESuppressHydrogens(mol, explicit_H) elif explicit_H is None: oechem.OESuppressHydrogens(mol) else: raise ValueError oechem.OEAssignAromaticFlags(mol) oechem.OEAssignHybridization(mol) oechem.OEAssignFormalCharges(mol) mol.Sweep()
def main(argv=[__name__]):
    """Align every 3D fit molecule onto a 3D reference molecule via MCS.

    Expects ``argv`` as ``[prog, refmol, fitmol, outfile]``.  The reference
    molecule is written to the output first, then has its hydrogens
    suppressed; each fit molecule that has 3D coordinates is handed to
    ``MCSAlign`` together with the output stream.
    """
    if len(argv) != 4:
        oechem.OEThrow.Usage("%s <refmol> <fitmol> <outfile>" % argv[0])

    # Reference molecule: the stream format and the molecule must be 3D.
    ref_stream = oechem.oemolistream()
    if not ref_stream.open(argv[1]):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % argv[1])
    if not oechem.OEIs3DFormat(ref_stream.GetFormat()):
        oechem.OEThrow.Fatal("Invalid input format: need 3D coordinates")

    refmol = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(ref_stream, refmol):
        oechem.OEThrow.Fatal("Unable to read molecule in %s" % argv[1])
    if refmol.GetDimension() != 3:
        oechem.OEThrow.Fatal(
            "%s doesn't have 3D coordinates" % refmol.GetTitle())

    # Fit molecules.
    fit_stream = oechem.oemolistream()
    if not fit_stream.open(argv[2]):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % argv[2])
    if not oechem.OEIs3DFormat(fit_stream.GetFormat()):
        oechem.OEThrow.Fatal("Invalid input format: need 3D coordinates")

    # Output.
    out_stream = oechem.oemolostream()
    if not out_stream.open(argv[3]):
        oechem.OEThrow.Fatal("Unable to open %s for writing" % argv[3])
    if not oechem.OEIs3DFormat(out_stream.GetFormat()):
        oechem.OEThrow.Fatal("Invalid output format: need 3D coordinates")

    # The reference is written with its hydrogens, then stripped for the
    # alignment.
    oechem.OEWriteConstMolecule(out_stream, refmol)
    oechem.OESuppressHydrogens(refmol)

    for fitmol in fit_stream.GetOEGraphMols():
        if fitmol.GetDimension() == 3:
            MCSAlign(refmol, fitmol, out_stream)
        else:
            oechem.OEThrow.Warning(
                "%s doesn't have 3D coordinates" % fitmol.GetTitle())
def prepare(self):
    """Render each entry of ``self._molTitleList`` into its grid cell.

    For every cell of the image grid the matching ``(ccId, oeMol, title)``
    entry is depicted.  When the ``suppressHydrogens`` parameter is set, a
    hydrogen-suppressed copy of the molecule is drawn; otherwise the
    molecule itself is used (and its title is overwritten).  A border is
    drawn around the cell when the ``cellBorders`` parameter is set.
    """
    self.__setupImage()
    for idx, cell in enumerate(self.__grid.GetCells()):
        ccId, oeMol, title = self._molTitleList[idx]
        logger.debug("Preparing %s %r", ccId, title)

        if self._params["suppressHydrogens"]:
            # OESuppressHydrogens works in place and returns a status flag,
            # not a molecule, so hold on to the copy and draw that.
            mol = oechem.OEGraphMol(oeMol)
            oechem.OESuppressHydrogens(mol)
        else:
            mol = oeMol

        if self.__useTitle and title:
            mol.SetTitle(title)
            self._opts.SetTitleHeight(5.0)
        else:
            mol.SetTitle("")

        oedepict.OEPrepareDepiction(mol)
        self._opts.SetDimensions(cell.GetWidth(), cell.GetHeight(),
                                 oedepict.OEScale_AutoScale)
        self._assignDisplayOptions()

        disp = oedepict.OE2DMolDisplay(mol, self._opts)
        oedepict.OERenderMolecule(cell, disp)
        if self._params["cellBorders"]:
            oedepict.OEDrawBorder(cell, oedepict.OEPen(oedepict.OEBlackPen))
def test_adjacency(smiles, expected_adj):
    """Check the adjacency matrix built for a hydrogen-suppressed SMILES.

    Returns ``True`` early, after logging a warning, when no OEChem licence
    is available.
    """
    licensed = oechem.OEChemIsLicensed()
    if not licensed:
        logging.warning(
            "License for OpenEye OEChem TK is not found. Not testing featurizers."
        )
        return True

    molecule = oechem.OEMol()
    oechem.OESmilesToMol(molecule, smiles)
    oechem.OESuppressHydrogens(molecule)

    adjacency = Adjacency(mol=molecule)
    assert adjacency.adj_mat.tolist() == expected_adj
def ImportMolecule(filename):
    """Read the first molecule of ``filename`` for surface calculations.

    Bondi van der Waals radii are assigned and hydrogens are suppressed
    before the molecule is returned.  ``OEThrow.Fatal`` is raised when the
    file cannot be opened or no molecule can be read from it.
    """
    ifs = oechem.oemolistream()
    if not ifs.open(filename):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % filename)

    mol = oechem.OEGraphMol()
    # A failed read would otherwise hand an empty molecule to the caller.
    if not oechem.OEReadMolecule(ifs, mol):
        oechem.OEThrow.Fatal("Unable to read molecule in %s" % filename)
    ifs.close()

    oechem.OEAssignBondiVdWRadii(mol)
    oechem.OESuppressHydrogens(mol)
    return mol
def align2d(file1, file2):
    """Depict molecules of ``file1`` aligned onto their partners in ``file2``.

    The two files are read in lockstep.  Each molecule of ``file2`` gets 2D
    coordinates and is used as a substructure query (atomic number and ring
    membership on atoms, ring membership on bonds) to align the depiction
    of the partner from ``file1``.  Both molecules are rendered into
    consecutive report cells and the report is written to ``output.pdf``.
    """
    atom_opts = oechem.OEExprOpts_AtomicNumber | oechem.OEExprOpts_RingMember
    bond_opts = oechem.OEExprOpts_RingMember

    stream1 = oechem.oemolistream(file1)
    stream2 = oechem.oemolistream(file2)
    stream1.SetConfTest(oechem.OEAbsCanonicalConfTest())
    stream2.SetConfTest(oechem.OEAbsCanonicalConfTest())

    popts, dopts, report = prep_pdf_writer()

    for mol1, mol2 in zip(stream1.GetOEMols(), stream2.GetOEMols()):
        pair = (mol1, mol2)
        for molecule in pair:
            oechem.OESuppressHydrogens(molecule)
        oechem.OEGenerate2DCoordinates(mol2)

        query = oechem.OESubSearch(mol2, atom_opts, bond_opts)
        oechem.OEPrepareSearch(mol1, query)
        alignment = oedepict.OEPrepareAlignedDepiction(mol1, query)
        if not alignment.IsValid():
            oechem.OEThrow.Error(
                "Substructure is not found in input molecule!")

        # Same call order as before: both cells, both depictions, both
        # displays, then both renders.
        cells = [report.NewCell(), report.NewCell()]
        for molecule in pair:
            oedepict.OEPrepareDepiction(molecule, popts)
        displays = [oedepict.OE2DMolDisplay(molecule, dopts)
                    for molecule in pair]
        for cell, display in zip(cells, displays):
            oedepict.OERenderMolecule(cell, display)

    out = oechem.oeofstream()
    if not out.open('output.pdf'):
        oechem.OEThrow.Fatal("Cannot open output file!")
    oedepict.OEWriteReport(out, "pdf", report)
def main(args):
    """Write the void-volume surface between a protein and a ligand.

    ``args`` is ``[prog, protein, ligand, surface]``.  Both molecules are
    read, stripped of hydrogens and given Bondi radii; the void volume grid
    is built at 0.5 resolution, contoured at 0.5 and written to the surface
    file.  Returns 0.
    """
    if len(args) != 4:
        oechem.OEThrow.Usage("%s <protein> <ligand> <surface>" % args[0])

    def load_heavy_atoms(path):
        # Read one molecule, drop hydrogens, then assign Bondi radii.
        stream = oechem.oemolistream(path)
        molecule = oechem.OEGraphMol()
        oechem.OEReadMolecule(stream, molecule)
        oechem.OESuppressHydrogens(molecule)
        oechem.OEAssignBondiVdWRadii(molecule)
        return molecule

    protein = load_heavy_atoms(args[1])
    ligand = load_heavy_atoms(args[2])

    void_grid = oegrid.OEScalarGrid()
    oespicoli.OEMakeVoidVolume(protein, ligand, void_grid, 0.5)

    surface = oespicoli.OESurface()
    oespicoli.OEMakeSurfaceFromGrid(surface, void_grid, 0.5)
    oespicoli.OEWriteSurface(args[3], surface)
    return 0
def _OEFixConnectionNH(protein):
    """Repair the hydrogen count on modeled backbone nitrogens.

    Temporary fix, thanks to Jesper!

    Every modeled nitrogen whose PDB atom name is ``N`` is expected to
    carry one hydrogen, or none when the residue is proline.  When the
    total hydrogen count differs, the atom's hydrogens are suppressed, the
    implicit count is reset to the expected value, explicit hydrogens are
    re-added and their 3D geometry is set.  ``protein`` is modified in
    place.
    """
    modeled_nitrogens = oechem.OEAndAtom(oespruce.OEIsModeledAtom(),
                                         oechem.OEIsNitrogen())
    for atom in protein.GetAtoms(modeled_nitrogens):
        if oechem.OEGetPDBAtomIndex(atom) != oechem.OEPDBAtomName_N:
            continue

        expected_h_count = 1
        if oechem.OEGetResidueIndex(atom) == oechem.OEResidueIndex_PRO:
            expected_h_count = 0

        if atom.GetTotalHCount() == expected_h_count:
            continue

        oechem.OESuppressHydrogens(atom)
        # Use the expected count (0 for proline) rather than a fixed 1, so
        # a proline nitrogen is not given a hydrogen.
        atom.SetImplicitHCount(expected_h_count)
        oechem.OEAddExplicitHydrogens(protein, atom)
        for nbr in atom.GetAtoms(oechem.OEIsHydrogen()):
            oechem.OESet3DHydrogenGeom(protein, nbr)
def PrepareDepiction(mol, clearcoords=False, suppressH=True):
    """Prepare ``mol`` in place for 2D depiction and return ``True``.

    The dimension is set from the coordinates and chirality is perceived.
    New 2D coordinates are generated when the molecule is not already 2D or
    when ``clearcoords`` is set: stereo is first derived from 3D
    coordinates if present, hydrogens are optionally suppressed
    (``suppressH``) and depiction hydrogens are added back.  The dimension
    is finally forced to 2.
    """
    oechem.OESetDimensionFromCoords(mol)
    oechem.OEPerceiveChiral(mol)

    dimension = mol.GetDimension()
    if dimension != 2 or clearcoords:
        if dimension == 3:
            # Carry the 3D stereo information over before flattening.
            oechem.OE3DToBondStereo(mol)
            oechem.OE3DToAtomStereo(mol)
        if suppressH:
            oechem.OESuppressHydrogens(mol)
        oechem.OEAddDepictionHydrogens(mol)
        oechem.OEDepictCoordinates(mol)
        oechem.OEMDLPerceiveBondStereo(mol)

    mol.SetDimension(2)
    return True
def prepare(self):
    """Render every entry of ``self._molTitleList`` onto grid pages.

    Molecules fill a ``gridRows`` x ``gridCols`` image grid cell by cell;
    when the current grid is exhausted a new page is requested from the
    multi-page image and a fresh grid is started on it.  When the
    ``suppressHydrogens`` parameter is set, a hydrogen-suppressed copy of
    each molecule is drawn; otherwise the molecule itself is used (and its
    title is overwritten).  Every cell gets a black border.
    """
    self.__setupImage()
    rows = self._params["gridRows"]
    cols = self._params["gridCols"]
    # NOTE(review): cell gap and margins are applied only to grids created
    # for follow-up pages, not to this first grid - confirm that is
    # intended.
    grid = oedepict.OEImageGrid(self.__image, rows, cols)
    citer = grid.GetCells()

    for ccId, oeMol, title in self._molTitleList:
        logger.debug("Preparing %s %r", ccId, title)
        if not citer.IsValid():
            # Current page is full: go to the next page.
            self.__image = self.__multi.NewPage()
            grid = oedepict.OEImageGrid(self.__image, rows, cols)
            grid.SetCellGap(self._params["cellGap"])
            grid.SetMargins(self._params["cellMargin"])
            citer = grid.GetCells()
        cell = citer.Target()

        if self._params["suppressHydrogens"]:
            # OESuppressHydrogens works in place and returns a status flag,
            # not a molecule, so hold on to the copy and draw that.
            mol = oechem.OEGraphMol(oeMol)
            oechem.OESuppressHydrogens(mol)
        else:
            mol = oeMol

        if self.__useTitle and title:
            mol.SetTitle(title)
            self._opts.SetTitleHeight(5.0)
        else:
            mol.SetTitle("")

        oedepict.OEPrepareDepiction(mol)
        self._opts.SetDimensions(cell.GetWidth(), cell.GetHeight(),
                                 oedepict.OEScale_AutoScale)
        self._assignDisplayOptions()

        disp = oedepict.OE2DMolDisplay(mol, self._opts)
        oedepict.OERenderMolecule(cell, disp)
        oedepict.OEDrawBorder(cell, oedepict.OEPen(oedepict.OEBlackPen))

        citer.Next()
def generate_restricted_conformers(receptor, refmol, mol, core_smarts=None):
    """
    Generate and select a conformer of the specified molecule using the reference molecule

    Conformers are built with Omega while a core fragment of ``refmol`` is
    held fixed.  Each conformer is scored for clashes, docking score
    (Chemgauss4) and shape overlap with ``refmol``; among the top 10% by
    overlap, the conformer with the lowest docking score is returned.

    Parameters
    ----------
    receptor : openeye.oechem.OEGraphMol
        Receptor (already prepped for docking) for identifying optimal pose
    refmol : openeye.oechem.OEGraphMol
        Reference molecule which shares some part in common with the proposed molecule
    mol : openeye.oechem.OEGraphMol
        Molecule whose conformers are to be enumerated
    core_smarts : str, optional, default=None
        If core_smarts is specified, substructure will be extracted using SMARTS.

    Returns
    -------
    openeye.oechem.OEGraphMol or None
        Single-conformer molecule annotated with ``clash_score``,
        ``docking_score``, ``overlap_score``, ``MMFF_internal_energy`` and
        ``docked_smiles`` SD data, or None when no usable core fragment,
        protomer or conformer could be produced.
    """
    from openeye import oechem, oedocking, oeomega, oequacpac, oeshape

    logging.debug(
        f'mol: {oechem.OEMolToSmiles(mol)} | core_smarts: {core_smarts}')

    # Be quiet (note: this changes the process-wide OEThrow level)
    oechem.OEThrow.SetLevel(oechem.OEErrorLevel_Quiet)

    # Get core fragment
    if core_smarts:
        # Truncate refmol to SMARTS if specified; the first match is used
        ss = oechem.OESubSearch(core_smarts)
        oechem.OEPrepareSearch(refmol, ss)
        core_fragment = None
        for match in ss.Match(refmol):
            core_fragment = oechem.OEGraphMol()
            oechem.OESubsetMol(core_fragment, match)
            logging.debug(
                f'Truncated refmol to generate core_fragment: {oechem.OEMolToSmiles(core_fragment)}'
            )
            break
        if core_fragment is None:
            # Without this guard the fragment would be unbound below.
            logging.warning(
                f'core_smarts {core_smarts} does not match the reference molecule'
            )
            return None
    else:
        core_fragment = GetCoreFragment(refmol, [mol])
        oechem.OESuppressHydrogens(core_fragment)
        MIN_CORE_ATOMS = 6
        if core_fragment.NumAtoms() < MIN_CORE_ATOMS:
            return None

    # Create an Omega instance
    omegaOpts = oeomega.OEOmegaOptions(oeomega.OEOmegaSampling_Dense)

    # Set the fixed reference molecule
    omegaFixOpts = oeomega.OEConfFixOptions()
    omegaFixOpts.SetFixMaxMatch(10)  # allow multiple MCSS matches
    omegaFixOpts.SetFixDeleteH(True)  # only use heavy atoms
    omegaFixOpts.SetFixMol(core_fragment)
    omegaFixOpts.SetFixRMS(0.5)

    # Hybridization is deliberately left out of the atom expression: it
    # triggers an "unable to match unset hybridization" warning from
    # OESubSearch::Match() unless OEPrepareSearch is called on the target.
    atomexpr = oechem.OEExprOpts_Aromaticity | oechem.OEExprOpts_AtomicNumber
    bondexpr = oechem.OEExprOpts_BondOrder | oechem.OEExprOpts_Aromaticity
    omegaFixOpts.SetAtomExpr(atomexpr)
    omegaFixOpts.SetBondExpr(bondexpr)
    omegaOpts.SetConfFixOptions(omegaFixOpts)

    molBuilderOpts = oeomega.OEMolBuilderOptions()
    molBuilderOpts.SetStrictAtomTypes(
        False)  # don't give up if MMFF types are not found
    omegaOpts.SetMolBuilderOptions(molBuilderOpts)

    omegaOpts.SetWarts(False)  # expand molecule title
    omegaOpts.SetStrictStereo(True)  # set strict stereochemistry
    omegaOpts.SetIncludeInput(False)  # don't include input
    omegaOpts.SetMaxConfs(1000)  # generate lots of conformers
    omegaOpts.SetEnergyWindow(20.0)  # allow high energies
    omega = oeomega.OEOmega(omegaOpts)

    # TODO: Expand protonation states and tautomers
    if not oequacpac.OEGetReasonableProtomer(mol):
        logging.warning('No reasonable protomer found')
        return None

    mol = oechem.OEMol(mol)  # multi-conformer molecule

    ret_code = omega.Build(mol)
    if (mol.GetDimension() != 3) or (ret_code !=
                                     oeomega.OEOmegaReturnCode_Success):
        msg = f'\nOmega failure for {mol.GetTitle()} : SMILES {oechem.OEMolToSmiles(mol)} : core_smarts {core_smarts} : {oeomega.OEGetOmegaError(ret_code)}\n'
        logging.warning(msg)
        return None

    # Extract poses
    class Pose(object):
        def __init__(self, conformer):
            self.conformer = conformer
            self.clash_score = None
            self.docking_score = None
            self.overlap_score = None

    poses = [Pose(conf) for conf in mol.GetConfs()]

    # Score clashes
    bump_check = BumpCheck(receptor)
    for pose in poses:
        pose.clash_score = bump_check.count(pose.conformer)

    # Score docking poses
    score = oedocking.OEScore(oedocking.OEScoreType_Chemgauss4)
    score.Initialize(receptor)
    for pose in poses:
        pose.docking_score = score.ScoreLigand(pose.conformer)

    # Compute overlap scores
    overlap_prep = oeshape.OEOverlapPrep()
    overlap_prep.Prep(refmol)
    shapeFunc = oeshape.OEExactShapeFunc()
    shapeFunc.SetupRef(refmol)
    oeshape_result = oeshape.OEOverlapResults()
    for pose in poses:
        tmpmol = oechem.OEGraphMol(pose.conformer)
        overlap_prep.Prep(tmpmol)
        shapeFunc.Overlap(tmpmol, oeshape_result)
        pose.overlap_score = oeshape_result.GetRefTversky()

    # Filter poses based on top 10% of overlap
    poses = sorted(poses, key=lambda pose: pose.overlap_score)
    poses = poses[int(0.9 * len(poses)):]

    # Select the best docking score (ascending sort, first entry)
    poses = sorted(poses, key=lambda pose: pose.docking_score)
    pose = poses[0]
    mol.SetActive(pose.conformer)
    oechem.OESetSDData(mol, 'clash_score', str(pose.clash_score))
    oechem.OESetSDData(mol, 'docking_score', str(pose.docking_score))
    oechem.OESetSDData(mol, 'overlap_score', str(pose.overlap_score))

    # Convert to single-conformer molecule
    mol = oechem.OEGraphMol(mol)

    # Compute MMFF energy
    energy = mmff_energy(mol)
    oechem.OESetSDData(mol, 'MMFF_internal_energy', str(energy))

    # Store SMILES
    docked_smiles = oechem.OEMolToSmiles(mol)
    oechem.OESetSDData(mol, 'docked_smiles', docked_smiles)

    return mol
def generate_restricted_conformers(receptor, refmol, mol, core_smarts=None):
    """
    Generate and select a conformer of the specified molecule using the reference molecule

    Conformers are built with Omega while a core fragment of ``refmol`` is
    held fixed.  Each conformer is scored for clashes, docking score
    (Chemgauss4) and shape overlap with ``refmol``; among the top 10% by
    overlap, the conformer with the lowest docking score is returned.

    Parameters
    ----------
    receptor : openeye.oechem.OEGraphMol
        Receptor (already prepped for docking) for identifying optimal pose
    refmol : openeye.oechem.OEGraphMol
        Reference molecule which shares some part in common with the proposed molecule
    mol : openeye.oechem.OEGraphMol
        Molecule whose conformers are to be enumerated
    core_smarts : str, optional, default=None
        If core_smarts is specified, substructure will be extracted using SMARTS.
        NOTE(review): the value passed in is currently overwritten by the
        DEBUG assignments at the top of the body.

    Returns
    -------
    openeye.oechem.OEGraphMol or None
        Single-conformer molecule annotated with ``clash_score``,
        ``docking_score`` and ``overlap_score`` SD data, or None when no
        usable core fragment, protomer or conformer could be produced.
    """
    from openeye import oechem, oedocking, oeomega, oequacpac, oeshape

    # DEBUG: For benzotriazoles, truncate refmol
    # NOTE(review): these assignments discard the caller's core_smarts and
    # make the GetCoreFragment branch below unreachable - confirm whether
    # the override is still wanted.
    core_smarts = 'c1ccc(NC(=O)[C,N]n2nnc3ccccc32)cc1'  # prospective
    core_smarts = 'NC(=O)[C,N]n2nnc3ccccc32'  # retrospective

    # Get core fragment
    if core_smarts:
        # Truncate refmol to SMARTS if specified; the first match is used
        ss = oechem.OESubSearch(core_smarts)
        oechem.OEPrepareSearch(refmol, ss)
        core_fragment = None
        for match in ss.Match(refmol):
            core_fragment = oechem.OEGraphMol()
            oechem.OESubsetMol(core_fragment, match)
            break
        if core_fragment is None:
            # Without this guard the fragment would be unbound below.
            print(f'core_smarts {core_smarts} does not match the reference molecule')
            return None
    else:
        core_fragment = GetCoreFragment(refmol, [mol])
        oechem.OESuppressHydrogens(core_fragment)
        MIN_CORE_ATOMS = 6
        if core_fragment.NumAtoms() < MIN_CORE_ATOMS:
            return None

    # Create an Omega instance
    omegaOpts = oeomega.OEOmegaOptions(oeomega.OEOmegaSampling_Dense)

    # Set the fixed reference molecule
    omegaFixOpts = oeomega.OEConfFixOptions()
    omegaFixOpts.SetFixMaxMatch(10)  # allow multiple MCSS matches
    omegaFixOpts.SetFixDeleteH(True)  # only use heavy atoms
    omegaFixOpts.SetFixMol(core_fragment)
    omegaFixOpts.SetFixRMS(0.5)

    atomexpr = oechem.OEExprOpts_Aromaticity | oechem.OEExprOpts_Hybridization
    bondexpr = oechem.OEExprOpts_BondOrder | oechem.OEExprOpts_Aromaticity
    omegaFixOpts.SetAtomExpr(atomexpr)
    omegaFixOpts.SetBondExpr(bondexpr)
    omegaOpts.SetConfFixOptions(omegaFixOpts)

    molBuilderOpts = oeomega.OEMolBuilderOptions()
    molBuilderOpts.SetStrictAtomTypes(False)  # don't give up if MMFF types are not found
    omegaOpts.SetMolBuilderOptions(molBuilderOpts)

    omegaOpts.SetWarts(False)  # expand molecule title
    omegaOpts.SetStrictStereo(False)  # strict stereochemistry is switched off here
    omegaOpts.SetIncludeInput(False)  # don't include input
    omegaOpts.SetMaxConfs(1000)  # generate lots of conformers
    omega = oeomega.OEOmega(omegaOpts)

    if not oequacpac.OEGetReasonableProtomer(mol):
        print('No reasonable protomer found')
        return None

    mol = oechem.OEMol(mol)  # multi-conformer molecule

    ret_code = omega.Build(mol)
    if (mol.GetDimension() != 3) or (ret_code != oeomega.OEOmegaReturnCode_Success):
        print(f'Omega failure: {mol.GetDimension()} and {oeomega.OEGetOmegaError(ret_code)}')
        return None

    # Extract poses
    class Pose(object):
        def __init__(self, conformer):
            self.conformer = conformer
            self.clash_score = None
            self.docking_score = None
            self.overlap_score = None

    poses = [Pose(conf) for conf in mol.GetConfs()]

    # Score clashes
    bump_check = BumpCheck(receptor)
    for pose in poses:
        pose.clash_score = bump_check.count(pose.conformer)

    # Score docking poses
    score = oedocking.OEScore(oedocking.OEScoreType_Chemgauss4)
    score.Initialize(receptor)
    for pose in poses:
        pose.docking_score = score.ScoreLigand(pose.conformer)

    # Compute overlap scores
    overlap_prep = oeshape.OEOverlapPrep()
    overlap_prep.Prep(refmol)
    shapeFunc = oeshape.OEExactShapeFunc()
    shapeFunc.SetupRef(refmol)
    oeshape_result = oeshape.OEOverlapResults()
    for pose in poses:
        tmpmol = oechem.OEGraphMol(pose.conformer)
        overlap_prep.Prep(tmpmol)
        shapeFunc.Overlap(tmpmol, oeshape_result)
        pose.overlap_score = oeshape_result.GetRefTversky()

    # Filter poses based on top 10% of overlap
    poses = sorted(poses, key=lambda pose: pose.overlap_score)
    poses = poses[int(0.9*len(poses)):]

    # Select the best docking score (ascending sort, first entry)
    poses = sorted(poses, key=lambda pose: pose.docking_score)
    pose = poses[0]
    mol.SetActive(pose.conformer)
    oechem.OESetSDData(mol, 'clash_score', str(pose.clash_score))
    oechem.OESetSDData(mol, 'docking_score', str(pose.docking_score))
    oechem.OESetSDData(mol, 'overlap_score', str(pose.overlap_score))

    # Convert to single-conformer molecule
    mol = oechem.OEGraphMol(mol)

    return mol
def main(argv=[__name__]):
    """
    Depict dihedral-angle histograms for two hard-coded ligand files.

    ``argv`` is not read by the active code; the input file names and the
    output name are fixed in the body.  Always returns 0.

    The remainder of this docstring is the disabled command-line driver
    that used to precede the active code; it is kept for reference only.

    itf = oechem.OEInterface()
    oechem.OEConfigure(itf, InterfaceData)

    if not oechem.OEParseCommandLine(itf, argv):
        return 1

    oname = itf.GetString("-out")
    iname = itf.GetString("-in")

    ext = oechem.OEGetFileExtension(oname)
    if not oedepict.OEIsRegisteredImageFile(ext):
        oechem.OEThrow.Fatal("Unknown image type!")

    ofs = oechem.oeofstream()
    if not ofs.open(oname):
        oechem.OEThrow.Fatal("Cannot open output file!")

    ## INPUT PARAMETERS
    #########################################################
    #########################################################

    mm = 'tyk2/og_pdbs'
    qml = 'tyk2/forward_snapshots'
    phase = 'solvent'
    which_ligand = 'old'
    dir_name = iname
    ligand_pdbs_mm = glob.glob(f"{mm}/{dir_name}/{which_ligand}*{phase}.pdb")
    print(len(ligand_pdbs_mm))
    ligand_pdbs_qml = glob.glob(f"{qml}/{dir_name}/{which_ligand}*{phase}.pdb")
    print(len(ligand_pdbs_qml))

    #d = np.load('full_data_dict.npy', allow_pickle=True)
    from_ligand, to_ligand = iname.replace('from', '').replace('to', '').replace('lig', '')
    print(from_ligand)
    print(to_ligand)
    #key1 = (1, 8)
    #key2 = ('solvent', which_ligand)
    #########################################################
    #########################################################

    #d = d.flatten()[0]
    #work = d[key1][key2]
    #print(work)

    for i, (mm_pdb_path, ani_pdb_path) in enumerate(zip(ligand_pdbs_mm, ligand_pdbs_qml)):
        print(mm_pdb_path, ani_pdb_path)
        if i == 0:
            MM_mol = createOEMolFromSDF(mm_pdb_path, 0)
            ANI_mol = createOEMolFromSDF(ani_pdb_path, 0)
        else:
            # there absolutely must be a better/faster way of doing this because this is ugly and slow
            MM_mol.NewConf(createOEMolFromSDF(mm_pdb_path, 0))
            ANI_mol.NewConf(createOEMolFromSDF(ani_pdb_path, 0))
    """
    # NOTE(review): the stream is created but never opened in this
    # function, and "tor_out" carries no file extension for
    # OEGetFileExtension to pick up - confirm how OEWriteImage at the end
    # is expected to produce output.
    ofs = oechem.oeofstream()
    oname = f"tor_out"
    ext = oechem.OEGetFileExtension(oname)

    # Hard-coded inputs, loaded through createOEMolFromSDF.
    mm_pdb_path = f"og_lig0_solvent.pdb"
    ani_pdb_path = f"forward_lig0.solvent.pdb"
    MM_mol = createOEMolFromSDF(mm_pdb_path, 0)
    ANI_mol = createOEMolFromSDF(ani_pdb_path, 0)

    mol = MM_mol
    mol2 = ANI_mol

    # Give both molecules the same treatment: drop hydrogens, put atoms and
    # bonds in canonical order, then sweep.
    for m in [mol, mol2]:
        oechem.OESuppressHydrogens(m)
        oechem.OECanonicalOrderAtoms(m)
        oechem.OECanonicalOrderBonds(m)
        m.Sweep()

    # No reference molecule is passed to the depiction.
    refmol = None

    # Tag under which the dihedral helpers are handed the histogram data.
    stag = "dihedral_histogram"
    itag = oechem.OEGetTag(stag)

    nrbins = 20

    print(mol.NumConfs())
    print(mol2.NumConfs())

    get_dihedrals(mol, itag)
    set_dihedral_histograms(mol, itag, nrbins)

    get_dihedrals(mol2, itag)
    #set_weighted_dihedral_histograms(mol2, itag, work, nrbins)
    set_dihedral_histograms(mol2, itag, nrbins)

    # Image layout: molecule frame on the left 70% of the width, data frame
    # in the remaining 30%.
    width, height = 800, 400
    image = oedepict.OEImage(width, height)

    moffset = oedepict.OE2DPoint(0, 0)
    mframe = oedepict.OEImageFrame(image, width * 0.70, height, moffset)
    doffset = oedepict.OE2DPoint(mframe.GetWidth(), height * 0.30)
    dframe = oedepict.OEImageFrame(image, width * 0.30, height * 0.5, doffset)

    flexibility = True
    colorg = get_color_gradient(nrbins, flexibility)

    opts = oedepict.OE2DMolDisplayOptions(mframe.GetWidth(), mframe.GetHeight(), oedepict.OEScale_AutoScale)

    depict_dihedrals(mframe, dframe, mol, mol2, refmol, opts, itag, nrbins, colorg)

    # The colour-gradient legend is only drawn when flexibility is on.
    if flexibility:
        lopts = oedepict.OELegendLayoutOptions(
            oedepict.OELegendLayoutStyle_HorizontalTopLeft,
            oedepict.OELegendColorStyle_LightBlue,
            oedepict.OELegendInteractiveStyle_Hover)
        lopts.SetButtonWidthScale(1.2)
        lopts.SetButtonHeightScale(1.2)
        lopts.SetMargin(oedepict.OEMargin_Right, 40.0)
        lopts.SetMargin(oedepict.OEMargin_Bottom, 80.0)

        legend = oedepict.OELegendLayout(image, "Legend", lopts)

        legend_area = legend.GetLegendArea()
        draw_color_gradient(legend_area, colorg)

        oedepict.OEDrawLegendLayout(legend)

    iconscale = 0.5
    oedepict.OEAddInteractiveIcon(image, oedepict.OEIconLocation_TopRight, iconscale)
    oedepict.OEDrawCurvedBorder(image, oedepict.OELightGreyPen, 10.0)

    oedepict.OEWriteImage(ofs, ext, image)

    return 0
def prepare_receptor(complex_pdb_filename, output_basepath, dimer=False): """ Parameters ---------- complex_pdb_filename : str The complex PDB file to read in output_basepath : str Base path for output dimer : bool, optional, default=False If True, generate the dimer as the biological unit """ import os basepath, filename = os.path.split(complex_pdb_filename) prefix, extension = os.path.splitext(filename) prefix = os.path.join(output_basepath, prefix) # Check if receptor already exists receptor_filename = f'{prefix}-receptor.oeb.gz' thiolate_receptor_filename = f'{prefix}-receptor-thiolate.oeb.gz' if os.path.exists(receptor_filename) and os.path.exists( thiolate_receptor_filename): return # Read in PDB file pdbfile_lines = [ line for line in open(complex_pdb_filename, 'r') if 'UNK' not in line ] # If monomer is specified, drop crystal symmetry lines if not dimer: pdbfile_lines = [ line for line in pdbfile_lines if 'REMARK 350' not in line ] # Reconstruct PDBFile contents pdbfile_contents = ''.join(pdbfile_lines) # Read the receptor and identify design units from openeye import oespruce, oechem from tempfile import NamedTemporaryFile with NamedTemporaryFile(delete=False, mode='wt', suffix='.pdb') as pdbfile: pdbfile.write(pdbfile_contents) pdbfile.close() complex = read_pdb_file(pdbfile.name) # TODO: Clean up #print('Identifying design units...') design_units = list(oespruce.OEMakeDesignUnits(complex)) if len(design_units) == 1: design_unit = design_units[0] elif len(design_units) > 1: #print('More than one design unit found---using first one') design_unit = design_units[0] elif len(design_units) == 0: raise Exception(f' * No design units found for {complex_pdb_filename}') # Prepare the receptor #print('Preparing receptor...') from openeye import oedocking protein = oechem.OEGraphMol() design_unit.GetProtein(protein) ligand = oechem.OEGraphMol() design_unit.GetLigand(ligand) receptor = oechem.OEGraphMol() oedocking.OEMakeReceptor(receptor, protein, ligand) 
oedocking.OEWriteReceptorFile(receptor, receptor_filename) with oechem.oemolostream(f'{prefix}-protein.pdb') as ofs: oechem.OEWriteMolecule(ofs, protein) with oechem.oemolostream(f'{prefix}-ligand.mol2') as ofs: oechem.OEWriteMolecule(ofs, ligand) with oechem.oemolostream(f'{prefix}-ligand.pdb') as ofs: oechem.OEWriteMolecule(ofs, ligand) with oechem.oemolostream(f'{prefix}-ligand.sdf') as ofs: oechem.OEWriteMolecule(ofs, ligand) # Filter out UNK from PDB files (which have covalent adducts) pdbfile_lines = [ line for line in open(f'{prefix}-protein.pdb', 'r') if 'UNK' not in line ] with open(f'{prefix}-protein.pdb', 'wt') as outfile: outfile.write(''.join(pdbfile_lines)) # Adjust protonation state of CYS145 to generate thiolate form #print('Deprotonating CYS145...') pred = oechem.OEAtomMatchResidue(["CYS:145: :A"]) for atom in protein.GetAtoms(pred): if oechem.OEGetPDBAtomIndex(atom) == oechem.OEPDBAtomName_SG: oechem.OESuppressHydrogens(atom) atom.SetFormalCharge(-1) atom.SetImplicitHCount(0) # Adjust protonation states #print('Re-optimizing hydrogen positions...') place_hydrogens_opts = oechem.OEPlaceHydrogensOptions() place_hydrogens_opts.SetBypassPredicate(pred) protonate_opts = oespruce.OEProtonateDesignUnitOptions( place_hydrogens_opts) success = oespruce.OEProtonateDesignUnit(design_unit, protonate_opts) design_unit.GetProtein(protein) # Old hacky way to adjust protonation states #opts = oechem.OEPlaceHydrogensOptions() #opts.SetBypassPredicate(pred) #describe = oechem.OEPlaceHydrogensDetails() #success = oechem.OEPlaceHydrogens(protein, describe, opts) #if success: # oechem.OEUpdateDesignUnit(design_unit, protein, oechem.OEDesignUnitComponents_Protein) # Write thiolate form of receptor receptor = oechem.OEGraphMol() oedocking.OEMakeReceptor(receptor, protein, ligand) oedocking.OEWriteReceptorFile(receptor, thiolate_receptor_filename) with oechem.oemolostream(f'{prefix}-protein-thiolate.pdb') as ofs: oechem.OEWriteMolecule(ofs, protein) # Filter out UNK from 
PDB files (which have covalent adducts) pdbfile_lines = [ line for line in open(f'{prefix}-protein-thiolate.pdb', 'r') if 'UNK' not in line ] with open(f'{prefix}-protein-thiolate.pdb', 'wt') as outfile: outfile.write(''.join(pdbfile_lines))
# current license or subscription to the applicable OpenEye offering. # THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED. OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT # NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A # PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be # liable for any damages or liability in connection with the Sample Code # or its use. # @ <SNIPPET> from __future__ import print_function from openeye import oechem src = oechem.OEGraphMol() oechem.OESmilesToMol(src, "c1ccccc1") # make holes in the molecule index space and juggle things around oechem.OEAddExplicitHydrogens(src) oechem.OESuppressHydrogens(src) oechem.OEAddExplicitHydrogens(src) oechem.OECanonicalOrderAtoms(src) atommap = oechem.OEAtomArray(src.GetMaxAtomIdx()) dst = oechem.OEGraphMol() oechem.OECopyMol(dst, src, atommap) for srcatom in src.GetAtoms(): dstatom = atommap[srcatom.GetIdx()] print(srcatom.GetIdx(), " -> ", dstatom.GetIdx()) # @ </SNIPPET>
def suppressHydrogens(self, oeMol):
    """Return a hydrogen-suppressed ``OEMol`` copy of ``oeMol``.

    The input molecule is left untouched.  ``None`` is returned when
    ``oeMol`` is falsy; hydrogens are only suppressed when the copy itself
    is truthy.
    """
    if not oeMol:
        return None

    molCopy = oechem.OEMol(oeMol)
    if molCopy:
        oechem.OESuppressHydrogens(molCopy)
    return molCopy
help='if specified, will only store minimal information for each molecule (default: False)') parser.add_argument('--sort', dest='sort', action='store_true', default=False, help='if specified, will sort according to overlap (default: False)') parser.add_argument('--covalent', dest='covalent', action='store_true', default=False, help='if specified, will only consider those with `covalent_warhead=True` (default: False)') args = parser.parse_args() # Read the docked molecules as CSV print('Loading molecules...') docked_molecules = list() with oechem.oemolistream(args.docked_molecules) as ifs: docked_molecules = list() molecule = oechem.OEGraphMol() while oechem.OEReadMolecule(ifs, molecule): oechem.OESuppressHydrogens(molecule) docked_molecules.append( molecule.CreateCopy() ) print(f'{len(docked_molecules)} read') if args.covalent: print('Only filtering covalent fragments') docked_molecules = [molecule for molecule in docked_molecules if oechem.OEGetSDData(molecule, 'covalent_warhead')=='TRUE'] print(f'{len(docked_molecules)} remain after filtering') print('Loading fragments') filenames = glob(os.path.join(args.receptor_basedir, 'Mpro-x*-ligand.mol2')) fragments = dict() for filename in tqdm(filenames): with oechem.oemolistream(filename) as ifs: fragment = oechem.OEGraphMol() while oechem.OEReadMolecule(ifs, fragment):
def _read_lines_without_unk(pdb_filename):
    """Return the lines of ``pdb_filename`` that do not contain the text 'UNK'."""
    with open(pdb_filename, 'r') as infile:
        return [line for line in infile if 'UNK' not in line]


def _drop_unk_lines_in_place(pdb_filename):
    """Rewrite ``pdb_filename`` keeping only the lines that do not contain 'UNK'."""
    retained_lines = _read_lines_without_unk(pdb_filename)
    with open(pdb_filename, 'wt') as outfile:
        outfile.write(''.join(retained_lines))


def prepare_receptor(complex_pdb_filename, output_basepath, dimer=False, retain_water=False):
    """
    Prepare docking receptors and companion structure files from a complex PDB file.

    Two receptors are written below ``output_basepath``, both named after the
    input file's base name (``{prefix}``):

    * ``{prefix}-receptor.oeb.gz`` : receptor built from the first Spruce design unit
    * ``{prefix}-receptor-thiolate.oeb.gz`` : receptor rebuilt after setting
      CYS145 SG to formal charge -1 and HIS41 ND1 to formal charge +1

    The protein (``-protein.pdb``, ``-protein-thiolate.pdb``) and ligand
    (``-ligand.mol2``, ``-ligand.pdb``, ``-ligand.sdf``) are written alongside.
    If both receptor files already exist the function returns immediately.

    Parameters
    ----------
    complex_pdb_filename : str
        The complex PDB file to read in
    output_basepath : str
        Base path for output
    dimer : bool, optional, default=False
        If True, generate the dimer as the biological unit.
        If False, all lines containing 'REMARK 350' are dropped before parsing.
    retain_water : bool, optional, default=False
        If True, will retain waters.
        If False, all lines containing 'HOH' are dropped before parsing.

    Raises
    ------
    Exception
        If Spruce produces no design units; the message contains the captured
        toolkit warnings.
    """
    import os
    import re

    # Check whether this is a diamond SARS-CoV-2 Mpro structure or not
    # (raw string: '\d' is not a valid escape in a normal string literal)
    is_diamond_structure = (re.search(r'-x\d+_', complex_pdb_filename) is not None)

    _, filename = os.path.split(complex_pdb_filename)
    prefix, _ = os.path.splitext(filename)
    prefix = os.path.join(output_basepath, prefix)

    # Check if receptor already exists
    receptor_filename = f'{prefix}-receptor.oeb.gz'
    thiolate_receptor_filename = f'{prefix}-receptor-thiolate.oeb.gz'
    if os.path.exists(receptor_filename) and os.path.exists(thiolate_receptor_filename):
        return

    # Read in PDB file, skipping UNK atoms (left over from processing covalent ligands)
    pdbfile_lines = _read_lines_without_unk(complex_pdb_filename)

    # Check if biological symmetry header is present
    has_biological_symmetry_header = any('REMARK 350' in line for line in pdbfile_lines)

    # Prepend REMARK 350 (biological symmetry) header lines for Mpro (from 5RGG) if not present
    if is_diamond_structure and (not has_biological_symmetry_header):
        pdbfile_lines = [
            line + '\n' for line in BIOLOGICAL_SYMMETRY_HEADER.split('\n')
        ] + pdbfile_lines

    # If monomer is specified, drop crystal symmetry lines
    if not dimer:
        pdbfile_lines = [line for line in pdbfile_lines if 'REMARK 350' not in line]

    # Filter out waters
    if not retain_water:
        pdbfile_lines = [line for line in pdbfile_lines if 'HOH' not in line]

    # Filter out LINK records to covalent inhibitors so we can model non-covalent complex
    pdbfile_lines = [line for line in pdbfile_lines if 'LINK' not in line]

    # Reconstruct PDBFile contents
    pdbfile_contents = ''.join(pdbfile_lines)

    # Append SEQRES to all structures if they do not have it.
    # The records use the fixed-column PDB layout (serial number in columns
    # 8-10, chain in column 12, residue count in columns 14-17, residue names
    # from column 20), so the spacing inside this literal is significant.
    seqres = """\
SEQRES   1 A  306  SER GLY PHE ARG LYS MET ALA PHE PRO SER GLY LYS VAL
SEQRES   2 A  306  GLU GLY CYS MET VAL GLN VAL THR CYS GLY THR THR THR
SEQRES   3 A  306  LEU ASN GLY LEU TRP LEU ASP ASP VAL VAL TYR CYS PRO
SEQRES   4 A  306  ARG HIS VAL ILE CYS THR SER GLU ASP MET LEU ASN PRO
SEQRES   5 A  306  ASN TYR GLU ASP LEU LEU ILE ARG LYS SER ASN HIS ASN
SEQRES   6 A  306  PHE LEU VAL GLN ALA GLY ASN VAL GLN LEU ARG VAL ILE
SEQRES   7 A  306  GLY HIS SER MET GLN ASN CYS VAL LEU LYS LEU LYS VAL
SEQRES   8 A  306  ASP THR ALA ASN PRO LYS THR PRO LYS TYR LYS PHE VAL
SEQRES   9 A  306  ARG ILE GLN PRO GLY GLN THR PHE SER VAL LEU ALA CYS
SEQRES  10 A  306  TYR ASN GLY SER PRO SER GLY VAL TYR GLN CYS ALA MET
SEQRES  11 A  306  ARG PRO ASN PHE THR ILE LYS GLY SER PHE LEU ASN GLY
SEQRES  12 A  306  SER CYS GLY SER VAL GLY PHE ASN ILE ASP TYR ASP CYS
SEQRES  13 A  306  VAL SER PHE CYS TYR MET HIS HIS MET GLU LEU PRO THR
SEQRES  14 A  306  GLY VAL HIS ALA GLY THR ASP LEU GLU GLY ASN PHE TYR
SEQRES  15 A  306  GLY PRO PHE VAL ASP ARG GLN THR ALA GLN ALA ALA GLY
SEQRES  16 A  306  THR ASP THR THR ILE THR VAL ASN VAL LEU ALA TRP LEU
SEQRES  17 A  306  TYR ALA ALA VAL ILE ASN GLY ASP ARG TRP PHE LEU ASN
SEQRES  18 A  306  ARG PHE THR THR THR LEU ASN ASP PHE ASN LEU VAL ALA
SEQRES  19 A  306  MET LYS TYR ASN TYR GLU PRO LEU THR GLN ASP HIS VAL
SEQRES  20 A  306  ASP ILE LEU GLY PRO LEU SER ALA GLN THR GLY ILE ALA
SEQRES  21 A  306  VAL LEU ASP MET CYS ALA SER LEU LYS GLU LEU LEU GLN
SEQRES  22 A  306  ASN GLY MET ASN GLY ARG THR ILE LEU GLY SER ALA LEU
SEQRES  23 A  306  LEU GLU ASP GLU PHE THR PRO PHE ASP VAL VAL ARG GLN
SEQRES  24 A  306  CYS SER GLY VAL THR PHE GLN
"""
    has_seqres = 'SEQRES' in pdbfile_contents
    if not has_seqres:
        pdbfile_contents = seqres + pdbfile_contents

    # Read the receptor and identify design units
    from openeye import oespruce, oechem
    from tempfile import NamedTemporaryFile

    # The filtered contents go through a temporary file because read_pdb_file
    # takes a path; the file is removed again once it has been read.
    with NamedTemporaryFile(delete=False, mode='wt', suffix='.pdb') as pdbfile:
        pdbfile.write(pdbfile_contents)
    try:
        complex_mol = read_pdb_file(pdbfile.name)
    finally:
        os.remove(pdbfile.name)

    # Strip protons from structure to allow SpruceTK to add these back
    # See: 6wnp, 6wtj, 6wtk, 6xb2, 6xqs, 6xqt, 6xqu, 6m2n
    for atom in complex_mol.GetAtoms():
        if atom.GetAtomicNum() > 1:
            oechem.OESuppressHydrogens(atom)

    # Delete and rebuild C-terminal residue because Spruce causes issues with this
    # See: 6m2n 6lze
    pred = oechem.OEIsCTerminalAtom()
    for atom in complex_mol.GetAtoms():
        if pred(atom):
            for nbor in atom.GetAtoms():
                if oechem.OEGetPDBAtomIndex(nbor) == oechem.OEPDBAtomName_O:
                    complex_mol.DeleteAtom(nbor)

    # Log warnings to an in-memory stream so they can be reported with the
    # outcome; the default error stream is restored even if Spruce raises.
    errfs = oechem.oeosstream()
    oechem.OEThrow.SetOutputStream(errfs)
    try:
        oechem.OEThrow.Clear()
        oechem.OEThrow.SetLevel(oechem.OEErrorLevel_Verbose)  # capture verbose error output

        opts = oespruce.OEMakeDesignUnitOptions()
        opts.GetSplitOptions().SetMinLigAtoms(7)  # minimum fragment size (in heavy atoms)

        mdata = oespruce.OEStructureMetadata()
        opts.GetPrepOptions().SetStrictProtonationMode(True)

        # Both N- and C-termini should be zwitterionic
        # Mpro cleaves its own N- and C-termini
        # See https://www.pnas.org/content/113/46/12997
        opts.GetPrepOptions().GetBuildOptions().SetCapNTermini(False)
        opts.GetPrepOptions().GetBuildOptions().SetCapCTermini(False)
        # Don't allow truncation of termini, since force fields don't have parameters for this
        opts.GetPrepOptions().GetBuildOptions().GetCapBuilderOptions().SetAllowTruncate(False)
        # Build loops and sidechains
        opts.GetPrepOptions().GetBuildOptions().SetBuildLoops(True)
        opts.GetPrepOptions().GetBuildOptions().SetBuildSidechains(True)

        # Don't flip Gln189
        pred = oechem.OEAtomMatchResidue(["GLN:189:.*:.*:.*"])
        protonate_opts = opts.GetPrepOptions().GetProtonateOptions()
        place_hydrogens_opts = protonate_opts.GetPlaceHydrogensOptions()
        place_hydrogens_opts.SetNoFlipPredicate(pred)

        # Make design units
        design_units = list(oespruce.OEMakeDesignUnits(complex_mol, mdata, opts))
    finally:
        # Restore error stream
        oechem.OEThrow.SetOutputStream(oechem.oeerr)

    # Capture the warnings to a string
    warnings = errfs.str().decode("utf-8")

    if not design_units:
        print('')
        print('')
        print(f'{complex_pdb_filename} : FAILURE')
        print(warnings)
        msg = f'No design units found for {complex_pdb_filename}\n'
        msg += warnings
        msg += '\n'
        raise Exception(msg)

    design_unit = design_units[0]
    print('')
    print('')
    print(f'{complex_pdb_filename} : SUCCESS')
    print(warnings)

    # Prepare the receptor
    from openeye import oedocking
    protein = oechem.OEGraphMol()
    design_unit.GetProtein(protein)
    ligand = oechem.OEGraphMol()
    design_unit.GetLigand(ligand)

    # Create receptor and other files
    receptor = oechem.OEGraphMol()
    oedocking.OEMakeReceptor(receptor, protein, ligand)
    oedocking.OEWriteReceptorFile(receptor, receptor_filename)

    with oechem.oemolostream(f'{prefix}-protein.pdb') as ofs:
        oechem.OEWriteMolecule(ofs, protein)
    for ligand_extension in ('mol2', 'pdb', 'sdf'):
        with oechem.oemolostream(f'{prefix}-ligand.{ligand_extension}') as ofs:
            oechem.OEWriteMolecule(ofs, ligand)

    # Filter out UNK from PDB files (which have covalent adducts)
    _drop_unk_lines_in_place(f'{prefix}-protein.pdb')

    # Adjust protonation state of CYS145 to generate thiolate form
    pred = oechem.OEAtomMatchResidue(["CYS:145:.*:.*:.*"])
    place_hydrogens_opts.SetBypassPredicate(pred)
    for atom in protein.GetAtoms(pred):
        if oechem.OEGetPDBAtomIndex(atom) == oechem.OEPDBAtomName_SG:
            oechem.OESuppressHydrogens(atom)
            atom.SetFormalCharge(-1)
            atom.SetImplicitHCount(0)

    # Protonate HIS41 ND1
    pred = oechem.OEAtomMatchResidue(["HIS:41:.*:.*:.*"])
    # NOTE(review): this second SetBypassPredicate call presumably replaces the
    # CYS145 predicate set above rather than adding to it - confirm that
    # bypassing only HIS41 during re-protonation is intended.
    place_hydrogens_opts.SetBypassPredicate(pred)
    for atom in protein.GetAtoms(pred):
        if oechem.OEGetPDBAtomIndex(atom) == oechem.OEPDBAtomName_ND1:
            oechem.OESuppressHydrogens(atom)  # strip hydrogens from residue
            atom.SetFormalCharge(+1)
            atom.SetImplicitHCount(1)

    # Update the design unit with the modified formal charges
    oechem.OEUpdateDesignUnit(design_unit, protein,
                              oechem.OEDesignUnitComponents_Protein)

    # Re-optimize hydrogen positions; the return value of the call is not checked.
    oespruce.OEProtonateDesignUnit(design_unit, protonate_opts)
    design_unit.GetProtein(protein)

    # Write thiolate form of receptor
    receptor = oechem.OEGraphMol()
    oedocking.OEMakeReceptor(receptor, protein, ligand)
    oedocking.OEWriteReceptorFile(receptor, thiolate_receptor_filename)

    with oechem.oemolostream(f'{prefix}-protein-thiolate.pdb') as ofs:
        oechem.OEWriteMolecule(ofs, protein)

    # Filter out UNK from PDB files (which have covalent adducts)
    _drop_unk_lines_in_place(f'{prefix}-protein-thiolate.pdb')
def GetBestOverlays(self, querymolstr, options, iformat, oformat):
    """
    Search the shape database and return the overlaid hits as one string.

    querymolstr - a string containing the molecule (or shape query) to
                  search with
    options     - an instance of OEShapeDatabaseOptions
    iformat     - a string representing the file extension used to parse
                  querymolstr. A value starting with '.sq' is read as a
                  shape query. Note: old clients could be passing .sq
                  files, so if iformat == '.oeb' fails to parse as a
                  molecule the string is re-read as a shape query.
    oformat     - file format to write the results as

    Raises ValueError when the query string cannot be opened or parsed,
    or when the output string stream cannot be opened.
    """
    timer = oechem.OEWallTimer()

    # make sure to wait for the load to finish (blocking call)
    loaded = self.IsLoaded(True)
    assert loaded

    query = None
    if not iformat.startswith(".sq"):
        # read in query as a molecule
        qfs = SetupStream(oechem.oemolistream(), iformat)
        if not qfs.openstring(querymolstr):
            raise ValueError("Unable to open input molecule string")

        candidate = oechem.OEGraphMol()
        if oechem.OEReadMolecule(qfs, candidate):
            query = candidate
        elif iformat != ".oeb":
            raise ValueError(
                "Unable to read a molecule from the string of format '%s'" %
                iformat)
        # otherwise: could be an old client trying to send a .sq file,
        # fall through to the shape-query reader below

    if query is None:
        query = ReadShapeQuery(querymolstr)

    ofs = SetupStream(oechem.oemolostream(), oformat)
    if not ofs.openstring():
        raise ValueError("Unable to openstring for output")

    # this is a "Write" lock to be paranoid and not overload the GPU
    self.rwlock.AcquireWriteLock()
    try:
        scores = self.shapedb.GetSortedScores(query, options)
        sys.stderr.write("%f seconds to do search\n" % timer.Elapsed())
    finally:
        self.rwlock.ReleaseWriteLock()

    timer.Start()
    # write results
    for score in scores:
        mol_idx = score.GetMolIdx()
        mcmol = oechem.OEMol()
        if not self.moldb.GetMolecule(mcmol, mol_idx):
            oechem.OEThrow.Warning(
                "Can't retrieve molecule %i from the OEMolDatabase, "
                "skipping..." % mol_idx)
            continue

        # remove hydrogens to make output smaller, this also
        # ensures OEPrepareFastROCSMol will have the same output
        oechem.OESuppressHydrogens(mcmol)

        # keep only the conformer that produced this score
        hit_conf = mcmol.GetConf(oechem.OEHasConfIdx(score.GetConfIdx()))
        mol = oechem.OEGraphMol(hit_conf)
        oechem.OECopySDData(mol, mcmol)

        # tag the hit with the scores matching the similarity function used
        if options.GetSimFunc() == oefastrocs.OEShapeSimFuncType_Tanimoto:
            sd_fields = (
                ("ShapeTanimoto", score.GetShapeTanimoto),
                ("ColorTanimoto", score.GetColorTanimoto),
                ("TanimotoCombo", score.GetTanimotoCombo),
            )
        else:
            sd_fields = (
                ("ShapeTversky", score.GetShapeTversky),
                ("ColorTversky", score.GetColorTversky),
                ("TverskyCombo", score.GetTverskyCombo),
            )
        for tag, getter in sd_fields:
            oechem.OESetSDData(mol, tag, "%.4f" % getter())

        orientation = options.GetInitialOrientation()
        if orientation != oefastrocs.OEFastROCSOrientation_Inertial:
            oechem.OEAddSDData(mol, "Opt. Starting Pos.",
                               GetAltStartsString(orientation))

        score.Transform(mol)
        oechem.OEWriteMolecule(ofs, mol)

    output = ofs.GetString()
    sys.stderr.write("%f seconds to write hitlist\n" % timer.Elapsed())
    sys.stderr.flush()
    ofs.close()

    return output
def _expand_states(molecules, enumerate='protonation', max_states=200, suppress_hydrogen=True, reasonable=True, carbon_hybridization=True, level=0, verbose=True): """ Expand the state specified by enumerate variable Parameters ---------- molecules: OEMol or list of OEMol molecule to expand states enumerate: str, optional, default='protonation' Kind of state to enumerate. Choice of protonation, tautomers, stereoiserms suppress_hydrogen: bool, optional, default=True If True, will suppress explicit hydrogen reasonable: bool, optional, default=True If True, will rank tautomers by the most reasonable energetically carbon_hybridization: bool, optional, default=True If True, will allow carbon to change hybridization max_states: int, optional, default=200 verbose: Bool, optional, deault=TRue Returns ------- states: list of OEMol enumerated states """ if type(molecules) != type(list()): molecules = [molecules] states = list() for molecule in molecules: ostream = oechem.oemolostream() ostream.openstring() ostream.SetFormat(oechem.OEFormat_SDF) states_enumerated = 0 if suppress_hydrogen: oechem.OESuppressHydrogens(molecule) if enumerate == 'protonation': formal_charge_options = oequacpac.OEFormalChargeOptions() formal_charge_options.SetMaxCount(max_states) formal_charge_options.SetVerbose(verbose) if verbose: logger().debug("Enumerating protonation states...") for protonation_state in oequacpac.OEEnumerateFormalCharges(molecule, formal_charge_options): states_enumerated += 1 oechem.OEWriteMolecule(ostream, protonation_state) states.append(protonation_state) if enumerate == 'tautomers': #max_zone_size = molecule.GetMaxAtomIdx() tautomer_options = oequacpac.OETautomerOptions() tautomer_options.SetMaxTautomersGenerated(max_states) tautomer_options.SetLevel(level) tautomer_options.SetRankTautomers(reasonable) tautomer_options.SetCarbonHybridization(carbon_hybridization) #tautomer_options.SetMaxZoneSize(max_zone_size) tautomer_options.SetApplyWarts(True) if verbose: 
logger().debug("Enumerating tautomers...") for tautomer in oequacpac.OEEnumerateTautomers(molecule, tautomer_options): states_enumerated += 1 states.append(tautomer) if enumerate == 'stereoisomers': if verbose: logger().debug("Enumerating stereoisomers...") for enantiomer in oeomega.OEFlipper(molecule, max_states, True): states_enumerated += 1 enantiomer = oechem.OEMol(enantiomer) oechem.OEWriteMolecule(ostream, enantiomer) states.append(enantiomer) return states
def generate_fragalysis(
    series: CompoundSeriesAnalysis,
    fragalysis_config: FragalysisConfig,
    results_path: str,
) -> None:
    """
    Generate input and upload to fragalysis from fragalysis_config

    Fragalysis spec:https://discuss.postera.ai/t/providing-computed-poses-for-others-to-look-at/1155/8?u=johnchodera

    The ligand SDF named by ``fragalysis_config.ligands_filename`` is read from
    ``results_path``, every pose is stripped of hydrogens and of all SD tags
    except the free-energy estimates, the SD tags required by Fragalysis are
    added, a leading header molecule carrying tag descriptions and
    submitter metadata is inserted, and the result is written to
    ``{results_path}/fragalysis_upload/`` and uploaded with ``update_cset``.

    Parameters
    ----------
    series : CompoundSeriesAnalysis
        Analysis results (not referenced by this function's body)
    fragalysis_config : FragalysisConfig
        Fragalysis input parameters
    results_path : str
        The path to the results

    Raises
    ------
    IndexError
        If no ligand poses could be read from the ligand SDF file
    """
    import os
    from datetime import datetime
    from shutil import copyfile

    from openeye import oechem
    from rich.progress import track

    # make a directory to store fragalysis upload data
    fa_path = os.path.join(results_path, "fragalysis_upload")
    os.makedirs(fa_path, exist_ok=True)

    # set paths
    ligands_path = os.path.join(results_path, fragalysis_config.ligands_filename)
    fa_ligands_path = os.path.join(fa_path, fragalysis_config.fragalysis_sdf_filename)

    # copy sprint generated sdf to new name for fragalysis input
    copyfile(ligands_path, fa_ligands_path)

    # Read ligand poses
    molecules = []
    with oechem.oemolistream(ligands_path) as ifs:
        oemol = oechem.OEGraphMol()
        while oechem.OEReadMolecule(ifs, oemol):
            molecules.append(oemol.CreateCopy())
    print(f"{len(molecules)} ligands read")

    # Get zipped PDB if specified
    uses_reference_zip = fragalysis_config.ref_pdb == "references.zip"
    if uses_reference_zip:
        consolidate_protein_snapshots_into_pdb(
            oemols=molecules,
            results_path=results_path,
            pdb_filename="references.pdb",
            fragalysis_input=True,
            fragalysis_path=fa_path,
        )

    descriptions = {
        "DDG (kcal/mol)": "Relative computed free energy difference",
        "dDDG (kcal/mol)": "Uncertainty in computed relative free energy difference",
        "ref_mols": "a comma separated list of the fragments that inspired the design of the new molecule (codes as they appear in fragalysis - e.g. x0104_0,x0692_0)",
        "ref_pdb": "The name of the fragment (and corresponding Mpro fragment structure) with the best scoring hybrid docking pose",
        "original SMILES": "the original SMILES of the compound before any computation was carried out",
    }

    # Preprocess molecules
    tags_to_retain = {"DDG (kcal/mol)", "dDDG (kcal/mol)"}
    for index, oemol in enumerate(track(molecules, "Preprocessing molecules for Fragalysis...")):
        # Remove hydrogens
        oechem.OESuppressHydrogens(oemol, True)

        # Get original SMILES (must be read before the tag is removed below)
        original_smiles = oechem.OEGetSDData(oemol, "SMILES")

        # Remove irrelevant SD tags. The tags are collected first so the SD
        # data is not modified while it is being iterated.
        irrelevant_tags = [
            sdpair.GetTag()
            for sdpair in oechem.OEGetSDDataPairs(oemol)
            if sdpair.GetTag() not in tags_to_retain
        ]
        for tag in irrelevant_tags:
            oechem.OEDeleteSDData(oemol, tag)

        # Add required SD tags
        oechem.OESetSDData(oemol, "ref_mols", fragalysis_config.ref_mols)

        # If ref_pdb is zip file, point each molecule at its own snapshot
        # (numbered by the molecule's position in the ligand file)
        if uses_reference_zip:
            oechem.OESetSDData(oemol, "ref_pdb", f"references/references_{index}.pdb")
        else:
            oechem.OESetSDData(oemol, "ref_pdb", fragalysis_config.ref_pdb)

        oechem.OESetSDData(oemol, "original SMILES", original_smiles)

    # Add initial header molecule: a copy of the first pose whose SD fields
    # hold the description of each field instead of a value
    if not molecules:
        raise IndexError(f"No ligand poses could be read from {ligands_path}")
    oemol = molecules[0].CreateCopy()

    # Add descriptions to each SD field (tags collected first, see above)
    header_tags = [sdpair.GetTag() for sdpair in oechem.OEGetSDDataPairs(oemol)]
    for tag in header_tags:
        oechem.OESetSDData(oemol, tag, descriptions[tag])

    # Add other fields
    oemol.SetTitle("ver_1.2")
    oechem.OESetSDData(oemol, "ref_url", fragalysis_config.ref_url)
    oechem.OESetSDData(oemol, "submitter_name", fragalysis_config.submitter_name)
    oechem.OESetSDData(oemol, "submitter_email", fragalysis_config.submitter_email)
    oechem.OESetSDData(oemol, "submitter_institution", fragalysis_config.submitter_institution)
    oechem.OESetSDData(oemol, "generation_date", datetime.today().strftime("%Y-%m-%d"))
    oechem.OESetSDData(oemol, "method", fragalysis_config.method)
    molecules.insert(0, oemol)  # make it first molecule

    # Write molecules (header first), overwriting the copy made above
    with oechem.oemolostream(fa_ligands_path) as ofs:
        for oemol in track(molecules, description="Writing Fragalysis SDF file..."):
            oechem.OEWriteMolecule(ofs, oemol)

    # TODO add check SDF step here?

    # Upload to fragalysis
    print("Uploading to Fragalysis...")
    print(f"--> Target: {fragalysis_config.target_name}")

    # Imported here, as in the original flow, so the SDF is written even when
    # the upload client is unavailable.
    from fragalysis_api.xcextracter.computed_set_update import update_cset, REQ_URL

    if fragalysis_config.new_upload:
        update_set = "None"  # new upload
        print("--> Uploading a new set")
    else:
        # set name is "<submitter name>-<method>" with all whitespace removed
        update_set = ("".join(fragalysis_config.submitter_name.split()) + "-" +
                      "".join(fragalysis_config.method.split()))
        print(f"--> Updating set: {update_set}")

    if uses_reference_zip:
        pdb_zip_path = os.path.join(fa_path, "references.zip")
    else:
        pdb_zip_path = None

    taskurl = update_cset(
        REQ_URL,
        target_name=fragalysis_config.target_name,
        sdf_path=fa_ligands_path,
        pdb_zip_path=pdb_zip_path,
        update_set=update_set,
        upload_key=fragalysis_config.upload_key,
        submit_choice=1,
        add=False,
    )

    print(f"Upload complete, check upload status: {taskurl}")
'CYS145-warhead dist stddev (A)': 'is the standard deviation (in Angstroms) of the distance between CYS145 SG and any covalent warhead heavy atom during 10 ns simulation, where large values may indicate the warhead samples a variety of distances from CYS145 (covalent inhibitors only)', 'ref_mols': 'a comma separated list of the fragments that inspired the design of the new molecule (codes as they appear in fragalysis - e.g. x0104_0,x0692_0)', 'ref_pdb': 'The name of the fragment (and corresponding Mpro fragment structure) with the best scoring hybrid docking pose', 'original SMILES': 'the original SMILES of the compound before any computation was carried out', } for molecule in tqdm(docked_molecules): for sdpair in oechem.OEGetSDDataPairs(molecule): tag = sdpair.GetTag() value = sdpair.GetValue() # Remove hydrogens oechem.OESuppressHydrogens(molecule, True) # Remap SD tags if tag == 'Hybrid2': oechem.OESetSDData(molecule, 'Chemgauss4', value) oechem.OEDeleteSDData(molecule, tag) elif tag == 'fragments': value = ','.join( [f'{fragment}_0' for fragment in value.split(',')]) oechem.OESetSDData(molecule, 'ref_mols', value) oechem.OEDeleteSDData(molecule, tag) elif tag == 'docked_fragment': oechem.OESetSDData(molecule, 'ref_pdb', value + '_0') oechem.OEDeleteSDData(molecule, tag) elif tag == 'covalent_distance_min': oechem.OESetSDData(molecule, 'CYS145-warhead dist minimum (A)',
def _retitrate_atoms(atoms, needs_update, implicit_h_count, formal_charge):
    """Reset hydrogens and formal charge on every atom for which ``needs_update(atom)`` is true."""
    for atom in atoms:
        if needs_update(atom):
            oechem.OESuppressHydrogens(atom)  # strip hydrogens from residue
            atom.SetImplicitHCount(implicit_h_count)
            atom.SetFormalCharge(formal_charge)


def create_dyad(
        state: str, docking_system: DockingSystem,
        design_unit: oechem.OEDesignUnit,
        options: oespruce.OEMakeDesignUnitOptions) -> oechem.OEDesignUnit:
    """
    Set the protonation state of the HIS41/CYS145 dyad and re-protonate the design unit.

    Parameters
    ----------
    state : str
        Either 'His41(+) Cys145(-1)' (charged dyad) or
        'His41(0) Cys145(0)' (neutral dyad)
    docking_system : DockingSystem
        Its ``protein`` attribute is the molecule that gets modified in place
    design_unit : oechem.OEDesignUnit
        Design unit updated with the modified protein and then re-protonated
    options : oespruce.OEMakeDesignUnitOptions
        Source of the protonation options used for re-protonation

    Returns
    -------
    oechem.OEDesignUnit
        The same ``design_unit`` object that was passed in

    Raises
    ------
    ValueError
        If ``state`` is not one of the two supported strings. Nothing is
        modified in that case.
    """
    protonate_opts = options.GetPrepOptions().GetProtonateOptions()
    place_h_opts = protonate_opts.GetPlaceHydrogensOptions()
    protein = docking_system.protein

    def lacks_explicit_h(atom):
        return atom.GetExplicitHCount() == 0

    def has_one_explicit_h(atom):
        return atom.GetExplicitHCount() == 1

    def is_positively_charged(atom):
        return atom.GetFormalCharge() == 1

    if state == 'His41(+) Cys145(-1)':
        # Thiolate: remove the SG hydrogen where it carries exactly one.
        _retitrate_atoms(get_atoms(protein, "CYS:145:.*:.*:.*", "SG"),
                         has_one_explicit_h, 0, -1)
        # Imidazolium: give each ring nitrogen that has no hydrogen one
        # implicit hydrogen and a +1 formal charge.
        _retitrate_atoms(get_atoms(protein, "HIS:41:.*:.*:.*", "ND1"),
                         lacks_explicit_h, 1, +1)
        _retitrate_atoms(get_atoms(protein, "HIS:41:.*:.*:.*", "NE2"),
                         lacks_explicit_h, 1, +1)
    elif state == 'His41(0) Cys145(0)':
        # Thiol: give SG back one implicit hydrogen where it has none.
        _retitrate_atoms(get_atoms(protein, "CYS:145:.*:.*:.*", "SG"),
                         lacks_explicit_h, 1, 0)
        # Neutral histidine: deprotonate whichever ring nitrogen is charged.
        _retitrate_atoms(get_atoms(protein, "HIS:41:.*:.*:.*", "ND1"),
                         is_positively_charged, 0, 0)
        _retitrate_atoms(get_atoms(protein, "HIS:41:.*:.*:.*", "NE2"),
                         is_positively_charged, 0, 0)
    else:
        # The exception was previously constructed but never raised, so an
        # unknown state silently fell through to re-protonation.
        raise ValueError(
            "dyad_state must be one of ['His41(0) Cys145(0)', 'His41(+) Cys145(-1)'], "
            f"got {state!r}")

    # NOTE(review): the returned options are not passed on explicitly; this
    # presumably relies on place_h_opts being shared with protonate_opts - confirm.
    place_h_opts = bypass_atoms(["HIS:41:.*:.*:.*", "CYS:145:.*:.*:.*"],
                                place_h_opts)
    oechem.OEAddExplicitHydrogens(protein)
    oechem.OEUpdateDesignUnit(design_unit, protein,
                              oechem.OEDesignUnitComponents_Protein)
    oespruce.OEProtonateDesignUnit(design_unit, protonate_opts)
    return design_unit
def strip_hydrogens(complex: oechem.OEGraphMol) -> oechem.OEGraphMol:
    """Suppress hydrogens on every atom whose atomic number is above 1.

    The molecule is modified in place and the same object is returned.
    """
    for candidate in complex.GetAtoms():
        if candidate.GetAtomicNum() <= 1:
            # hydrogens (and atoms with atomic number 0) are left alone
            continue
        oechem.OESuppressHydrogens(candidate)
    return complex