def main(args):
    """Compare molecular-surface volumes of a protein, a ligand and their complex.

    The first molecule of each input file is read, given explicit hydrogens
    and Bondi van der Waals radii, and the ligand is appended to a copy of
    the protein to form the complex.  A molecular surface is built for the
    complex, the protein and the ligand; the three surfaces are written to
    ``comp.oesrf``, ``prot.oesrf`` and ``lig.oesrf`` and their volumes are
    reported through ``OEThrow.Info``.

    Args:
        args: Argument vector ``[program, protein_file, ligand_file]``.

    Returns:
        0 once the report has been emitted.
    """
    if len(args) != 3:
        oechem.OEThrow.Usage("%s <protein> <ligand>" % args[0])

    def surface_of(molecule):
        # Build a fresh molecular surface for ``molecule``.
        built = oespicoli.OESurface()
        oespicoli.OEMakeMolecularSurface(built, molecule)
        return built

    # Protein: open, read, prepare.
    protein_stream = oechem.oemolistream()
    protein_opened = protein_stream.open(args[1])
    if not protein_opened:
        oechem.OEThrow.Fatal("Unable to open %s for reading protein." % args[1])
    protein = oechem.OEGraphMol()
    protein_read = oechem.OEReadMolecule(protein_stream, protein)
    if not protein_read:
        oechem.OEThrow.Fatal("Unable to read protein")
    oechem.OEAddExplicitHydrogens(protein)
    oechem.OEAssignBondiVdWRadii(protein)

    # Ligand: open, read, prepare.
    ligand_stream = oechem.oemolistream()
    ligand_opened = ligand_stream.open(args[2])
    if not ligand_opened:
        oechem.OEThrow.Fatal("Unable to open %s for reading ligand." % args[2])
    ligand = oechem.OEGraphMol()
    ligand_read = oechem.OEReadMolecule(ligand_stream, ligand)
    if not ligand_read:
        oechem.OEThrow.Fatal("Unable to read ligand")
    oechem.OEAddExplicitHydrogens(ligand)
    oechem.OEAssignBondiVdWRadii(ligand)

    # The complex is the protein copy with the ligand atoms added to it.
    bound = oechem.OEGraphMol(protein)
    oechem.OEAddMols(bound, ligand)

    bound_surface = surface_of(bound)
    protein_surface = surface_of(protein)
    ligand_surface = surface_of(ligand)

    bound_volume = oespicoli.OESurfaceVolume(bound_surface)
    protein_volume = oespicoli.OESurfaceVolume(protein_surface)
    ligand_volume = oespicoli.OESurfaceVolume(ligand_surface)

    oespicoli.OEWriteSurface("comp.oesrf", bound_surface)
    oespicoli.OEWriteSurface("prot.oesrf", protein_surface)
    oespicoli.OEWriteSurface("lig.oesrf", ligand_surface)

    # dV(C-P) is the volume the complex gains over the bare protein.
    volume_gain = bound_volume - protein_volume
    report_values = (protein.GetTitle(), lig_title(ligand) if False else ligand.GetTitle(),
                     volume_gain, ligand_volume, bound_volume, protein_volume)
    oechem.OEThrow.Info(
        "%s-%s: dV(C-P) = %.1f V(L) = %.1f V(C) = %.1f V(P) = %.1f" % report_values)

    return 0
def main(args):
    """Write the part of a protein's molecular surface that lies near a ligand.

    The protein surface is annotated with per-vertex distances to the ligand
    by ``OESurfaceToMoleculeDistance``; every vertex closer than the
    module-level ``MAX_DIST`` is put into clique 1 and the surface is cropped
    to that clique before being written out.

    Args:
        args: Argument vector
            ``[program, protein_file, ligand_file, surface_file]``.

    Returns:
        0 once the cropped surface has been written.
    """
    if len(args) != 4:
        oechem.OEThrow.Usage("%s <protein> <ligand> <surface>" % args[0])

    # Check opens and reads explicitly: an unreadable input would otherwise
    # leave an empty molecule and silently produce an empty surface.
    pfs = oechem.oemolistream()
    if not pfs.open(args[1]):
        oechem.OEThrow.Fatal("Unable to open %s for reading protein." % args[1])
    prot = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(pfs, prot):
        oechem.OEThrow.Fatal("Unable to read protein")
    oechem.OEAssignBondiVdWRadii(prot)

    lfs = oechem.oemolistream()
    if not lfs.open(args[2]):
        oechem.OEThrow.Fatal("Unable to open %s for reading ligand." % args[2])
    lig = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(lfs, lig):
        oechem.OEThrow.Fatal("Unable to read ligand")

    surf = oespicoli.OESurface()
    oespicoli.OEMakeMolecularSurface(surf, prot)

    # Store the distance from every surface vertex to the ligand
    oespicoli.OESurfaceToMoleculeDistance(surf, lig)

    # Mark the vertices to keep
    for i in range(surf.GetNumVertices()):
        if surf.GetDistanceElement(i) < MAX_DIST:
            surf.SetVertexCliqueElement(i, 1)

    # Crop to the binding site and output
    oespicoli.OESurfaceCropToClique(surf, 1)
    oespicoli.OEWriteSurface(args[3], surf)

    return 0
def main(args):
    """Write the part of a protein's molecular surface that lies near a ligand.

    Every protein surface vertex whose squared distance (as computed by
    ``GetDist2``) to at least one ligand atom is below ``MAX_DIST`` squared
    is put into clique 1; the surface is then cropped to that clique and
    written out.

    Args:
        args: Argument vector
            ``[program, protein_file, ligand_file, surface_file]``.

    Returns:
        0 once the cropped surface has been written.
    """
    if len(args) != 4:
        oechem.OEThrow.Usage("%s <protein> <ligand> <surface>" % args[0])

    # Check opens and reads explicitly: an unreadable input would otherwise
    # leave an empty molecule and silently produce an empty surface.
    pfs = oechem.oemolistream()
    if not pfs.open(args[1]):
        oechem.OEThrow.Fatal("Unable to open %s for reading protein." % args[1])
    prot = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(pfs, prot):
        oechem.OEThrow.Fatal("Unable to read protein")
    oechem.OEAssignBondiVdWRadii(prot)

    lfs = oechem.oemolistream()
    if not lfs.open(args[2]):
        oechem.OEThrow.Fatal("Unable to open %s for reading ligand." % args[2])
    lig = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(lfs, lig):
        oechem.OEThrow.Fatal("Unable to read ligand")

    surf = oespicoli.OESurface()
    oespicoli.OEMakeMolecularSurface(surf, prot)

    # Loop invariants: the ligand coordinates and the squared threshold do
    # not depend on the vertex, so fetch/compute them once.
    lig_coords = [lig.GetCoords(atom) for atom in lig.GetAtoms()]
    max_dist2 = MAX_DIST * MAX_DIST

    # Iterate through all the protein surface vertices
    for i in range(surf.GetNumVertices()):
        vert = surf.GetVertex(i)
        # One ligand atom within range is enough; any() stops at the first.
        if any(GetDist2(xyz, vert) < max_dist2 for xyz in lig_coords):
            surf.SetVertexCliqueElement(i, 1)

    # Crop to the binding site and output
    oespicoli.OESurfaceCropToClique(surf, 1)
    oespicoli.OEWriteSurface(args[3], surf)

    return 0
def main(argv=[__name__]):
    """Build one ellipsoid surface per ANISOU record of a PDB file.

    The molecule is read from ``argv[1]`` and its atoms are indexed by the
    serial number stored on their residue record.  The same file is then
    scanned as text: for every line starting with ``ANISOU`` whose serial
    number (as returned by ``ParseFactors``) matches an atom, an ellipsoid
    from ``GetEllipsoidalSurface`` centred on that atom is added to the
    output surface, which is finally written to ``argv[2]``.

    Args:
        argv: Argument vector ``[program, molfile.pdb, out.srf]``.  The
            default list is only read, never mutated.
    """
    if len(argv) != 3:
        oechem.OEThrow.Usage("%s <molfile.pdb> <out.srf>" % argv[0])

    mol = oechem.OEGraphMol()
    ifs = oechem.oemolistream()
    if not ifs.open(argv[1]):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % argv[1])
    if not oechem.OEReadMolecule(ifs, mol):
        oechem.OEThrow.Fatal("Unable to read a molecule from %s" % argv[1])

    if not oechem.OEHasResidues(mol):
        oechem.OEPerceiveResidues(mol, oechem.OEPreserveResInfo_All)

    # Map the serial number stored on each atom's residue record to the atom
    serials = {}
    for atom in mol.GetAtoms():
        res = oechem.OEAtomGetResidue(atom)
        serials[res.GetSerialNumber()] = atom

    outsurf = oespicoli.OESurface()
    center = oechem.OEFloatArray(3)

    # Re-read the input as text; the context manager closes the handle even
    # if parsing a record raises.
    with open(argv[1]) as pdb_file:
        for line in pdb_file:
            if not line.startswith("ANISOU"):
                continue
            serno, factors = ParseFactors(line)
            if serno in serials:
                mol.GetCoords(serials[serno], center)
                surf = GetEllipsoidalSurface(center, factors)
                oespicoli.OEAddSurfaces(outsurf, surf)

    oespicoli.OEWriteSurface(argv[2], outsurf)
def AverageSurfaceArea(mcmol):
    """Return the conformer-averaged total and clique-1 surface areas.

    For each conformer a molecular surface is built (third argument 0.5),
    ``MakeCliques`` assigns vertex cliques, and the total area and the area
    of clique 1 are accumulated.

    Args:
        mcmol: Multi-conformer molecule providing ``GetConfs()`` and
            ``NumConfs()``.

    Returns:
        Tuple ``(area, parea)``: the mean total surface area and the mean
        area of clique 1 over all conformers.  ``(0.0, 0.0)`` is returned
        when the molecule has no conformers.
    """
    nconfs = mcmol.NumConfs()
    if not nconfs:
        # Nothing to average; without this guard the divisions below would
        # raise ZeroDivisionError.
        return 0.0, 0.0

    area = 0.0
    parea = 0.0
    for conf in mcmol.GetConfs():
        surf = oespicoli.OESurface()
        oespicoli.OEMakeMolecularSurface(surf, conf, 0.5)
        MakeCliques(surf, conf)
        area += oespicoli.OESurfaceArea(surf)
        parea += oespicoli.OESurfaceCliqueArea(surf, 1)

    return area / nconfs, parea / nconfs
def main(args):
    """Print how much of a reference molecule's volume a fit molecule covers.

    The reference molecule is mapped onto a Gaussian grid and its volume is
    measured from the surface contoured at 1.0.  The grid is then masked by
    the fit molecule, the surface is rebuilt, and the share of the reference
    volume that disappeared is printed as a percentage.

    Args:
        args: Argument vector ``[program, ref_file, fit_file]``.

    Returns:
        0 once the percentage has been printed.
    """
    if len(args) != 3:
        oechem.OEThrow.Usage("%s <ref> <fit>" % args[0])

    refifs = oechem.oemolistream()
    if not refifs.open(args[1]):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % args[1])
    refmol = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(refifs, refmol):
        oechem.OEThrow.Fatal("Unable to read a molecule from %s" % args[1])
    oechem.OEAssignBondiVdWRadii(refmol)

    fitifs = oechem.oemolistream()
    if not fitifs.open(args[2]):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % args[2])
    fitmol = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(fitifs, fitmol):
        oechem.OEThrow.Fatal("Unable to read a molecule from %s" % args[2])
    oechem.OEAssignBondiVdWRadii(fitmol)

    # Map the reference molecule onto a grid
    grd = oegrid.OEScalarGrid()
    oegrid.OEMakeMolecularGaussianGrid(grd, refmol, 0.5)

    # Get the total volume of the reference molecule
    refsrf = oespicoli.OESurface()
    oespicoli.OEMakeSurfaceFromGrid(refsrf, grd, 1.0)
    totalv = oespicoli.OESurfaceVolume(refsrf)
    if totalv == 0.0:
        # The ratio below is undefined for an empty reference surface.
        oechem.OEThrow.Fatal("Reference molecule in %s has zero volume" % args[1])

    # Mask out the fit molecule
    oegrid.OEMaskGridByMolecule(grd, fitmol)

    # Find how much of the reference volume is remaining
    fitsrf = oespicoli.OESurface()
    oespicoli.OEMakeSurfaceFromGrid(fitsrf, grd, 1.0)
    remaining = oespicoli.OESurfaceVolume(fitsrf)

    print("Percent overlap: %f" % ((1 - remaining / totalv) * 100))

    return 0
def GetEllipsoidalSurface(center, factors):
    """Return an ellipsoid surface at ``center`` oriented by six factors.

    The six entries of ``factors`` are laid out as the rows of a symmetric
    3x3 matrix (entries 0, 1, 2 on the diagonal; 3, 4, 5 off the diagonal),
    and each row is passed to ``OEMakeEllipsoidSurface`` as one direction
    vector.  The three radii are fixed at 10.0 and the last argument at 4.

    Args:
        center: Coordinates of the ellipsoid centre.
        factors: Indexable sequence with at least six entries.

    Returns:
        The newly built ``OESurface``.
    """
    # Row r of the symmetric matrix, expressed as indices into ``factors``.
    row_layout = ((0, 3, 4), (3, 1, 5), (4, 5, 2))

    axes = []
    for row in row_layout:
        axis = oechem.OEFloatArray(3)
        for slot, source in enumerate(row):
            axis[slot] = factors[source]
        axes.append(axis)
    first_axis, second_axis, third_axis = axes

    ellipsoid = oespicoli.OESurface()
    oespicoli.OEMakeEllipsoidSurface(ellipsoid, center, 10.0, 10.0, 10.0,
                                     first_axis, second_axis, third_axis, 4)
    return ellipsoid
def main(args):
    """Write the largest connected hydrophobic patch of a protein surface.

    Surface vertices owned by atoms for which ``AtomInHydrophobicResidue``
    is true are put into clique 1 and the surface is cropped to them.  The
    remainder is split into connected components, the area of each one is
    printed, and the surface is cropped to the component with the largest
    area before being written out.

    Args:
        args: Argument vector ``[program, protein_file, surface_file]``.

    Returns:
        0 once the surface has been written.
    """
    if len(args) != 3:
        oechem.OEThrow.Usage("%s <protein> <surface>" % args[0])

    ifs = oechem.oemolistream()
    if not ifs.open(args[1]):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % args[1])

    mol = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(ifs, mol):
        oechem.OEThrow.Fatal("Unable to read a molecule from %s" % args[1])
    oechem.OEPerceiveResidues(mol)
    oechem.OEAssignBondiVdWRadii(mol)

    # Generate the molecular surface
    surf = oespicoli.OESurface()
    oespicoli.OEMakeMolecularSurface(surf, mol, 0.5)

    # Classify every atom once, so each vertex needs only a set lookup
    # instead of a predicate search through the whole molecule.
    hydrophobic_idxs = set(
        atom.GetIdx() for atom in mol.GetAtoms()
        if AtomInHydrophobicResidue(atom))

    # Mark all the vertices associated with hydrophobic atoms
    for i in range(surf.GetNumVertices()):
        if surf.GetAtomsElement(i) in hydrophobic_idxs:
            surf.SetVertexCliqueElement(i, 1)

    # Crop to only those triangles
    oespicoli.OESurfaceCropToClique(surf, 1)

    # nclqs is the number of different connected components
    nclqs = oespicoli.OEMakeConnectedSurfaceCliques(surf)

    # Find the largest component (components are numbered from 1)
    maxclq = 0
    maxarea = 0.0
    for clq in range(1, nclqs + 1):
        area = oespicoli.OESurfaceCliqueArea(surf, clq)
        print("clique: %d area: %f" % (clq, area))
        if area > maxarea:
            maxclq = clq
            maxarea = area

    # Crop to it
    oespicoli.OESurfaceCropToClique(surf, maxclq)

    oespicoli.OEWriteSurface(args[2], surf)

    return 0
def main(args):
    """Write the inverted cavity surfaces of a protein to a surface file.

    The first molecule of the input file is read and given Bondi van der
    Waals radii; ``OEMakeCavitySurfaces`` fills the surface, which is then
    passed through ``OEInvertSurface`` and written out.

    Args:
        args: Argument vector ``[program, protein_file, surface_file]``.

    Returns:
        0 once the surface has been written.
    """
    if len(args) != 3:
        oechem.OEThrow.Usage("%s <protein> <surface>" % args[0])

    prtfs = oechem.oemolistream()
    if not prtfs.open(args[1]):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % args[1])

    prt = oechem.OEGraphMol()
    # A failed read would leave an empty molecule and an empty surface.
    if not oechem.OEReadMolecule(prtfs, prt):
        oechem.OEThrow.Fatal("Unable to read a molecule from %s" % args[1])
    oechem.OEAssignBondiVdWRadii(prt)

    surf = oespicoli.OESurface()
    oespicoli.OEMakeCavitySurfaces(prt, surf)
    oespicoli.OEInvertSurface(surf)
    oespicoli.OEWriteSurface(args[2], surf)

    return 0
def main(args):
    """Attach a processed molecular surface to each molecule and write it out.

    Every molecule of the input file receives Bondi van der Waals radii and
    a molecular surface (third argument 0.5).  The surface is handed to
    ``ColorSurface`` together with the molecule, stored on the molecule
    under the ``"psasurf"`` data tag, and the molecule is written to the
    output stream.

    Args:
        args: Argument vector ``[program, molecules_file, oeb_file]``.

    Returns:
        0 after all molecules have been processed.
    """
    if len(args) != 3:
        oechem.OEThrow.Usage("%s <molecules> <oebfile>" % args[0])

    source = oechem.oemolistream()
    source_ok = source.open(args[1])
    if not source_ok:
        oechem.OEThrow.Fatal("Unable to open %s for reading" % args[1])

    sink = oechem.oemolostream()
    sink_ok = sink.open(args[2])
    if not sink_ok:
        oechem.OEThrow.Fatal("Unable to open %s for writing" % args[2])

    for molecule in source.GetOEGraphMols():
        oechem.OEAssignBondiVdWRadii(molecule)

        attached = oespicoli.OESurface()
        oespicoli.OEMakeMolecularSurface(attached, molecule, 0.5)
        ColorSurface(attached, molecule)

        # The surface travels with the molecule as generic data.
        molecule.SetData("psasurf", attached)
        oechem.OEWriteMolecule(sink, molecule)

    return 0
def main(args):
    """Write the surface of the void volume between a protein and a ligand.

    Both molecules are read, stripped of hydrogens and given Bondi van der
    Waals radii.  ``OEMakeVoidVolume`` fills a scalar grid (last argument
    0.5) and the grid is contoured at 0.5 into the surface that is written
    out.

    Args:
        args: Argument vector
            ``[program, protein_file, ligand_file, surface_file]``.

    Returns:
        0 once the surface has been written.
    """
    if len(args) != 4:
        oechem.OEThrow.Usage("%s <protein> <ligand> <surface>" % args[0])

    # Check opens and reads explicitly: an unreadable input would otherwise
    # leave an empty molecule and silently produce a meaningless grid.
    prtfs = oechem.oemolistream()
    if not prtfs.open(args[1]):
        oechem.OEThrow.Fatal("Unable to open %s for reading protein." % args[1])
    prt = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(prtfs, prt):
        oechem.OEThrow.Fatal("Unable to read protein")
    oechem.OESuppressHydrogens(prt)
    oechem.OEAssignBondiVdWRadii(prt)

    ligfs = oechem.oemolistream()
    if not ligfs.open(args[2]):
        oechem.OEThrow.Fatal("Unable to open %s for reading ligand." % args[2])
    lig = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(ligfs, lig):
        oechem.OEThrow.Fatal("Unable to read ligand")
    oechem.OESuppressHydrogens(lig)
    oechem.OEAssignBondiVdWRadii(lig)

    grid = oegrid.OEScalarGrid()
    oespicoli.OEMakeVoidVolume(prt, lig, grid, 0.5)

    surf = oespicoli.OESurface()
    oespicoli.OEMakeSurfaceFromGrid(surf, grid, 0.5)
    oespicoli.OEWriteSurface(args[3], surf)

    return 0
from openeye import oespicoli

# Example script: attach a molecular surface to a molecule as generic data,
# write the molecule to an OEB file and read the surface back from it.
# NOTE(review): ``sys`` and ``oechem`` are used below but are not imported in
# this span -- presumably they are imported earlier in the file; confirm.

# Exactly one command line argument is expected: the input molecule file.
if len(sys.argv) != 2:
    oechem.OEThrow.Usage("%s <input>" % sys.argv[0])

ims = oechem.oemolistream()
if not ims.open(sys.argv[1]):
    oechem.OEThrow.Fatal("Unable to open %s" % sys.argv[1])

# Only the first molecule of the input file is used.
mol = oechem.OEGraphMol()
if not oechem.OEReadMolecule(ims, mol):
    oechem.OEThrow.Fatal("Unable to read a molecule")

oechem.OEAssignBondiVdWRadii(mol)

# The statements between each pair of SNIPPET markers are kept free of extra
# comments; the markers delimit the exact text of each example.
# @ <SNIPPET-SetData>
surf = oespicoli.OESurface()
oespicoli.OEMakeMolecularSurface(surf, mol)
mol.SetData("surface", surf)

ofs = oechem.oemolostream("foo.oeb")
oechem.OEWriteMolecule(ofs, mol)
# @ </SNIPPET-SetData>

# Close the output stream before the same file is opened for reading below.
ofs.close()

# @ <SNIPPET-GetData>
ifs = oechem.oemolistream("foo.oeb")
oechem.OEReadMolecule(ifs, mol)
msrf = mol.GetData("surface")
# @ </SNIPPET-GetData>
def nmax_waters(protein, ligand, cutoff):
    """Estimate how many waters fit in the free space around a bound ligand.

    A cubic grid (0.5 spacing, edge twice the ligand's largest extent) is
    centred on the ligand.  A grid point is counted when all of the
    following hold:

    * the bit grid derived from the molecular surface of the
      protein-ligand complex has the value 0.0 at that point,
    * at least one ligand atom is closer than ``cutoff``,
    * at least one protein atom is closer than ``cutoff``.

    The counted volume is multiplied by 0.034 and truncated to an integer.

    Args:
        protein: Protein molecule; must provide ``GetCoords()``.
        ligand: Ligand molecule; must provide ``GetCoords()``.
        cutoff: Distance threshold, in the units of the coordinates.

    Returns:
        int: The estimated number of water molecules.
    """
    # Grid spacing in Angstrom
    spacing = 0.5

    # Named ``cplx`` so the builtin ``complex`` is not shadowed.
    cplx = oechem.OEMol(protein)
    oechem.OEAddMols(cplx, ligand)

    surf = oespicoli.OESurface()
    oespicoli.OEMakeMolecularSurface(surf, cplx, spacing)

    center = oechem.OEFloatArray(3)
    extents = oechem.OEFloatArray(3)
    oechem.OEGetCenterAndExtents(ligand, center, extents)

    # Cubic box whose edge is twice the largest ligand extent
    edge = max(extents) * 2
    extents = oechem.OEFloatArray([edge, edge, edge])

    # Only the geometry of this grid is used; its values are never needed.
    grid_reference = oegrid.OEScalarGrid()
    oegrid.OEMakeGridFromCenterAndExtents(grid_reference, center, extents,
                                          spacing)

    grid_spicoli = oegrid.OEScalarGrid()
    oespicoli.OEMakeBitGridFromSurface(grid_spicoli, surf)

    # Loop invariants: atom coordinates, squared cutoff, and the per-axis
    # coordinate of each reference index with its bit-grid index.
    ligand_coords = list(ligand.GetCoords().values())
    protein_coords = list(protein.GetCoords().values())
    cutoff_sq = cutoff * cutoff

    xs = [grid_reference.GetX(ix) for ix in range(grid_reference.GetXDim())]
    ys = [grid_reference.GetY(iy) for iy in range(grid_reference.GetYDim())]
    zs = [grid_reference.GetZ(iz) for iz in range(grid_reference.GetZDim())]
    x_axis = [(x, grid_spicoli.GetXIdx(x)) for x in xs]
    y_axis = [(y, grid_spicoli.GetYIdx(y)) for y in ys]
    z_axis = [(z, grid_spicoli.GetZIdx(z)) for z in zs]

    def within_cutoff(coords, point):
        # True when at least one coordinate lies closer than the cutoff.
        return any(dist2(coord, point) < cutoff_sq for coord in coords)

    # One pass over the grid.  Each point is judged independently, so
    # copying, inverting, filtering and counting reduce to one test.
    count = 0
    for z, iz_spicoli in z_axis:
        for y, iy_spicoli in y_axis:
            for x, ix_spicoli in x_axis:
                # Anything other than 0.0 in the bit grid rules the point out.
                if grid_spicoli.GetValue(ix_spicoli, iy_spicoli,
                                         iz_spicoli) != 0.0:
                    continue
                point = (x, y, z)
                if (within_cutoff(ligand_coords, point)
                        and within_cutoff(protein_coords, point)):
                    count += 1

    # Volume of the counted grid points, in cubic Angstrom
    vcount = (spacing ** 3) * count

    # Number of water molecules in the vcount volume.
    # NOTE(review): 0.034 is presumably the number density of bulk water in
    # molecules per cubic Angstrom -- confirm.
    nwaters = int(0.034 * vcount)

    return nwaters


# def select_nmax_waters()
def do(controller):
    """Write per-atom buried solvent-accessible surface areas of binding sites.

    For every ``(pdb, assemblyset)`` pair from ``get_assembly_sets`` the
    file ``binding_site_atom_surface_areas.credo`` is opened in the entry's
    data directory.  For each assembly, and each of its ligands with at
    least seven heavy atoms, per-atom accessible surface areas are computed
    for the ligand alone, the binding site alone, and the two combined.
    Both sets of areas go to ``write_atoms`` with a boolean map of the atoms
    whose area differs between the two states.

    An entry is skipped when ``args.incremental`` is set and a non-empty
    output file exists, or when ``args.update`` (a number of days) is set
    and the non-empty output file is at least that recent.

    Args:
        controller: Object exposing the parsed command line options as
            ``pargs``; the options read are ``progressbar``,
            ``incremental``, ``update``, ``quat`` and ``dry_run``.

    Raises:
        SystemExit: Through ``sys.exit(1)`` when ``args.quat`` is not set.
    """
    # get the command line arguments and options
    args = controller.pargs

    # predicate to remove non-polymer atoms from structure
    nonpolymers = oechem.OEOrAtom(
        OEAtomHasIntData(('entity_type_bm', 0)),
        OEAtomBinaryAndIntData(('entity_type_bm', 3)))

    assemblysets = get_assembly_sets(args)

    # directory containing all the biological assemblies in OEB format
    OEB_ASSEMBLIES_DIR = app.config.get('directories', 'quat_oeb')

    # directory where surface areas will be written
    CREDO_DATA_DIR = app.config.get('directories', 'credo_data')

    ifs = oechem.oemolistream()
    ifs.SetFormat(oechem.OEFormat_OEB)

    # initialize progressbar
    if args.progressbar:
        bar = ProgressBar(widgets=[
            'PDB entries: ', SimpleProgress(), ' ', Percentage(), Bar()
        ], maxval=len(assemblysets)).start()

    # iterate through assembly sets
    for counter, (pdb, assemblyset) in enumerate(assemblysets, 1):
        if args.progressbar:
            bar.update(counter)

        # data directory for this structure to which all data will be written
        struct_data_dir = os.path.join(CREDO_DATA_DIR, pdb[1:3].lower(),
                                       pdb.lower())

        # make necessary directories recursively if they do not exist yet
        if not exists(struct_data_dir):
            os.makedirs(struct_data_dir)

        # path to the file where the atom surface areas will be written
        surface_areas_path = os.path.join(
            struct_data_dir, 'binding_site_atom_surface_areas.credo')

        # do not recalculate atom surface area contributions if incremental
        if (args.incremental and exists(surface_areas_path)
                and getsize(surface_areas_path) > 0):
            continue

        elif (args.update and exists(surface_areas_path)
              and getmtime(surface_areas_path) >= time() - (args.update * 60 * 60 * 24)
              and getsize(surface_areas_path)):
            app.log.info(
                "Output for PDB entry {0} exists and is more recent than {1} days. Skipped."
                .format(pdb, args.update))
            continue

        # output file stream and CSV writer; the context manager flushes and
        # closes the file even when sys.exit() or an exception interrupts
        # the processing below
        with open(surface_areas_path, 'w') as atomfs:
            atomwriter = csv.writer(atomfs, dialect='tabs')

            # deal with each found assembly separately
            # some pdb entries consist of more than one
            for assembly_file in assemblyset:
                if args.quat:
                    path = os.path.join(OEB_ASSEMBLIES_DIR, pdb[1:3].lower(),
                                        pdb.lower(), assembly_file)
                else:
                    app.log.error(
                        "the calculation of buried ligand surface areas "
                        "is only supported for quaternary structures.")
                    sys.exit(1)

                if not os.path.isfile(path):
                    app.log.warn("cannot calculate buried surface areas: "
                                 "file {} does not exist!".format(path))
                    # nothing to read; skipping here also avoids a second,
                    # misleading "no valid molecule" warning for this path
                    continue

                # get the quaternary structure
                ifs.open(str(path))

                # builtin next() works on Python 2 and 3 iterators alike
                try:
                    assembly = next(ifs.GetOEGraphMols())
                except StopIteration:
                    assembly = None

                if not assembly:
                    app.log.warn(
                        "cannot calculate buried surface areas: "
                        "file {} does not contain a valid molecule!".format(path))
                    continue

                if not assembly.GetListData('ligands'):
                    continue

                # identifier of the assembly
                assembly_serial = assembly.GetIntData('assembly_serial')

                # remove all non-polymers from assembly
                for atom in assembly.GetAtoms(nonpolymers):
                    assembly.DeleteAtom(atom)

                # ignore bizarre assemblies
                if not assembly.NumAtoms():
                    app.log.warn(
                        "cannot calculate buried surface areas: "
                        "file {} contains assembly with no atoms!".format(path))
                    continue

                # keep only the location state with the largest average
                # occupancy
                assembly_hi_occ = oechem.OEGraphMol()
                altlocfactory = oechem.OEAltLocationFactory(assembly)
                altlocfactory.MakeCurrentAltMol(assembly_hi_occ)

                # get the ligands
                ligands = assembly_hi_occ.GetListData('ligands')

                # iterate through all ligands of the biomolecule and
                # calculate the buried surface area atom contributions for
                # all involved atoms
                for ligand in ligands:

                    # ignore small ligands
                    if oechem.OECount(ligand, oechem.OEIsHeavy()) < 7:
                        continue

                    entity_serial = ligand.GetIntData('entity_serial')

                    # keep only the location state with the largest average
                    # occupancy
                    altlig = oechem.OEGraphMol()
                    altlocfactory = oechem.OEAltLocationFactory(ligand)
                    altlocfactory.MakeCurrentAltMol(altlig)

                    cmplx_srf = oespicoli.OESurface()
                    ligand_srf = oespicoli.OESurface()

                    # make solvent-accessible surface of ligand
                    oespicoli.OEMakeAccessibleSurface(ligand_srf, altlig,
                                                      0.5, 1.4)

                    # get the atom contributions of the ligand surface
                    ligand_atom_areas = get_atom_surface_areas(altlig,
                                                               ligand_srf)

                    # extract the binding site of the assembly to speed up
                    # surface area calculation
                    binding_site = get_binding_site(assembly_hi_occ, altlig)

                    # make solvent-accessible surface of binding site
                    binding_site_srf = oespicoli.OESurface()
                    oespicoli.OEMakeAccessibleSurface(binding_site_srf,
                                                      binding_site, 0.5, 1.4)

                    # get the atom contributions of the binding site surface
                    binding_site_atom_areas = get_atom_surface_areas(
                        binding_site, binding_site_srf)

                    # create complex: binding site atoms first, then ligand
                    cmplx = oechem.OEGraphMol()
                    oechem.OEAddMols(cmplx, binding_site)
                    oechem.OEAddMols(cmplx, altlig)

                    # make solvent-accessible surface of the complex
                    oespicoli.OEMakeAccessibleSurface(cmplx_srf, cmplx,
                                                      0.5, 1.4)

                    # surface area atom contributions of the whole complex
                    cmplx_atom_areas = get_atom_surface_areas(cmplx,
                                                              cmplx_srf)

                    # extract the atom surface areas in the bound state
                    # through slices; the split point follows from the order
                    # in which the complex was assembled above
                    num_site_atoms = binding_site.NumAtoms()
                    binding_site_atom_areas_bound = cmplx_atom_areas[:num_site_atoms]
                    ligand_atom_areas_bound = cmplx_atom_areas[num_site_atoms:]

                    # difference between apo and bound state per atom
                    # NOTE(review): element-wise arithmetic assumes that
                    # get_atom_surface_areas returns array-like objects
                    # (e.g. numpy arrays) -- confirm.
                    binding_site_delta = binding_site_atom_areas - binding_site_atom_areas_bound
                    ligand_delta = ligand_atom_areas - ligand_atom_areas_bound

                    # boolean map indicating for which atom the surface area
                    # has changed
                    binding_site_atom_map = binding_site_delta != 0
                    ligand_atom_map = ligand_delta != 0

                    if args.dry_run:
                        continue

                    # binding site atoms, with the map of atoms whose
                    # solvent-accessible surface area has actually changed
                    write_atoms(atomwriter, binding_site,
                                binding_site_atom_map, pdb, assembly_serial,
                                entity_serial, binding_site_atom_areas,
                                binding_site_atom_areas_bound)

                    # ligand atoms, with the map of atoms whose
                    # solvent-accessible surface area has actually changed
                    write_atoms(atomwriter, altlig, ligand_atom_map, pdb,
                                assembly_serial, entity_serial,
                                ligand_atom_areas, ligand_atom_areas_bound)

                app.log.debug("wrote buried surface areas for all ligands in "
                              "biomolecule {} to {}.".format(
                                  pdb, surface_areas_path))

    if args.progressbar:
        bar.finish()