def keep_molecule(mol, remove_smirks=()):
    """
    Determines if the molecule will be stored.

    Parameters
    ----------
    mol - OEMol
    remove_smirks - iterable of SMIRKS strings you don't want in your molecules

    Returns
    -------
    boolean - True (molecule meets the requirements below)
        - has no metal atoms
        - no more than 200 heavy atoms
        - has none of the SMIRKS in remove_smirks list
        - molecule has appropriate valency
    """
    # Check number of metal atoms
    if oechem.OECount(mol, oechem.OEIsMetal()) > 0:
        return False

    # Check number of heavy atoms
    if oechem.OECount(mol, oechem.OEIsHeavy()) > 200:
        return False

    # Check for patterns in remove smirks list
    for smirks in remove_smirks:
        qmol = oechem.OEQMol()
        if not oechem.OEParseSmarts(qmol, smirks):
            # Patterns that fail to parse are ignored rather than treated
            # as a reason to reject the molecule.
            continue
        # Only the existence of a match matters, so stop at the first hit
        # instead of enumerating every match.
        if oechem.OESubSearch(qmol).SingleMatch(mol):
            return False

    # check valency
    return check_valence(mol)
def GetFragmentScore(mol):
    """Score a fragment: ring atoms weigh 2.0, non-ring atoms weigh 1.0."""
    ring_atoms = oechem.OECount(mol, oechem.OEAtomIsInRing())
    chain_atoms = oechem.OECount(mol, oechem.OENotAtom(oechem.OEAtomIsInRing()))
    return 0.0 + 2.0 * ring_atoms + 1.0 * chain_atoms
def keep_molecule(mol, max_heavy_atoms=100, remove_smirks=(), max_metals=0,
                  elements=[], check_type=None):
    """
    Decide whether a molecule passes all filters and should be kept.

    Parameters
    ----------
    mol - OEMol
    max_heavy_atoms - largest allowed number of heavy atoms
    remove_smirks - iterable of SMIRKS strings you don't want in your molecules
    max_metals - largest allowed number of metal atoms
    elements - element specification handed to read_Elements; pass None to
        skip the element check (the default is never modified here)
    check_type - comma separated string handed (split) to check_atomtype;
        None skips the atom type check

    Returns
    -------
    boolean - True only if the molecule passes every enabled check and
        check_valence(mol) is true
    """
    # Molecules below this heavy atom count are considered uninteresting.
    min_heavy_atoms = 5

    if oechem.OECount(mol, oechem.OEIsMetal()) > max_metals:
        return False

    # Count heavy atoms once and reuse it for both size limits.
    heavy_atoms = oechem.OECount(mol, oechem.OEIsHeavy())
    if heavy_atoms > max_heavy_atoms:
        return False
    # Remove very small molecules that are not interesting
    if heavy_atoms < min_heavy_atoms:
        return False

    for smirks in remove_smirks:
        qmol = oechem.OEQMol()
        if not oechem.OEParseSmarts(qmol, smirks):
            # Unparseable patterns are ignored, not treated as a rejection.
            continue
        # Existence of one match is enough; do not enumerate all of them.
        if oechem.OESubSearch(qmol).SingleMatch(mol):
            return False

    if elements is not None:
        elements_list = read_Elements(elements)
        if not check_element(mol, elements_list):
            return False

    if check_type is not None:
        types = check_type.split(",")
        if not check_atomtype(mol, types):
            return False

    return check_valence(mol)
def DumpGroups(mol):
    """Print group counts for a molecule, then dump each group via DumpGroup."""
    print("groups of", mol.GetTitle())

    atom_group_count = oechem.OECount(mol, IsAtomGroup())
    print("number of atom groups", atom_group_count)

    bond_group_count = oechem.OECount(mol, IsBondGroup())
    print("number of bond groups", bond_group_count)

    aromatic_atoms_tag = oechem.OEGetTag("aromatic atoms")
    print("number of aromatic atoms groups",
          oechem.OECount(mol, oechem.OEHasGroupType(aromatic_atoms_tag)))

    aromatic_bonds_tag = oechem.OEGetTag("aromatic bonds")
    print("number of aromatic bonds groups",
          oechem.OECount(mol, oechem.OEHasGroupType(aromatic_bonds_tag)))

    # loop over groups
    for group in mol.GetGroups():
        DumpGroup(group)
    print()
def is_undesirable_molecule(mol):
    """Return True if the molecule should be rejected.

    A molecule is undesirable when has_undesirable_elements(mol) is true or
    when it has no rotatable bonds.
    """
    if has_undesirable_elements(mol):
        return True
    # The rotor predicate is oechem.OEIsRotor; oechem has no IsRotor
    # attribute, so the previous spelling raised AttributeError.
    return oechem.OECount(mol, oechem.OEIsRotor()) == 0
def GetFuncGroups(mol):
    '''
    Collect the functional group fragments of a molecule that pass the filters.

    A fragment is kept only if it has at most 5 heavy atoms, at least one
    hetero atom and no ring atoms.

    :param mol: molecule passed to oemedchem.OEGetFuncGroupFragments
    :return: list of oechem.OEAtomBondSet, one per kept fragment
    '''
    kept = []
    for fragment in oemedchem.OEGetFuncGroupFragments(mol):
        # The "and" chain evaluates the counts in the same order as the
        # original guard clauses and stops at the first failing one.
        is_small = oechem.OECount(fragment, oechem.OEIsHeavy()) <= 5
        if (is_small
                and oechem.OECount(fragment, oechem.OEIsHetero()) != 0
                and oechem.OECount(fragment, oechem.OEAtomIsInRing()) <= 0):
            kept.append(oechem.OEAtomBondSet(fragment))
    return kept
def main(argv=[__name__]):
    """Print one line of basic statistics for every molecule in <infile>."""
    if len(argv) != 2:
        oechem.OEThrow.Usage("%s <infile>" % argv[0])

    ifs = oechem.oemolistream()
    if not ifs.open(argv[1]):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % argv[1])

    print("Title MolWt NumAtoms NumHeavyAtoms NumRingAtoms NumRotors NumConfs")
    row_format = "%s %.3f %d %d %d %d %d"
    for mol in ifs.GetOEMols():
        # Fall back to a placeholder when the molecule has no title.
        title = mol.GetTitle() or "Untitled"
        row = (
            title,
            oechem.OECalculateMolecularWeight(mol),
            mol.NumAtoms(),
            oechem.OECount(mol, oechem.OEIsHeavy()),
            oechem.OECount(mol, oechem.OEAtomIsInRing()),
            oechem.OECount(mol, oechem.OEIsRotor()),
            mol.NumConfs(),
        )
        print(row_format % row)
def main(argv=[__name__]):
    """Split a molecule database into <n_servers> indexed shard files.

    Each molecule is prepared with OEPrepareFastROCSMol and written to the
    shard selected by its heavy atom count modulo the number of shards.
    Shards are named <prefix-base>_<i>.<ext>, where <ext> comes from the
    prefix if it has an extension and from the input file otherwise.
    Every shard is indexed with OECreateMolDatabaseIdx after writing.

    Returns 0 on success; failures are reported through oechem.OEThrow.
    """
    if len(argv) != 4:
        oechem.OEThrow.Usage("%s <database> <prefix> <n_servers>" % argv[0])

    # input - preserve rotor-offset-compression
    ifs = oechem.oemolistream()
    oechem.OEPreserveRotCompress(ifs)

    ifname = argv[1]
    if not ifs.open(ifname):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % argv[1])

    # output
    prefix = argv[2]
    ext = oechem.OEGetFileExtension(prefix)
    extstrt = len(prefix)
    if ext:
        extstrt = -(len(ext) + 1)
    else:
        ext = oechem.OEGetFileExtension(ifname)
    base = prefix[:extstrt]

    nservers = int(argv[3])
    if nservers < 1:
        # Zero would divide by zero when picking a shard, and a negative
        # value would open no output files at all.
        oechem.OEThrow.Fatal("<n_servers> must be a positive integer, got %d" % nservers)

    outstrms = []
    for i in range(1, nservers + 1):
        # Build the name directly so a '%' inside the prefix cannot be
        # misread as a format directive.
        outname = "%s_%i.%s" % (base, i, ext)
        ofs = oechem.oemolostream()
        if not ofs.open(outname):
            # Report the file that actually failed, not just the prefix.
            oechem.OEThrow.Fatal("Unable to open %s for writing" % outname)
        outstrms.append(ofs)

    dots = oechem.OEDots(10000, 200, "molecules")
    for mol in ifs.GetOEMols():
        oefastrocs.OEPrepareFastROCSMol(mol)

        nhvyatoms = oechem.OECount(mol, oechem.OEIsHeavy())

        ofs = outstrms[nhvyatoms % nservers]
        oechem.OEWriteMolecule(ofs, mol)

        dots.Update()

    dots.Total()

    for strm in outstrms:
        fname = strm.GetFileName()
        # Close before indexing so the index is built from the finished file.
        strm.close()
        oechem.OEThrow.Info("Indexing %s" % fname)
        if not oechem.OECreateMolDatabaseIdx(fname):
            oechem.OEThrow.Fatal("Failed to index %s" % fname)

    return 0
def heavy_atom_count(self):
    """
    Count the heavy atoms of the molecule stored in self.mol.

    Parameters
    ----------

    Returns
    -------
    int, number of heavy atoms in molecule
    """
    molecule = self.mol
    n_heavy = oechem.OECount(molecule, oechem.OEIsHeavy())
    return n_heavy
def CountRotors(ifs):
    """Print a histogram of rotor counts over all molecules read from ifs."""
    histogram = []
    for mol in ifs.GetOEMols():
        rotor_count = oechem.OECount(mol, oechem.OEIsRotor())
        # Grow the histogram so that index rotor_count exists.
        shortfall = rotor_count + 1 - len(histogram)
        if shortfall > 0:
            histogram.extend([0] * shortfall)
        histogram[rotor_count] += 1

    print("Max rotors:", len(histogram) - 1)
    print("Rotorcount distribution:")
    for rotors in range(len(histogram)):
        print("\t%d:\t%d" % (rotors, histogram[rotors]))
def _check_one_molecule(mol2file: str) -> (oechem.OEMol, bool):
    """Read one MOL2 molecule and test it for a single invertible nitrogen.

    Returns:
        A (molecule, flag) pair. The flag is True when exactly one atom
        matches oechem.OEIsInvertibleNitrogen.
    """
    stream = oechem.oemolistream(mol2file)
    stream.SetFormat(oechem.OEFormat_MOL2)

    molecule = oechem.OEMol()
    oechem.OEReadMolecule(stream, molecule)

    n_invertible = oechem.OECount(molecule, oechem.OEIsInvertibleNitrogen())
    has_exactly_one = n_invertible == 1
    return molecule, has_exactly_one
def check_frag_complexity(frag_smi, filter_type=2, check_n_rings=True, filter_ortho=True):
    """Decide whether a fragment SMILES is too complex and should be filtered.

    filter_type 1 rejects any rotor; filter_type 2 tolerates one rotor but
    rejects a single ring combined with one rotor (when check_n_rings is on).
    Both types reject more than one ring (when check_n_rings is on) and
    fragments for which find_ortho_substituents is true (when filter_ortho
    is on).

    Returns a (rejected, reason) tuple for filter_type 1 or 2, and None for
    any other filter_type.
    """
    from openeye import oechem

    oemol = oechem.OEGraphMol()
    oechem.OESmilesToMol(oemol, frag_smi)
    nrots = oechem.OECount(oemol, oechem.OEIsRotor())
    num_components, _component_membership = oechem.OEDetermineComponents(oemol)
    # Ring count from bonds - atoms + components.
    num_rings = oemol.NumBonds() - oemol.NumAtoms() + num_components

    if filter_type == 1:
        allowed_rotors = 0
    elif filter_type == 2:
        allowed_rotors = 1
    else:
        return None

    if nrots > allowed_rotors:
        return True, f' nrot: {nrots}'
    if check_n_rings and num_rings > 1:
        return True, f' nrings: {num_rings}'
    # Only the looser filter needs the extra "one ring plus one rotor" rule.
    if filter_type == 2 and check_n_rings and num_rings == 1 and nrots == 1:
        return True, f' nrings: {num_rings}, nrots: {nrots}'
    if filter_ortho and find_ortho_substituents(frag_smi):
        return True, 'ortho substituent exists.'
    return False, 'pass'
from __future__ import print_function
from openeye import oechem


class PredHasDoubleBondO(oechem.OEUnaryAtomPred):
    """Atom predicate: true if the atom is double bonded to an oxygen."""

    def __call__(self, atom):
        return any(
            bond.GetOrder() == 2 and bond.GetNbr(atom).IsOxygen()
            for bond in atom.GetBonds()
        )


class PredAmideBond(oechem.OEUnaryBondPred):
    """Bond predicate: single C-N bond whose carbon carries a double bonded O."""

    def __call__(self, bond):
        if bond.GetOrder() != 1:
            return False
        begin, end = bond.GetBgn(), bond.GetEnd()
        has_carbonyl = PredHasDoubleBondO()
        # Try the bond in both directions: C->N first, then N->C.
        for carbon, nitrogen in ((begin, end), (end, begin)):
            if carbon.IsCarbon() and nitrogen.IsNitrogen() and has_carbonyl(carbon):
                return True
        return False


mol = oechem.OEGraphMol()
oechem.OESmilesToMol(mol, "CC(=O)Nc1c[nH]cc1")
amide_bond_count = oechem.OECount(mol, PredAmideBond())
print("Number of amide bonds =", amide_bond_count)
# @ </SNIPPET>
def generate_torsion_profile(mol_list):
    """Tag rotatable bonds with torsion fragment data and collect profiles.

    For every molecule with rotors and torsion fragments, each usable
    fragment's torsion atoms and specific InChI key are stored on the
    central bond of the torsion, and the molecule is flagged with
    HAS_PROFILES_TAG. Fragments carrying a +1 or +2 formal charge are
    additionally marked with SKIP_TORSION_TAG on their bonds.

    Returns the (modified in place) mol_list and a dict mapping each
    specific InChI key to the result of get_profile_sf for its fragment.
    """
    sf_map = {}
    for graph_mol in mol_list:
        if oechem.OECount(graph_mol, oechem.OEIsRotor()) == 0:
            logging.warning(
                'WARNING: Skipping molecule %s... rotor count is zero',
                graph_mol.GetTitle())
            continue

        frag_mols = get_molecule_torsion_fragments(graph_mol)
        if len(frag_mols) == 0:
            logging.warning(
                'WARNING: Skipping molecule %s... cannot identify torsional fragments',
                graph_mol.GetTitle())
            continue

        _, torsion_data = extract_molecule_torsion_data(graph_mol, frag_mols)

        for frag_mol in frag_mols:
            if has_undesirable_elements(frag_mol) or oechem.OECount(
                    frag_mol, oechem.OEIsPhosphorus()) > 0:
                logging.warning(
                    'WARNING: Skipping a fragment in molecule %s... fragment has undesirable elements',
                    graph_mol.GetTitle())
                continue

            # skip fragments with one or more formal charge
            skip_torsion = (
                oechem.OECount(frag_mol, oechem.OEHasFormalCharge(1)) > 0
                or oechem.OECount(frag_mol, oechem.OEHasFormalCharge(2)) > 0
            )

            specific_inchi = get_specific_dihedral_inchi_key(frag_mol)
            if specific_inchi not in sf_map:
                sf_map[specific_inchi] = get_profile_sf(frag_mol)

            for a_idx, b_idx, c_idx, d_idx, _ in torsion_data[specific_inchi]:
                # The torsion is stored on its central b-c bond.
                atom_b = graph_mol.GetAtom(oechem.OEHasAtomIdx(b_idx))
                atom_c = graph_mol.GetAtom(oechem.OEHasAtomIdx(c_idx))
                bond = graph_mol.GetBond(atom_b, atom_c)

                if skip_torsion:
                    bond.SetData(SKIP_TORSION_TAG, True)

                tor_atoms_str = ' '.join(
                    str(idx) for idx in (a_idx, b_idx, c_idx, d_idx))
                if bond.HasData(TORSION_ATOMS_FRAGMENT_TAG):
                    # Several torsions on one bond are joined with ':'.
                    previous = bond.GetData(TORSION_ATOMS_FRAGMENT_TAG)
                    bond.SetData(TORSION_ATOMS_FRAGMENT_TAG,
                                 previous + ':' + tor_atoms_str)
                else:
                    bond.SetData(TORSION_ATOMS_FRAGMENT_TAG, tor_atoms_str)
                    bond.SetData(SPECIFIC_INCHI_TAG, specific_inchi)

        graph_mol.SetData(HAS_PROFILES_TAG, False)
        for rotor_bond in graph_mol.GetBonds(oechem.OEIsRotor()):
            if rotor_bond.HasData(TORSION_ATOMS_FRAGMENT_TAG):
                graph_mol.SetData(HAS_PROFILES_TAG, True)
                break

    return mol_list, sf_map
#
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of OpenEye products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. OpenEye claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable OpenEye offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED. OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
# liable for any damages or liability in connection with the Sample Code
# or its use.

# @ <SNIPPET>
from __future__ import print_function
from openeye import oechem

# Parse a Kekule benzene SMILES with OEParseSmiles, then count aromatic
# atoms before and after aromaticity flags are assigned.
mol = oechem.OEGraphMol()
parsed = oechem.OEParseSmiles(mol, "C1=CC=CC=C1")
if not parsed:
    print("SMILES string was invalid!")

count_before = oechem.OECount(mol, oechem.OEIsAromaticAtom())
print("Number of aromatic atoms =", count_before)

oechem.OEAssignAromaticFlags(mol)

count_after = oechem.OECount(mol, oechem.OEIsAromaticAtom())
print("Number of aromatic atoms =", count_after)
# @ </SNIPPET>
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of OpenEye products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. OpenEye claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable OpenEye offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED. OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
# liable for any damages or liability in connection with the Sample Code
# or its use.

# @ <SNIPPET>
from __future__ import print_function
from openeye import oechem


class PredAliphaticNitrogen(oechem.OEUnaryAtomPred):
    """Atom predicate: true for nitrogen atoms that are not aromatic."""

    def __call__(self, atom):
        if not atom.IsNitrogen():
            return False
        return not atom.IsAromatic()


mol = oechem.OEGraphMol()
oechem.OESmilesToMol(mol, "c1cc[nH]c1CC2COCNC2")
# The label and the count are printed on the same output line.
print("Number of aliphatic N atoms =", end=" ")
aliphatic_n_count = oechem.OECount(mol, PredAliphaticNitrogen())
print(aliphatic_n_count)
# @ </SNIPPET>
#!/usr/bin/env python
# (C) 2017 OpenEye Scientific Software Inc. All rights reserved.
#
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of OpenEye products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. OpenEye claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable OpenEye offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED. OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
# liable for any damages or liability in connection with the Sample Code
# or its use.

from openeye import oechem

# @ <SNIPPET>
# Build a molecule from a FASTA sequence and print how many atoms match
# the C-alpha predicate.
mol = oechem.OEGraphMol()
oechem.OEFastaToMol(mol, "AVILMPTWSTNQCGPRHKDE")
calpha_count = oechem.OECount(mol, oechem.OEIsCAlpha())
print(calpha_count)
# @ </SNIPPET>
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED. OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT # NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A # PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be # liable for any damages or liability in connection with the Sample Code # or its use. # @ <SNIPPET> from __future__ import print_function from openeye import oechem mol = oechem.OEGraphMol() oechem.OESmilesToMol(mol, "c1cnc(O)cc1CCCBr") print("Number of chain atoms =", end=" ") print(oechem.OECount(mol, oechem.OENotAtom(oechem.OEAtomIsInRing()))) print("Number of aromatic nitrogens =", end=" ") print( oechem.OECount( mol, oechem.OEAndAtom(oechem.OEIsNitrogen(), oechem.OEIsAromaticAtom()))) print("Number of non-carbons =", end=" ") print( oechem.OECount(mol, oechem.OENotAtom(oechem.OEHasAtomicNum(oechem.OEElemNo_C)))) print("Number of nitrogen and oxygen atoms =", end=" ") print( oechem.OECount(
# create subdirectory for this set if not os.path.exists(fileprefix): os.makedirs(fileprefix) os.chdir(fileprefix) # copy temporary files copyfile('../../frcmod.Frosst_AlkEthOH', './frcmod.Frosst_AlkEthOH') copyfile('../../leaprc.Frosst_AlkEthOH', './leaprc.Frosst_AlkEthOH') copyfile('../../' + fileprefix + '.oeb', './' + fileprefix + '.oeb') ifs = oechem.oemolistream(fileprefix + '.oeb') mol = oechem.OEMol() for mol in ifs.GetOEMols(): # add atom names c0 (methane) and c1302 (water) if (oechem.OECount(mol, oechem.OEIsHeavy()) == 1): oechem.OETriposAtomNames(mol) # generate input files if hasAmberParams(mol, cmd_string): print('%s successful writing amber .mol2, .top, and .crd file' % mol.GetTitle()) # treat water with diff pre-existing tleap input file elif mol.GetTitle().split("_")[1] == 'c1302': copyfile('../../files_for_c1302/frcmod.tip3p', './frcmod.tip3p') copyfile('../../files_for_c1302/AlkEthOH_c1302_edited.leap_in', './AlkEthOH_c1302_edited.leap_in') os.system( 'tleap -f leaprc.Frosst_AlkEthOH -f AlkEthOH_c1302_edited.leap_in >| leap_lig.stdout' ) print('%s successful writing amber .mol2, .top, and .crd file' %
def main(argv=[__name__]):
    """Print the heavy atom count of the molecule given by the -mol option."""
    interface = oechem.OEInterface(InterfaceData, argv)
    mol = interface.GetOEGraphMol("-mol")
    heavy_atoms = oechem.OECount(mol, oechem.OEIsHeavy())
    print("Number of heavy atoms in molecule = %d" % heavy_atoms)
#!/usr/bin/env python
# (C) 2017 OpenEye Scientific Software Inc. All rights reserved.
#
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of OpenEye products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. OpenEye claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable OpenEye offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED. OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
# liable for any damages or liability in connection with the Sample Code
# or its use.

# @ <SNIPPET>
from __future__ import print_function
from openeye import oechem

# Count heavy atoms and ring atoms with the built-in atom predicates.
mol = oechem.OEGraphMol()
oechem.OESmilesToMol(mol, "c1cc[nH]c1CC2COCNC2")

heavy_atom_count = oechem.OECount(mol, oechem.OEIsHeavy())
print("Number of heavy atoms =", heavy_atom_count)

ring_atom_count = oechem.OECount(mol, oechem.OEAtomIsInRing())
print("Number of ring atoms =", ring_atom_count)
# @ </SNIPPET>
def _read_graph_mol(path):
    """Read the first molecule of a file into an OEGraphMol and close the file."""
    stream = oechem.oemolistream(path)
    try:
        mol = oechem.OEGraphMol()
        oechem.OEReadMolecule(stream, mol)
    finally:
        stream.close()
    return mol


def _merge_score_column(all_ff_df, scores, column_name):
    """Merge a {molname: value} dict into all_ff_df as column column_name."""
    tempdf = pd.DataFrame.from_dict(scores, orient='index')
    tempdf = tempdf.rename({0: column_name}, axis='columns')
    tempdf['MolNames'] = tempdf.index
    return all_ff_df.merge(tempdf, on='MolNames')


def all_info_df(ffdirectorylist, all_ff_df):
    """
    This is the all_info_df function. It takes in the list of forcefields,
    as well as the dataframe of all molecule names, and runs TFD and
    Tanimoto Combo on all molecules. Its output is a dataframe of all
    this data.

    Molecule files are read from '<directory>/<forcefield>/<molname>.mol2',
    where ``directory`` is a module level name. If anything fails while
    processing a molecule, its heavy atom count and all of its TFD and
    TanimotoCombo values are set to -1.

    Args:
        ffdirectorylist (list) list of ff to compare
        all_ff_df (dataframe) dataframe created by make_molname_df func above.
    Returns:
        all_ff_df (dataframe) same dataframe with appended columns
        'TFD <ff1> <ff2>' and 'TANI <ff1> <ff2>' for every forcefield pair,
        plus 'HeavyAtomCount'.
    """
    # Every unordered pair of forcefields, computed once and reused.
    ff_pairs = list(itertools.combinations(ffdirectorylist, 2))
    pair_keys = ['%s %s' % (i, j) for i, j in ff_pairs]

    # Dictionaries that heavy atom counts, TFD and TANI scores go into
    heavyatomdict = {}
    TFDdict = {key: {} for key in pair_keys}
    TANIdict = {key: {} for key in pair_keys}

    # Generates all the data
    for molname in all_ff_df['MolNames']:
        print(molname)
        mol_file = '%s' % molname + '.mol2'
        try:
            refmolhev = _read_graph_mol(
                '%s/%s/%s' % (directory, ffdirectorylist[0], mol_file))
            heavyatomdict[molname] = oechem.OECount(refmolhev,
                                                    oechem.OEIsHeavy())

            # Gets TanimotoCombo and TFD values
            for (i, j), key in zip(ff_pairs, pair_keys):
                refmol = _read_graph_mol('%s/%s/%s' % (directory, i, mol_file))
                qmol = _read_graph_mol('%s/%s/%s' % (directory, j, mol_file))

                # Both score tables are keyed by molname itself so that they
                # merge consistently on the 'MolNames' column.
                TFDdict[key][molname] = TFD_for_oemols(refmol, qmol)
                TANIdict[key][molname] = tanimotocombo(refmol, qmol)
        except Exception as err:
            # Best effort: flag the whole molecule as failed instead of
            # aborting the run. Files are closed by _read_graph_mol, so
            # there is nothing left to clean up here.
            print('Failed to compare %s: %s' % (molname, err))
            heavyatomdict[molname] = -1
            for key in pair_keys:
                TANIdict[key][molname] = -1
                TFDdict[key][molname] = -1

    # Loads data into dataframe
    for key in TFDdict:
        all_ff_df = _merge_score_column(all_ff_df, TFDdict[key],
                                        'TFD %s' % key)
    for key in TANIdict:
        all_ff_df = _merge_score_column(all_ff_df, TANIdict[key],
                                        'TANI %s' % key)
    all_ff_df = _merge_score_column(all_ff_df, heavyatomdict,
                                    'HeavyAtomCount')
    return all_ff_df
#!/usr/bin/env python
# (C) 2017 OpenEye Scientific Software Inc. All rights reserved.
#
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of OpenEye products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. OpenEye claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable OpenEye offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED. OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
# liable for any damages or liability in connection with the Sample Code
# or its use.

# @ <SNIPPET>
from __future__ import print_function
from openeye import oechem

mol = oechem.OEGraphMol()
oechem.OESmilesToMol(mol, "c1cc[nH]c1CC2COCNC2")

# @ <SNIPPET-PRED-ATOMIC>
# Two ways to count oxygens: by atomic number, and with the dedicated
# oxygen predicate.
by_atomic_number = oechem.OECount(mol, oechem.OEHasAtomicNum(oechem.OEElemNo_O))
print("Number of oxygen atoms =", by_atomic_number)
by_oxygen_predicate = oechem.OECount(mol, oechem.OEIsOxygen())
print("Number of oxygen atoms =", by_oxygen_predicate)
# @ </SNIPPET-PRED-ATOMIC>
# @ </SNIPPET>
def IsMoleculeInHeavyAtomCountRange(min, max, mol):
    """Return IsBetween(min, max, n) where n is the heavy atom count of mol."""
    return IsBetween(min, max, oechem.OECount(mol, oechem.OEIsHeavy()))
# liable for any damages or liability in connection with the Sample Code # or its use. import sys from openeye import oechem ifs = oechem.oemolistream() ifs.open(sys.argv[1]) mol = oechem.OEGraphMol() oechem.OEReadMolecule(ifs, mol) # @ <SNIPPET-OEAtomMatchResidue> resAla = oechem.OEAtomMatchResidueID() resAla.SetName("ALA") predAla = oechem.OEAtomMatchResidue(resAla) print("Number of atoms matching residue name ALA = ", oechem.OECount(mol, predAla)) resChainA = oechem.OEAtomMatchResidueID() resChainA.SetChainID("A") predChainA = oechem.OEAtomMatchResidue(resChainA) print("Number of atoms matching chain A = ", oechem.OECount(mol, predChainA)) resHis = oechem.OEAtomMatchResidueID() resHis.SetName("HIS") resHis.SetChainID("A") resHis.SetResidueNumber("88") predHis = oechem.OEAtomMatchResidue(resHis) print("Number of atoms matching residue (HIS A 88) = ", oechem.OECount(mol, predHis)) # alternative way to initialize as regex predHis2 = oechem.OEAtomMatchResidue("HIS:88:.*:A:.*:.*")
def do(controller):
    """
    Calculate buried solvent-accessible surface areas for ligand binding sites.

    For every PDB entry returned by get_assembly_sets, each quaternary
    assembly is read from its OEB file, non-polymer atoms are removed, and
    for every ligand with at least 7 heavy atoms the per-atom surface areas
    of the ligand and of its binding site are computed both separately and
    in the complex. The results are handed to write_atoms, which receives a
    tab-dialect CSV writer on
    <credo_data>/<pdb[1:3]>/<pdb>/binding_site_atom_surface_areas.credo.

    Entries are skipped when args.incremental is set and a non-empty output
    file exists, or when args.update is set and the output file is newer
    than args.update days. With args.dry_run everything is computed but
    nothing is written. The process exits with status 1 if args.quat is
    not set.

    :param controller: object providing ``command`` and ``pargs``
    """
    # get the controller command
    cmd = controller.command

    # get the command line arguments and options
    args = controller.pargs

    # predicate to remove non-polymer atoms from structure
    nonpolymers = oechem.OEOrAtom(
        OEAtomHasIntData(('entity_type_bm', 0)),
        OEAtomBinaryAndIntData(('entity_type_bm', 3)))

    assemblysets = get_assembly_sets(args)

    # directory containing all the biological assemblies in OEB format
    OEB_ASSEMBLIES_DIR = app.config.get('directories', 'quat_oeb')

    # directory where surface areas will be written
    CREDO_DATA_DIR = app.config.get('directories', 'credo_data')

    # a single input stream is reused for every assembly file
    ifs = oechem.oemolistream()
    ifs.SetFormat(oechem.OEFormat_OEB)

    # initialize progressbar
    if args.progressbar:
        bar = ProgressBar(widgets=[
            'PDB entries: ', SimpleProgress(), ' ', Percentage(), Bar()
        ], maxval=len(assemblysets)).start()

    # iterate through assembly sets
    for counter, (pdb, assemblyset) in enumerate(assemblysets, 1):
        if args.progressbar:
            bar.update(counter)

        # create a data directory for this structure to which all data will be written
        struct_data_dir = os.path.join(CREDO_DATA_DIR, pdb[1:3].lower(), pdb.lower())

        # make necessary directories recursively if they do not exist yet
        if not exists(struct_data_dir):
            os.makedirs(struct_data_dir)

        # path to the file where the atom surface areas of all atoms will be written
        surface_areas_path = os.path.join(
            struct_data_dir, 'binding_site_atom_surface_areas.credo')

        # do not recalculate atom surface area contributions if incremental
        if args.incremental and exists(
                surface_areas_path) and getsize(surface_areas_path) > 0:
            continue

        # args.update is a number of days; skip output that is newer than that
        elif (args.update and exists(surface_areas_path)
              and getmtime(surface_areas_path) >= time() - (args.update * 60 * 60 * 24)
              and getsize(surface_areas_path)):
            app.log.info("Output for PDB entry {0} exists and is more recent than {1} days. Skipped."\
                         .format(pdb, args.update))
            continue

        # output file stream and CSV writer
        # NOTE(review): the file is only closed at the end of this iteration,
        # so the sys.exit(1) below leaves it open - confirm this is acceptable.
        atomfs = open(surface_areas_path, 'w')
        atomwriter = csv.writer(atomfs, dialect='tabs')

        # deal with each found assembly separately
        # some pdb entries consist of more than one
        for assembly in assemblyset:
            if args.quat:
                path = os.path.join(OEB_ASSEMBLIES_DIR, pdb[1:3].lower(),
                                    pdb.lower(), assembly)

            else:
                app.log.error("the calculation of buried ligand surface areas "
                              "is only supported for quaternary structures.")
                sys.exit(1)

            # NOTE(review): there is no `continue` after this warning, so a
            # missing file falls through to the open/read below and is only
            # skipped by the "no valid molecule" check - confirm intended.
            if not os.path.isfile(path):
                app.log.warn("cannot calculate buried surface areas: "
                             "file {} does not exist!".format(path))

            # get the quaternary structure
            ifs.open(str(path))

            # the loop variable `assembly` (a file name) is rebound to the
            # molecule read from that file
            try:
                assembly = ifs.GetOEGraphMols().next()
            except StopIteration:
                assembly = None

            if not assembly:
                app.log.warn(
                    "cannot calculate buried surface areas: "
                    "file {} does not contain a valid molecule!".format(path))
                continue

            if not assembly.GetListData('ligands'):
                continue

            # identifier of the assembly
            assembly_serial = assembly.GetIntData('assembly_serial')

            # remove all non-polymers from assembly
            for atom in assembly.GetAtoms(nonpolymers):
                assembly.DeleteAtom(atom)

            # ignore bizarre assemblies
            if not assembly.NumAtoms():
                app.log.warn(
                    "cannot calculate buried surface areas: "
                    "file {} contains assembly with no atoms!".format(path))
                continue

            # keep only the location state with the largest average occupancy
            assembly_hi_occ = oechem.OEGraphMol()
            altlocfactory = oechem.OEAltLocationFactory(assembly)
            altlocfactory.MakeCurrentAltMol(assembly_hi_occ)

            # get the ligands
            ligands = assembly_hi_occ.GetListData('ligands')

            # iterate through all ligands of the biomolecule and calculate the buried
            # surface area atom contributions for all involved atoms
            for ligand in ligands:

                # ignore small ligands
                if oechem.OECount(ligand, oechem.OEIsHeavy()) < 7:
                    continue

                entity_serial = ligand.GetIntData('entity_serial')

                # keep only the location state with the largest average occupancy
                altlig = oechem.OEGraphMol()
                altlocfactory = oechem.OEAltLocationFactory(ligand)
                altlocfactory.MakeCurrentAltMol(altlig)

                cmplx_srf = oespicoli.OESurface()
                ligand_srf = oespicoli.OESurface()

                # make solvent-accessible surface of ligand
                oespicoli.OEMakeAccessibleSurface(ligand_srf, altlig, 0.5, 1.4)

                # get the atom contributions of the ligand surface
                ligand_atom_areas = get_atom_surface_areas(altlig, ligand_srf)

                # extract the binding site of the assembly to speed up surface
                # area calculation
                binding_site = get_binding_site(assembly_hi_occ, altlig)

                # make solvent-accessible surface of binding site
                binding_site_srf = oespicoli.OESurface()
                oespicoli.OEMakeAccessibleSurface(binding_site_srf, binding_site, 0.5, 1.4)

                # get the atom contributions of the binding site surface
                binding_site_atom_areas = get_atom_surface_areas(
                    binding_site, binding_site_srf)

                # create complex
                # binding site atoms are added first, ligand atoms second; the
                # slices below rely on this order
                cmplx = oechem.OEGraphMol()
                oechem.OEAddMols(cmplx, binding_site)
                oechem.OEAddMols(cmplx, altlig)

                # make solvent-accessible surface of the complex
                oespicoli.OEMakeAccessibleSurface(cmplx_srf, cmplx, 0.5, 1.4)

                # surface area atom contributions of the whole complex
                cmplx_atom_areas = get_atom_surface_areas(cmplx, cmplx_srf)

                ## extract the atom surface areas in the bound state through slices
                binding_site_atom_areas_bound = cmplx_atom_areas[:binding_site.NumAtoms()]
                ligand_atom_areas_bound = cmplx_atom_areas[binding_site.NumAtoms():]

                # difference between apo and bound state per polymer atom
                # (presumably array-like values supporting elementwise
                # arithmetic - verify against get_atom_surface_areas)
                binding_site_delta = binding_site_atom_areas - binding_site_atom_areas_bound
                ligand_delta = ligand_atom_areas - ligand_atom_areas_bound

                # boolean map indicating for which atom the surface area has changed
                binding_site_atom_map = binding_site_delta != 0
                ligand_atom_map = ligand_delta != 0

                # a dry run computes everything but writes nothing
                if args.dry_run:
                    continue

                # only record the atoms where the solvent-accessible surface
                # area has actually changed
                write_atoms(atomwriter, binding_site, binding_site_atom_map, pdb,
                            assembly_serial, entity_serial, binding_site_atom_areas,
                            binding_site_atom_areas_bound)

                # only record the atoms where the solvent-accessible surface area
                # has actually changed
                write_atoms(atomwriter, altlig, ligand_atom_map, pdb, assembly_serial,
                            entity_serial, ligand_atom_areas, ligand_atom_areas_bound)

            app.log.debug("wrote buried surface areas for all ligands in "
                          "biomolecule {} to {}.".format(
                              pdb, surface_areas_path))

        atomfs.flush()
        atomfs.close()

    if args.progressbar:
        bar.finish()
mol = oechem.OEGraphMol()
oechem.OESmilesToMol(mol, "CC(=O)Nc1c[nH]cc1")


class PredHasDoubleBondO(oechem.OEUnaryAtomPred):
    """Atom predicate: true if the atom is double bonded to an oxygen."""

    def __call__(self, atom):
        return any(
            bond.GetOrder() == 2 and bond.GetNbr(atom).IsOxygen()
            for bond in atom.GetBonds()
        )


# @ <SNIPPET-PRED-PYBOND>
def AmideBond(bond):
    """Return True for a single C-N bond whose carbon has a double bonded O."""
    if bond.GetOrder() != 1:
        return False
    begin, end = bond.GetBgn(), bond.GetEnd()
    has_carbonyl = PredHasDoubleBondO()
    # Try the bond in both directions: C->N first, then N->C.
    for carbon, nitrogen in ((begin, end), (end, begin)):
        if carbon.IsCarbon() and nitrogen.IsNitrogen() and has_carbonyl(carbon):
            return True
    return False


# Wrap the plain Python function so that it can be used as a bond predicate.
amide_predicate = oechem.PyBondPredicate(AmideBond)
print("Number of amide bonds =", oechem.OECount(mol, amide_predicate))
# @ </SNIPPET-PRED-PYBOND>
# @ </SNIPPET>
if not frag in frags: print('{} not in {}'.format(frag, bond)) failures[ser_bond] = frag continue idx = frags.index(frag) sqrt_mmd = np.sqrt(np.asarray(mmd_scores)) norm = plt.Normalize(min(sqrt_mmd), max(sqrt_mmd)) normed_scores = norm(sqrt_mmd) score = sqrt_mmd[idx] normed_score = normed_scores[idx] print(f.fragments) if tuple(bond) not in f.fragments: bond = tuple(reversed(bond)) mol = f.fragments[tuple(bond)] size = oechem.OECount(mol, oechem.OEIsHeavy()) score_size[ser_bond] = [frag, score, normed_score, size] if ser_bond not in frag_scores_2: continue frags_2 = frag_scores_2[ser_bond]['frags'] mmd_scores_2 = frag_scores_2[ser_bond]['mmd_scores'] sqrt_mmd_2 = np.sqrt(np.asarray(mmd_scores_2)) idx_2 = frags_2.index(frag) score_2 = sqrt_mmd_2[idx_2] norm_2 = plt.Normalize(min(sqrt_mmd_2), max(sqrt_mmd_2)) normed_scores_2 = norm_2(sqrt_mmd_2) normed_score_2 = normed_scores_2[idx_2] if tuple(bond) not in f.fragments:
#!/usr/bin/env python
# (C) 2017 OpenEye Scientific Software Inc. All rights reserved.
#
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of OpenEye products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. OpenEye claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable OpenEye offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED. OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
# liable for any damages or liability in connection with the Sample Code
# or its use.

# @ <SNIPPET>
from __future__ import print_function
from openeye import oechem

# Count ring bonds and rotor bonds with the built-in bond predicates.
mol = oechem.OEGraphMol()
oechem.OESmilesToMol(mol, "CC(=O)Nc1c[nH]cc1")

ring_bond_count = oechem.OECount(mol, oechem.OEBondIsInRing())
print("Number of ring bonds =", ring_bond_count)

rotor_bond_count = oechem.OECount(mol, oechem.OEIsRotor())
print("Number of rotor bonds =", rotor_bond_count)
# @ </SNIPPET>
def process(self, record, port):
    """Run the psi4 torsion energy calculation for the molecule in a record.

    The molecule is read from the ``in_mol_field`` of the record. Its
    fragment torsion atoms come from the "TORSION_ATOMS_FRAGMENT" SD tag
    (1-based in the tag, converted to 0-based here). If the molecule
    contains sulfur or a negatively charged heavy atom, the 6-31G* and
    6-31G** basis sets are replaced by their diffuse counterparts.

    Start and end time stamps are written as SD data on the selected
    conformer (when ``only_selected_conformer`` is set) or on the molecule.
    On success a new OEMolRecord holding the molecule is emitted on
    ``self.success``; on any failure the input record is emitted on
    ``self.failure``.

    :param record: incoming record expected to hold the molecule
    :param port: not used by this method
    """
    if record.has_value(self.args.in_mol_field):
        mol = record.get_value(self.args.in_mol_field)
    else:
        self.log.error("Could not find molecules in OEMolRecord")
        self.failure.emit(record)
        return

    parent_torsion_tag = "TORSION_ATOMS_ParentMol"
    torsion_atoms_in_parent = get_sd_data(mol, parent_torsion_tag).split()
    dih_name = mol.GetTitle() + "_" + "_".join(torsion_atoms_in_parent)

    torsion_tag = "TORSION_ATOMS_FRAGMENT"
    torsion_atoms_in_fragment = get_sd_data(mol, torsion_tag).split()
    dihedral_atom_indices = [int(x) - 1 for x in torsion_atoms_in_fragment]
    # A list comprehension never yields None; a missing label shows up as
    # an empty list, so that is what has to be tested.
    if not dihedral_atom_indices:
        self.log.warn("Unable to find labelled torsion in %s" % dih_name)
        self.failure.emit(record)
        return

    opt_basis = self.args.opt_basis
    spe_basis = self.args.spe_basis

    # If fragment contains S (or a negatively charged heavy atom)
    # use 6-31+G* instead of 6-31G*
    # use 6-31+G** instead of 6-31G**
    need_diffuse = oechem.OECount(mol, oechem.OEIsSulfur()) > 0 or any(
        atom.GetFormalCharge() < 0
        for atom in mol.GetAtoms(oechem.OEIsHeavy()))

    if need_diffuse:
        diffuse_basis = {"6-31G*": "6-31+G*", "6-31G**": "6-31+G**"}
        if opt_basis in diffuse_basis:
            self.log.warn(
                "Using %s instead of %s as opt basis because fragment contains S."
                % (diffuse_basis[opt_basis], opt_basis))
            opt_basis = diffuse_basis[opt_basis]
        if spe_basis in diffuse_basis:
            self.log.warn(
                "Using %s instead of %s as spe basis because fragment contains S."
                % (diffuse_basis[spe_basis], spe_basis))
            spe_basis = diffuse_basis[spe_basis]

    def labelled_target():
        """Return the object that carries the conformer label and time stamps."""
        if not self.args.only_selected_conformer:
            return mol
        key_conf_id = mol.GetIntData("SELECTED_CONFORMER")
        for conf in mol.GetConfs():
            # Stop at the selected conformer; without this the loop variable
            # would end up on the last conformer and the time stamps would
            # be attached to the wrong one.
            if conf.GetIdx() == key_conf_id:
                return conf
        raise ValueError("selected conformer %s not found" % key_conf_id)

    def time_stamp_now():
        return "{:%Y-%m-%d %H:%M:%S}".format(datetime.datetime.now())

    try:
        target = labelled_target()
        conf_name = get_sd_data(target, "CONFORMER_LABEL")
        time_stamp = time_stamp_now()
        hostname = socket.gethostname()
        self.log.info("Starting psi4 calculation for %s on %s at %s" %
                      (conf_name, hostname, time_stamp))
        oechem.OESetSDData(target, "%s start time" % self.name, time_stamp)

        dih, _ = get_dihedral(mol, dihedral_atom_indices)
        calculate_energy(
            mol,
            dih,
            spe_method=self.args.spe_method,
            spe_basis=spe_basis,
            geom_opt_technique=self.args.geom_opt_technique,
            opt_method=self.args.opt_method,
            opt_basis=opt_basis,
            geom_maxiter=self.args.geom_maxiter,
            only_selected_conf=self.args.only_selected_conformer,
            molden_output=self.args.molden_output,
            **self.psi4opts)

        # Look the target up again after the calculation, as before.
        target = labelled_target()
        conf_name = get_sd_data(target, "CONFORMER_LABEL")
        time_stamp = time_stamp_now()
        hostname = socket.gethostname()
        self.log.info("Completed psi4 calculation for %s on %s at %s" %
                      (conf_name, hostname, time_stamp))
        oechem.OESetSDData(target, "%s end time" % self.name, time_stamp)

        optimized_mol_record = OEMolRecord()
        optimized_mol_record.set_mol(mol)
        self.success.emit(optimized_mol_record)
    except Exception as e:
        print(e)
        # traceback.print_stack()
        self.log.error("Error with {} {}".format(mol.GetTitle(), e))
        self.failure.emit(record)