def test_success(self):
    """
    Test that YankBindingCube can successfully process a single molecule.

    A ligand is read from the ``p-xylene.mol2`` data file and handed to the
    cube. Exactly one molecule must appear on the success port (none on the
    failure port), with the same atom count as the input and with both the
    ``DeltaG_yank_binding`` and ``dDeltaG_yank_binding`` SD tags attached.
    """
    print('Testing cube:', self.cube.name)

    # Load the input ligand from the test data file
    ligand = oechem.OEMol()
    stream = oechem.oemolistream(get_data_filename('p-xylene.mol2'))
    loaded = oechem.OEReadMolecule(stream, ligand)
    if not loaded:
        raise Exception('Cannot read molecule')
    stream.close()

    # Feed the ligand to the cube through its intake port
    self.cube.process(ligand, self.cube.intake.name)

    # One molecule on the success port, nothing on the failure port
    ports = self.runner.outputs
    self.assertEqual(ports['success'].qsize(), 1)
    self.assertEqual(ports['failure'].qsize(), 0)

    result = ports["success"].get()

    # Input and output must have the same number of atoms
    self.assertEqual(result.NumAtoms(), ligand.NumAtoms())

    # The binding free energy estimate and its uncertainty tag must be present
    for sd_tag in ('DeltaG_yank_binding', 'dDeltaG_yank_binding'):
        self.assertTrue(oechem.OEHasSDData(result, sd_tag))
def get_sd_data(mol, tag):
    """Return the SD data stored under *tag*, or "" when it cannot be found.

    The molecule itself is consulted first, then its active conformer. An
    ``AttributeError`` raised during the lookup (for example when *mol* has
    no ``GetActive``) is printed and treated as "not found".
    """
    value = ""
    try:
        if oechem.OEHasSDData(mol, tag):
            value = oechem.OEGetSDData(mol, tag)
        else:
            active = mol.GetActive()
            if oechem.OEHasSDData(active, tag):
                value = oechem.OEGetSDData(active, tag)
    except AttributeError as err:
        print(err)
    return value
def GetScoreToCmp(mol):
    """Return the score stored on *mol* that should be used for sorting.

    Molecules carrying a "ShapeTanimoto" tag are scored by Tanimoto, all
    others by Tversky. Within each family the combo score is preferred over
    the plain shape score when it is present.
    """
    if oechem.OEHasSDData(mol, "ShapeTanimoto"):
        combo_tag, shape_tag = "TanimotoCombo", "ShapeTanimoto"
    else:
        combo_tag, shape_tag = "TverskyCombo", "ShapeTversky"

    score_tag = combo_tag if oechem.OEHasSDData(mol, combo_tag) else shape_tag
    return float(oechem.OEGetSDData(mol, score_tag))
def has_ic50(mol):
    """Return True if this molecule has fluorescence IC50 data.

    The molecule qualifies when it carries an ``f_avg_pIC50`` SD tag whose
    value can be converted to a float.

    Parameters
    ----------
    mol : OpenEye molecule
        Molecule whose SD data is inspected.

    Returns
    -------
    bool
        True when the tag exists and holds a numeric value, False otherwise.
    """
    from openeye import oechem

    if not oechem.OEHasSDData(mol, 'f_avg_pIC50'):
        return False

    try:
        # Only the conversion is of interest; the value itself is not used.
        float(oechem.OEGetSDData(mol, 'f_avg_pIC50'))
    except (TypeError, ValueError):
        return False
    return True
def RenderData(image, mol, tags):
    """Draw a two-column table of SD data for *mol* onto *image*.

    One row is drawn per entry in *tags*: the tag name followed by a colon in
    the first column and its SD value in the second. Tags missing from the
    molecule are shown with the value "N/A".
    """
    from openeye import oechem
    from openeye import oedepict

    # Collect (tag, value) rows, substituting a placeholder for absent tags
    rows = [
        (tag, oechem.OEGetSDData(mol, tag) if oechem.OEHasSDData(mol, tag) else "N/A")
        for tag in tags
    ]

    # Table layout: one row per tag, no header row, stub column enabled
    options = oedepict.OEImageTableOptions(
        len(rows), 2, oedepict.OEImageTableStyle_LightBlue)
    options.SetColumnWidths([10, 20])
    options.SetMargins(2.0)
    options.SetHeader(False)
    options.SetStubColumn(True)

    table = oedepict.OEImageTable(image, options)

    # Table rows are addressed starting at 1
    for row_number, (tag, value) in enumerate(rows, start=1):
        table.DrawText(table.GetCell(row_number, 1), tag + ":")
        table.DrawText(table.GetBodyCell(row_number, 1), value)
def SDF2CSV(ifs, csv):
    """Write the SD data of every molecule in *ifs* to *csv* as CSV text.

    The input is read twice: a first pass collects every distinct SD tag in
    first-seen order, then the stream is rewound and one row is written per
    molecule (title first, then one column per tag, empty when the molecule
    lacks that tag).

    Parameters
    ----------
    ifs : molecule input stream
        Must support ``GetOEGraphMols()`` and ``rewind()``.
    csv : writable object
        Anything with a ``write(str)`` method.

    Notes
    -----
    NOTE(review): values are written verbatim; a title or value containing a
    comma or newline will produce a malformed row.
    """
    # First pass: gather unique tags. The set gives O(1) membership tests,
    # the list keeps the column order stable.
    taglist = []
    seen = set()
    for mol in ifs.GetOEGraphMols():
        for dp in oechem.OEGetSDDataPairs(mol):
            tag = dp.GetTag()
            if tag not in seen:
                seen.add(tag)
                taglist.append(tag)
    ifs.rewind()

    # Column labels
    csv.write(",".join(["Title"] + taglist) + "\n")

    # Second pass: one row per molecule
    for mol in ifs.GetOEGraphMols():
        fields = [mol.GetTitle()]
        for tag in taglist:
            if oechem.OEHasSDData(mol, tag):
                fields.append(oechem.OEGetSDData(mol, tag))
            else:
                fields.append('')
        csv.write(",".join(fields))
        csv.write("\n")
def main(argv=[__name__]):
    """Copy molecules whose numeric SD tag value lies within [-min, -max].

    Command-line options are parsed from *argv* using ``InterfaceData``.
    At least one of ``-min`` / ``-max`` must be given; a missing bound is
    treated as unbounded. Molecules lacking the tag, or whose tag value is
    not numeric, are skipped with a warning.

    Parameters
    ----------
    argv : list of str
        Argument vector; the default list is only read, never modified.
    """
    itf = oechem.OEInterface(InterfaceData, argv)

    if not (itf.HasDouble("-min") or itf.HasDouble("-max")):
        oechem.OEThrow.Fatal("Please set a filter value with -min or -max")

    ifs = oechem.oemolistream()
    if not ifs.open(itf.GetString("-i")):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % itf.GetString("-i"))

    if not oechem.OEIsSDDataFormat(ifs.GetFormat()):
        oechem.OEThrow.Fatal(
            "Only works for input file formats that support SD data (sdf,oeb,csv)")

    ofs = oechem.oemolostream()
    if not ofs.open(itf.GetString("-o")):
        # Report the output path (the message previously named the input file)
        oechem.OEThrow.Fatal("Unable to open %s for writing" % itf.GetString("-o"))

    if not oechem.OEIsSDDataFormat(ofs.GetFormat()):
        oechem.OEThrow.Fatal(
            "Only works for output file formats that support SD data (sdf,oeb,csv)")

    tag = itf.GetString("-tag")

    # An unspecified bound places no restriction on that side
    minval = float("-inf")
    if itf.HasDouble("-min"):
        minval = itf.GetDouble("-min")

    maxval = float("inf")
    if itf.HasDouble("-max"):
        maxval = itf.GetDouble("-max")

    for mol in ifs.GetOEGraphMols():
        if not oechem.OEHasSDData(mol, tag):
            oechem.OEThrow.Warning("Unable to find %s tag on %s" % (tag, mol.GetTitle()))
            continue

        value = oechem.OEGetSDData(mol, tag)
        try:
            tagvalue = float(value)
        except ValueError:
            oechem.OEThrow.Warning("Failed to convert (%s) to a number in %s" %
                                   (value, mol.GetTitle()))
            continue

        # Two separate comparisons are kept on purpose: a NaN value fails
        # both tests and is therefore written out, as before.
        if tagvalue < minval:
            continue
        if tagvalue > maxval:
            continue

        oechem.OEWriteMolecule(ofs, mol)
def Rename(ifs, ofs, fieldname):
    """Copy molecules from *ifs* to *ofs*, retitling each from an SD field.

    When a molecule carries SD data under *fieldname*, that value becomes its
    title. Otherwise a warning is issued and the molecule is written with its
    existing title.
    """
    for mol in ifs.GetOEGraphMols():
        if not oechem.OEHasSDData(mol, fieldname):
            message = "Renaming of molecule %s failed; no field %s" % (
                mol.GetTitle(), fieldname)
            oechem.OEThrow.Warning(message)
        else:
            new_title = oechem.OEGetSDData(mol, fieldname)
            mol.SetTitle(new_title)

        # Every molecule is written, renamed or not
        oechem.OEWriteMolecule(ofs, mol)
def test_read_mols_slice():
    """Check that read_mols honours the mol_slice argument."""
    data_dir = os.path.join(mydir, 'data_tests')

    # Slice [0, 1, 1] of the two-alkane file must yield only the first molecule
    alkanes = read_mols(os.path.join(data_dir, 'two_alkanes_prefilt.sdf'),
                        mol_slice=[0, 1, 1])
    assert len(alkanes) == 1
    first_mol = alkanes[0]
    assert first_mol.GetTitle() == 'AlkEthOH_c312'
    assert first_mol.NumConfs() == 9
    first_conf = list(first_mol.GetConfs())[0]
    assert oechem.OEHasSDData(first_conf, "MM Szybki SD Energy") == True

    # File order:
    # AlkEthOH_c1178, Div_2, Div_6, Div_9, Div_3b, Div_7b, Div_8b, AlkEthOH_r187
    # Slice [2, 8, 2] must pick every other molecule starting at the third
    subset = read_mols(os.path.join(data_dir, 'eight_mols.sdf'),
                       mol_slice=[2, 8, 2])
    assert len(subset) == 3
    for position, expected_title in enumerate(('Div_6', 'Div_3b', 'Div_8b')):
        assert subset[position].GetTitle() == expected_title
def dock_molecule(molecule, ofs, default_receptor='x0387'):
    """
    Dock the specified molecule, writing out to the specified stream

    The receptors are chosen from the comma-separated "fragments" SD tag of
    the molecule, keeping only fragments whose receptor file exists on disk.
    When the tag is absent or no listed receptor exists, the default receptor
    is used instead. For every selected receptor the molecule's protomers
    are enumerated and passed to ``dock_molecules_to_receptor``.

    Parameters
    ----------
    molecule : OEMol
        The molecule to dock
    ofs : oechem.oemolostream
        The stream that docked molecules are written to
    default_receptor : str, optional, default='x0387'
        The default receptor to dock to
    """
    # All imports happen before first use: a function-local import placed
    # after a use of the same name makes that earlier use fail.
    import os

    from openeye import oechem
    from openeye import oequacpac

    def receptor_path(fragment):
        # Location of the prepared receptor for a given fragment ID
        return f'../receptors/Mpro-{fragment}-receptor.oeb.gz'

    # Make a copy of the molecule
    molecule = oechem.OEMol(molecule)

    # Extract list of corresponding receptor(s)
    print(f'\n{molecule.GetTitle()}')
    fragments = []
    if oechem.OEHasSDData(molecule, "fragments"):
        fragments = oechem.OEGetSDData(molecule, "fragments").split(',')
        print(f'fragments before filter: {fragments}')
        fragments = [
            fragment for fragment in fragments
            if os.path.exists(receptor_path(fragment))
        ]
        print(f'fragments after filter: {fragments}')

    # Fall back to the default receptor when nothing usable was listed
    if not fragments:
        fragments = [default_receptor]

    for fragment in fragments:
        molecule_to_dock = oechem.OEMol(molecule)
        receptor_filename = receptor_path(fragment)

        # Record which fragment's receptor this copy is docked against
        oechem.OESetSDData(molecule_to_dock, "fragments", fragment)

        # Enumerate reasonable protomers/tautomers
        protomers = [
            oechem.OEMol(protomer)
            for protomer in oequacpac.OEGetReasonableProtomers(molecule_to_dock)
        ]
        dock_molecules_to_receptor(receptor_filename, protomers, ofs)
def test_success(self):
    """Check that the cube emits one scored molecule on the success port.

    A molecule is read from the ``data/TOL-smnf.oeb.gz`` example file and
    handed to the cube. Exactly one molecule must appear on the success port
    (none on the failure port) and it must carry a ``Chemgauss4`` SD tag.
    """
    print('Testing cube:', self.cube.name)

    # Load the input molecule from the example data
    ligand = oechem.OEMol()
    stream = oechem.oemolistream(
        utils.get_data_filename('examples', 'data/TOL-smnf.oeb.gz'))
    loaded = oechem.OEReadMolecule(stream, ligand)
    if not loaded:
        raise Exception('Cannot read molecule')
    stream.close()

    # Feed the molecule to the cube through its intake port
    self.cube.process(ligand, self.cube.intake.name)

    # One molecule on the success port, nothing on the failure port
    ports = self.runner.outputs
    self.assertEqual(ports['success'].qsize(), 1)
    self.assertEqual(ports['failure'].qsize(), 0)

    # The emitted molecule must have a score attached
    result = ports["success"].get()
    self.assertTrue(oechem.OEHasSDData(result, 'Chemgauss4'))
def FilterMolData(self, mol):
    """Strip SD data from *mol* that is not listed in ``self.fields``.

    Returns 0 when the molecule has no SD data, or when ``self.fields`` is
    empty (all SD data is then cleared). Returns -1 when ``self.fields`` is
    None, leaving the molecule untouched. Otherwise returns the number of SD
    entries kept. With ``self.asFloating`` set, entries whose value is not
    numeric are dropped with a warning. If nothing is kept, all SD data is
    cleared.
    """
    if not oechem.OEHasSDData(mol):
        return 0

    if self.fields is None:
        return -1

    if len(self.fields) == 0:
        oechem.OEClearSDData(mol)
        return 0

    kept = 0
    rejected = []
    for pair in oechem.OEGetSDDataPairs(mol):
        tag = pair.GetTag()
        if tag in self.fields:
            value = oechem.OEGetSDData(mol, tag)
            usable = True
            if self.asFloating:
                try:
                    float(value)
                except ValueError:
                    oechem.OEThrow.Warning("Failed to convert %s to numeric value (%s) in %s" %
                                           (tag, value, mol.GetTitle()))
                    usable = False
            if usable:
                kept += 1
            else:
                rejected.append(tag)
        else:
            rejected.append(tag)

    # Deletion is deferred until the SD data iteration has finished
    if kept:
        for tag in rejected:
            oechem.OEDeleteSDData(mol, tag)
    else:
        oechem.OEClearSDData(mol)

    return kept
def delete_sd_data(mol, tag, locator_tag):
    """Delete SD data *tag* from whichever object carries *locator_tag*.

    The molecule is checked for *locator_tag* first, then its active
    conformer. *tag* is deleted from the first one that has the locator and
    the result of ``OEDeleteSDData`` is returned. False is returned when
    neither carries the locator.
    """
    holder = None
    if oechem.OEHasSDData(mol, locator_tag):
        holder = mol
    else:
        active = mol.GetActive()
        if oechem.OEHasSDData(active, locator_tag):
            holder = active

    if holder is None:
        return False
    return oechem.OEDeleteSDData(holder, tag)
def __contains__(self, kee:str) -> bool:
    """Report whether the wrapped molecule has SD data under the tag *kee*."""
    tag_present = oechem.OEHasSDData(self._mol, kee)
    return tag_present
def _find_sd_tag_label(source, tag):
    """Return the SD tag on *source* whose name contains *tag*, ignoring case, or None."""
    wanted = tag.lower()
    label = None
    for pair in oechem.OEGetSDDataPairs(source):
        if wanted in pair.GetTag().lower():
            # when several tags match, the last one wins
            label = pair.GetTag()
    return label


def IdentifyMinima(mol, tag, ThresholdE, ThresholdRMSD):
    """
    For a molecule's set of conformers computed with some level of theory,
    whittle down unique conformers based on energy and RMSD.

    Parameters
    ----------
    mol : OEChem molecule with all of its conformers
    tag : string
        name of the SD tag in this molecule (matched case-insensitively as a
        substring of the actual tag name)
    ThresholdE : float
        value for abs(E1-E2), below which 2 confs are "same"
        Units are hartrees (default output units of Psi4)
    ThresholdRMSD : float
        value for RMSD, below which 2 confs are "same"
        Units are in Angstrom (Psi4 default)

    Returns
    -------
    boolean
        True if successful filter + delete. For a single-conformer molecule,
        True if the tag is present on the molecule or its conformer.
        False if no conformer carries the energy tag.
    """
    # Parameters for OpenEye RMSD calculation
    automorph = True
    heavyOnly = False
    overlay = True

    # declare variables for conformers to delete
    confsToDel = set()
    delCount = 0  # number of conformers lacking the energy tag

    # Single conformer: nothing to compare, only report whether the tag
    # exists. Every SD pair is examined (not just the first), on the molecule
    # and on its conformer.
    if mol.NumConfs() == 1:
        if _find_sd_tag_label(mol, tag) is not None:
            return True
        return any(
            _find_sd_tag_label(conf, tag) is not None for conf in mol.GetConfs())

    # Loop over conformers twice (NxN diagonal comparison of RMSDs)
    for confRef in mol.GetConfs():
        print(" ~ Reference: %s conformer %d" % (mol.GetTitle(), confRef.GetIdx() + 1))

        # get real tag (correct for capitalization); looked up afresh for
        # each reference so a missing tag can never reuse a stale label
        taglabel = _find_sd_tag_label(confRef, tag)

        # delete cases that don't have energy (opt not converged; or other)
        if taglabel is None:
            confsToDel.add(confRef.GetIdx())
            delCount += 1
            continue
        refE = float(oechem.OEGetSDData(confRef, taglabel))

        for confTest in mol.GetConfs():
            # upper right triangle comparison
            if confTest.GetIdx() <= confRef.GetIdx():
                continue
            # skip cases already set for removal
            if confTest.GetIdx() in confsToDel:
                continue
            # delete cases that don't have energy
            if not oechem.OEHasSDData(confTest, taglabel):
                confsToDel.add(confTest.GetIdx())
                continue
            testE = float(oechem.OEGetSDData(confTest, taglabel))

            # if MM (not Psi4) energies, convert absERel to Hartrees
            if 'mm' in taglabel.lower():
                absERel = abs(refE - testE) / 627.5095
            else:
                absERel = abs(refE - testE)

            # if energies are diff enough --> confs are diff --> keep & skip ahead
            if absERel > ThresholdE:
                continue

            # if energies are similar, see if they are diff by RMSD
            rmsd = oechem.OERMSD(confRef, confTest, automorph, heavyOnly, overlay)

            # if measured_RMSD < threshold_RMSD --> confs are same --> delete
            if rmsd < ThresholdRMSD:
                confsToDel.add(confTest.GetIdx())

    # for the same molecule, delete tagged conformers
    print("%s original number of conformers: %d" % (mol.GetTitle(), mol.NumConfs()))
    if delCount == mol.NumConfs():
        # all conformers in this mol have been tagged for deletion
        return False
    for conf in mol.GetConfs():
        if conf.GetIdx() in confsToDel:
            print('Removing %s conformer index %d' % (mol.GetTitle(), conf.GetIdx()))
            if not mol.DeleteConf(conf):
                oechem.OEThrow.Fatal("Unable to delete %s GetIdx() %d"
                                     % (mol.GetTitle(), conf.GetIdx()))
    return True
import argparse

from openeye import oechem
from tqdm import tqdm


def getargs():
    """Parse the two required string options, -i and -o."""
    cli = argparse.ArgumentParser()
    for flag in ("-i", "-o"):
        cli.add_argument(flag, required=True, type=str)
    return cli.parse_args()


if __name__ == '__main__':
    args = getargs()

    # Input and output molecule streams, opened from the given paths
    ifs = oechem.oemolistream(args.i)
    ofs = oechem.oemolostream(args.o)

    lig = oechem.OEGraphMol()
    pbar = tqdm()

    # Copy every molecule, retitling it from its "Catalog ID" SD tag when
    # that tag is present
    while oechem.OEReadMolecule(ifs, lig):
        if oechem.OEHasSDData(lig, "Catalog ID"):
            catalog_id = oechem.OEGetSDData(lig, "Catalog ID")
            lig.SetTitle(catalog_id)
        oechem.OEWriteMolecule(ofs, lig)
        pbar.update(1)

    pbar.close()
    ifs.close()
    ofs.close()
# NOTE(review): this append presumably closes a molecule-reading loop that
# starts above this excerpt - confirm its indentation against the full file.
mols.append(mol.CreateCopy())
print(f'{len(mols)} molecules read')

# Annotate molecules with SMARTS labels
print('Annotating SMARTS labels...')
import csv
# CSV file whose first column is a SMARTS pattern and second column its label
labels_filename = 'annotations/benzopyran_annotations.csv'
# Maps SMARTS pattern -> label; a repeated pattern keeps its last label
smarts_labels = dict()
with open(labels_filename, 'r') as csvfile:
    csvreader = csv.reader(csvfile, delimiter=',')
    for row in csvreader:
        smarts = row[0]
        label = row[1]
        smarts_labels[smarts] = label

# Label the molecules: each molecule matching a pattern gets that pattern's
# label stored in its 'intermediate' SD tag. OESetSDData is called again for
# every later matching pattern on the same tag.
for smarts, label in smarts_labels.items():
    ss = oechem.OESubSearch(smarts)
    for mol in track(mols, description=label):
        oechem.OEPrepareSearch(mol, ss)
        if ss.SingleMatch(mol):
            oechem.OESetSDData(mol, 'intermediate', label)

# Discard molecules without labels
mols = [mol for mol in mols if oechem.OEHasSDData(mol, 'intermediate')]
print(f'{len(mols)} molecules remain after discarding unlabeled molecules')

# Write molecules
output_filename = 'sorted/sprint-5-dimer.csv'
with oechem.oemolostream(output_filename) as ofs:
    for mol in track(mols, description='Writing molecules...'):
        oechem.OEWriteMolecule(ofs, mol)
def get_series(mol):
    """Return the name of the first chemical series that *mol* belongs to.

    The molecule is converted to SMILES, re-parsed with RDKit, and matched
    (with explicit hydrogens added) against a fixed table of SMARTS
    patterns. A series only matches when the molecule also passes molecular
    weight, heavy atom count and ring count cutoffs. The
    "3-aminopyridine-like" series uses tighter cutoffs than the rest.

    Parameters
    ----------
    mol : OpenEye molecule
        Molecule to classify.

    Returns
    -------
    str or None
        The series name, or None when the molecule is flagged as covalent
        (an 'acrylamide' or 'chloroacetamide' SD tag equal to 'True'), when
        RDKit cannot parse its SMILES, or when no series matches.
    """
    from rdkit import Chem
    from rdkit.Chem import Descriptors
    from rdkit.Chem import rdMolDescriptors

    series_SMARTS_dict = {
        #"3-aminopyridine": "[R1][C,N;R0;!$(NC(=O)CN)]C(=O)[C,N;R0;!$(NC(=O)CN)][c]1cnccc1",
        "3-aminopyridine-like": "[R1]!@[C,N]C(=O)[C,N]!@[R1]",
        "3-aminopyridine-strict": "c1ccncc1NC(=O)!@[R1]",
        "Ugi": "[c,C:1][C](=[O])[N]([c,C,#1:2])[C]([c,C,#1:3])([c,C,#1:4])[C](=[O])[NH1][c,C:5]",
        "quinolones": "NC(=O)c1cc(=O)[nH]c2ccccc12",
        "piperazine-chloroacetamide": "O=C(CCl)N1CCNCC1",
        #'benzotriazoles': 'c1ccc(NC(=O)[C,N]n2nnc3ccccc32)cc1',
        #'benzotriazoles': 'a1aaa([C,N]C(=O)[C,N]a2aaa3aaaaa32)aa1',
        'benzotriazoles': 'a2aaa3aaaaa32',
    }

    smi = oechem.OECreateSmiString(mol)

    # Filter out covalent
    try:
        covalent_warheads = ['acrylamide', 'chloroacetamide']
        for warhead in covalent_warheads:
            if oechem.OEHasSDData(mol, warhead) and oechem.OEGetSDData(
                    mol, warhead) == 'True':
                return None
    except Exception as e:
        # best-effort filter: a failure here is logged and matching continues
        logging.warning(e)

    # Parse once and compute the descriptors once; they do not depend on the
    # series being tested.
    rdmol = Chem.MolFromSmiles(smi)
    if rdmol is None:
        # RDKit could not parse the SMILES, so no series can be assigned
        return None
    mol_weight = Descriptors.MolWt(rdmol)
    num_heavy_atoms = rdmol.GetNumHeavyAtoms()
    num_rings = rdMolDescriptors.CalcNumRings(rdmol)
    rdmol_with_hs = Chem.AddHs(rdmol)

    def in_series(SMARTS,
                  MW_cutoff=550,
                  num_atoms_cutoff=70,
                  num_rings_cutoff=10):
        # True when the pattern matches and all size cutoffs are satisfied
        patt = Chem.MolFromSmarts(SMARTS)
        return (rdmol_with_hs.HasSubstructMatch(patt)
                and mol_weight <= MW_cutoff
                and num_heavy_atoms <= num_atoms_cutoff
                and num_rings <= num_rings_cutoff)

    for series, series_SMARTS in series_SMARTS_dict.items():
        # The key must be spelled exactly as in the table above, otherwise
        # the tighter cutoffs are never applied.
        if series == "3-aminopyridine-like":
            if in_series(
                    series_SMARTS,
                    MW_cutoff=410,
                    num_rings_cutoff=3,
                    num_atoms_cutoff=28,
            ):
                return series
        else:
            if in_series(series_SMARTS):
                return series
    return None
# Compounds
from fah_xchem.schema import Compound, CompoundMetadata
# oechem must be imported before the SMILES flags below are read from it
from openeye import oechem

# Flags used for every SMILES string written into the compound metadata
smiles_flag = oechem.OESMILESFlag_Canonical | oechem.OESMILESFlag_ISOMERIC

print('Processing compounds...')
# Maps compound ID (the molecule title) -> Compound
compounds = dict()
with oechem.oemolistream(compounds_sdf_filename) as ifs:
    for oemol in ifs.GetOEGraphMols():
        # Set ID and SMILES
        compound_id = oemol.GetTitle()
        smiles = oechem.OECreateSmiString(oemol, smiles_flag)

        # Extract experimental data, if present
        experimental_data = dict()
        if oechem.OEHasSDData(oemol, 'f_avg_pIC50'):
            pIC50 = oechem.OEGetSDData(oemol, 'f_avg_pIC50')
            # an empty tag value means no measurement is available
            if pIC50 != '':
                pIC50 = float(pIC50)
                experimental_data['pIC50'] = pIC50

        # Extract information about the compound
        compound_metadata = CompoundMetadata(
            compound_id=compound_id,
            smiles=smiles,
            experimental_data=experimental_data,
        )

        # Create new compound
        compound = Compound(metadata=compound_metadata, microstates=list())

        # Store compound
        compounds[compound_id] = compound
def DumpSDData(mol):
    """Print the title of *mol*, each of its SD tag/value pairs, and a blank line."""
    print("SD data of", mol.GetTitle())
    # one "tag : value" line per SD data pair
    for pair in oechem.OEGetSDDataPairs(mol):
        tag, value = pair.GetTag(), pair.GetValue()
        print(tag, ':', value)
    print()


mol = oechem.OEGraphMol()
oechem.OESmilesToMol(mol, "c1ccccc1")
mol.SetTitle("benzene")

# attach two tags: one from plain strings, one from a ready-made data pair
oechem.OESetSDData(mol, "color", "brown")
oechem.OESetSDData(mol, oechem.OESDDataPair("size", "small"))
DumpSDData(mol)

# remove the "size" tag, but only if it is actually there
if oechem.OEHasSDData(mol, "size"):
    oechem.OEDeleteSDData(mol, "size")
DumpSDData(mol)

# add a further "color" entry
oechem.OEAddSDData(mol, "color", "black")
DumpSDData(mol)

# wipe every SD entry
oechem.OEClearSDData(mol)
DumpSDData(mol)
# @ </SNIPPET>
# SD-format record for "ethene" carrying a single SD data field, <weight>.
# NOTE(review): the record is reproduced exactly as it appears in this view,
# on one line; an SD record is normally laid out line by line - confirm the
# literal against the original file before relying on it.
molstring = '''\ ethene -OEChem-04060917472D 2 1 0 0 0 0 0 0 0999 V2000 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 0.0000 0.0000 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 1 2 1 0 0 0 0 M END > <weight> 30.069040000000 $$$$ '''

# Read the record from the in-memory string; the format is set explicitly
# before the string is opened.
ims = oechem.oemolistream()
ims.SetFormat(oechem.OEFormat_SDF)
ims.openstring(molstring)

mol = oechem.OEGraphMol()
oechem.OEReadMolecule(ims, mol)

# @ <SNIPPET-GET-SD>
# SD values are strings, so the weight is converted before printing
if oechem.OEHasSDData(mol, "weight"):
    weight = float(oechem.OEGetSDData(mol, "weight"))
    print("weight=", weight)
# @ </SNIPPET-GET-SD>

# @ <SNIPPET-SET-SD>
# the atom count is converted to a string before being stored as SD data
oechem.OESetSDData(mol, "number of atoms", str(mol.NumAtoms()))
# @ </SNIPPET-SET-SD>
# @ </SNIPPET>
# Title the molecule "<input file stem>-<molecule index>"
head, tail = os.path.split(args.molecules_filename)
prefix, ext = os.path.splitext(tail)
molecule.SetTitle(f'{prefix}-{args.molecule_index}')

# Dock if the molecule has not already been docked
from openeye import oechem
sdf_filename = os.path.join(docking_basedir, f'{molecule.GetTitle()} - ligand.sdf')
if not os.path.exists(sdf_filename):
    # Determine what fragments to dock to
    if is_covalent:
        fragments_to_dock_to = covalent_active_site_fragments
    else:
        fragments_to_dock_to = all_fragments
    # With --userfrags, a "fragments" SD tag on the molecule overrides the
    # list chosen above
    if args.userfrags:
        if oechem.OEHasSDData(molecule, 'fragments'):
            fragments_to_dock_to = oechem.OEGetSDData(molecule, 'fragments').split(',')
    # Dock the molecule
    docked_molecule = ensemble_dock(molecule, fragments_to_dock_to, covalent=is_covalent)
else:
    # Read the molecule
    print(f'Docked molecule exists, so reading from {sdf_filename}')
    with oechem.oemolistream(sdf_filename) as ifs:
        docked_molecule = oechem.OEGraphMol()
        # NOTE(review): the return value of OEReadMolecule is not checked, so
        # a failed read leaves an empty molecule here rather than None
        oechem.OEReadMolecule(ifs, docked_molecule)

# Stop with exit status 0 when there is nothing to work with.
# NOTE(review): on the read path above docked_molecule is always an
# OEGraphMol, so this test can only fire for the ensemble_dock result -
# presumably ensemble_dock returns None when no pose is found; confirm.
if docked_molecule is None:
    print('No docking poses available')
    import sys
    sys.exit(0)
def set_sd_tags(Conf, Props, calctype):
    """
    For one particular conformer, set all available SD tags based on data
    in Props dictionary.

    Warning
    -------
    Most tags are added rather than replaced, so if the exact tag already
    exists there will be duplicate tags with possibly different data
    (NOT recommended).

    TODO: maybe add some kind of checking to prevent duplicate tags added

    Parameters
    ----------
    Conf : Single conformer from OEChem molecule
    Props : Dictionary output from ProcessOutput function.
        Always read: method, basis, package, time.
        Read when relevant: finalEnergy, finalSCSEnergy (MP2 only),
        ocEnergy, numSteps, initEnergy.
    calctype : string; one of 'opt','spe','hess' for geometry optimization,
        single point energy calculation, or Hessian calculation
    """
    # level of theory and package name feed into every tag label
    method = Props['method']
    basisset = Props['basis']
    pkg = Props['package']

    full_method = "{}/{}".format(method, basisset)
    cdict = {'spe': 'Single Pt.', 'opt': 'Opt.', 'hess': 'Hessian'}
    calc_name = cdict[calctype]

    def add_tag(label, value):
        # SD values are stored as strings
        oechem.OEAddSDData(Conf, label, str(value))

    # runtime is recorded for every calculation type
    add_tag("QM {} {} Runtime (sec) {}".format(pkg, calc_name, full_method),
            Props['time'])

    # a Hessian calculation has nothing further to record
    if calctype == 'hess':
        return

    # no final energy: the job probably did not finish, so leave a note and stop
    if 'finalEnergy' not in Props:
        add_tag("Note on {} {}".format(calc_name, full_method),
                "JOB DID NOT FINISH")
        return

    # final energy of the conformer
    add_tag("QM {} Final {} Energy (Har) {}".format(pkg, calc_name, full_method),
            Props['finalEnergy'])

    # MP2 additionally records the SCS-MP2 energy
    if method.lower() == 'mp2':
        add_tag("QM {} Final {} Energy (Har) SCS-{}".format(
            pkg, calc_name, full_method), Props['finalSCSEnergy'])

    # COSMO energy with outlying charge correction. Turbomole only!
    if 'ocEnergy' in Props:
        if calctype == 'spe':
            print(
                "Extraction of COSMO OC energy from Turbomole not yet supported for SPE calcns"
            )
        elif calctype == 'opt':
            add_tag("QM {} Final {} Energy with OC correction (Har) {}".format(
                pkg, calc_name, full_method), Props['ocEnergy'])

    # a single point calculation has nothing further to record
    if calctype == 'spe':
        return

    # Original conformer number: added when absent, otherwise the current
    # number is appended after the stored one.
    # !! Opt2 files should ALREADY have this !! Opt2 index is NOT orig index !!
    taglabel = "Original omega conformer number"
    already_tagged = oechem.OEHasSDData(Conf, taglabel)
    try:
        conf_number = str(Conf.GetIdx() + 1)
        if already_tagged:
            previous = oechem.OEGetSDData(Conf, taglabel)
            oechem.OESetSDData(Conf, taglabel,
                               "{}, {}".format(previous, conf_number))
        else:
            oechem.OEAddSDData(Conf, taglabel, conf_number)
    except AttributeError:
        # if not working with confs, will have no GetIdx
        pass

    # number of geometry optimization steps
    add_tag("QM {} {} Steps {}".format(pkg, calc_name, full_method),
            Props['numSteps'])

    # initial energy of the conformer
    add_tag("QM {} Initial {} Energy (Har) {}".format(
        pkg, calc_name, full_method), Props['initEnergy'])
def has_sd_data(mol, tag):
    """Return True when *tag* is present on *mol* or on its active conformer.

    The active conformer is only consulted when the molecule itself does not
    carry the tag.
    """
    return bool(
        oechem.OEHasSDData(mol, tag)
        or oechem.OEHasSDData(mol.GetActive(), tag)
    )
def __getitem__(self, kee:str ):
    """Return the SD data stored under *kee* on the wrapped molecule.

    Raises KeyError when the molecule has no SD data under that tag.
    """
    if not oechem.OEHasSDData(self._mol, kee):
        owner = self.__class__.__name__
        raise KeyError("{} has no key {!r}".format(owner, kee))
    return oechem.OEGetSDData(self._mol, kee)
def ChEMBLSolubilityUsage(itf):
    """Apply ChEMBL solubility transforms and export the improved molecules.

    Every input molecule is reduced to its first largest component and run
    through ``OEApplyChEMBL24SolubilityTransforms``. For each transformed
    molecule the per-pair change (toValue - fromValue) is read from its
    "OEMMP_normalized_value (uM)" SD data. Molecules with no usable pairs or
    a negative average change are skipped. The rest are annotated with
    "average,stddev,count" and written to the output.

    Parameters
    ----------
    itf : OEInterface
        Parsed command line providing the "-input" and "-output" file names.

    Returns
    -------
    bool
        Always True once processing has finished.
    """
    ifs = oechem.oemolistream()
    if not ifs.open(itf.GetString("-input")):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % itf.GetString("-input"))

    ofs = oechem.oemolostream()
    if not ofs.open(itf.GetString("-output")):
        # the file name comes from the interface; the stream has no GetString
        oechem.OEThrow.Fatal("Unable to open %s for writing" % itf.GetString("-output"))

    oechem.OEThrow.SetLevel(oechem.OEErrorLevel_Warning)

    # @ <SNIPPET-OEAPPLYCHEMBLSOLUBILITY-EXAMPLE>
    # number of bonds of chemistry context at site of change
    #  for the applied transforms
    totalmols = 0
    xformctxt = oemedchem.OEMatchedPairContext_Bond2
    for molidx, mol in enumerate(ifs.GetOEGraphMols(), start=1):
        # consider only the largest input fragment
        oechem.OEDeleteEverythingExceptTheFirstLargestComponent(mol)

        smolcnt = 0
        # only consider solubility transforms having at least 5 matched pairs
        for solMol in oemedchem.OEApplyChEMBL24SolubilityTransforms(
                mol, xformctxt, 5):
            # compute net change in solubility from MMP data
            deltasol = []
            if oechem.OEHasSDData(solMol, "OEMMP_normalized_value (uM)"):
                for sditem in oechem.OEGetSDData(
                        solMol, "OEMMP_normalized_value (uM)").split('\n'):
                    # fromIndex,toIndex,fromValue,toValue
                    sdvalues = sditem.split(',')
                    # skip blank or truncated lines instead of failing on them
                    if len(sdvalues) < 4:
                        continue
                    if not sdvalues[2] or not sdvalues[3]:
                        continue
                    deltasol.append(float(sdvalues[3]) - float(sdvalues[2]))

            if not deltasol:
                continue

            avgsol = deltasol[0]
            if len(deltasol) > 1:
                avgsol = average(deltasol)

            # reject examples with net decrease in solubility
            if avgsol < 0.0:
                continue

            sdev = stddev(deltasol)

            # annotate with average,stddev,num
            oechem.OEAddSDData(
                solMol, "OEMMP_average_delta_normalized_value",
                "{0:.1F},{1:.2F},{2}".format(avgsol, sdev, len(deltasol)))

            # export solubility transformed molecule with SDData annotations
            if oechem.OEWriteMolecule(
                    ofs, solMol) == oechem.OEWriteMolReturnCode_Success:
                smolcnt += 1

        oechem.OEThrow.Info("{0}: Exported molecule count, {1}".format(
            molidx, smolcnt))
        totalmols += smolcnt
    # @ </SNIPPET-OEAPPLYCHEMBLSOLUBILITY-EXAMPLE>

    print("Exported molecule count = {0}".format(totalmols))
    return True
def process(self, mol, port):
    """
    Advance a torsion scan by one step.

    The input to this cube will be an OEMol with one or more conformers
    with "CONFORMER_LABEL" SD Data of the form 'XY-1234567_1_2_3_4_00_00'

    Depending on the state of the molecule, one of three things happens:

    * the conformer count equals ``self.args.num_points``: the molecule is
      emitted on the success port;
    * there is a single conformer and no conformer-selection data yet: that
      conformer is tagged with a torsion angle of 0.0 and sent to the energy
      calculation;
    * otherwise a new conformer is created, rotated to the next torsion
      angle and sent to the energy calculation. Any exception raised in this
      step is logged and the molecule is emitted on the failure port.

    NOTE(review): ``os.environ["HOSTNAME"]`` is read by subscript in every
    log call, so an unset HOSTNAME raises KeyError; the first two branches
    run outside the try block.
    """
    num_confs = mol.NumConfs()
    last_conf = mol.GetActive()
    last_conf_name = oechem.OEGetSDData(last_conf, "CONFORMER_LABEL")
    self.log.info(
        "Processing conformer {} on {} at {:%Y-%m-%d %H:%M:%S}".format(
            last_conf_name, os.environ["HOSTNAME"], datetime.datetime.now()))

    # Scan complete: one conformer exists for every requested point
    if num_confs == self.args.num_points:
        self.success.emit(mol)
        self.log.info(
            "Completed scan for {} on {} at {:%Y-%m-%d %H:%M:%S}".format(
                mol.GetTitle(), os.environ["HOSTNAME"], datetime.datetime.now()))
        return

    # Fresh molecule: the only conformer becomes scan point zero
    if num_confs == 1 and not mol.HasData(self.conf_selection_tag):
        self.log.info(
            "Conformer {} is a fresh starting conformer on {} at {:%Y-%m-%d %H:%M:%S}"
            .format(mol.GetTitle(), os.environ["HOSTNAME"], datetime.datetime.now()))
        mol.SetIntData(self.conf_selection_tag, last_conf.GetIdx())
        # the angle is stored both as generic data and as SD data
        last_conf.SetDoubleData("TORSION_ANGLE", 0.0)
        oechem.OESetSDData(last_conf, "TORSION_ANGLE", "0.0")
        self.log.info(
            "Sending conformer {} to energy calculation from {} at {:%Y-%m-%d %H:%M:%S}"
            .format(last_conf_name, os.environ["HOSTNAME"], datetime.datetime.now()))
        self.to_energy_calc.emit(mol)
        return

    try:
        # The tag holds whitespace-separated atom numbers; 1 is subtracted
        # from each before it is passed to get_dihedral
        torsion_tag = "TORSION_ATOMS_FRAGMENT"
        torsion_atoms_in_fragment = get_sd_data(mol, torsion_tag).split()
        dihedral_atom_indices = [
            int(x) - 1 for x in torsion_atoms_in_fragment
        ]
        dih, _ = get_dihedral(mol, dihedral_atom_indices)
        dih_atoms = [x for x in dih.GetAtoms()]

        # if the last energy calculation failed
        if not oechem.OEHasSDData(last_conf, "PSI4_ENERGY"):
            self.log.info(
                "Conformer {} found to have NO ENERGY on {} at {:%Y-%m-%d %H:%M:%S}"
                .format(last_conf_name, os.environ["HOSTNAME"], datetime.datetime.now()))
            # NOTE(review): presumably this falls back to the previously
            # active conformer as the template for the next one - confirm
            mol.PopActive()
            last_conf = mol.GetActive()

        # The next scan point starts as a copy of the last conformer
        new_conf = mol.NewConf(last_conf)
        mol.PushActive(new_conf)

        # The label is the previous one with its last three characters
        # replaced by "_NN", NN being the conformer count before this step
        conf_no = num_confs
        conformer_label = last_conf_name[:-3] + "_{:02d}".format(conf_no)
        oechem.OESetSDData(new_conf, "CONFORMER_LABEL", conformer_label)

        # Points are evenly spaced over a full turn; the angle is used in
        # radians for OESetTorsion and stored in degrees
        angle = num_confs * 2 * oechem.Pi / self.args.num_points
        angle_deg = oechem.Rad2Deg * angle
        new_conf.SetDoubleData("TORSION_ANGLE", angle_deg)
        oechem.OESetSDData(new_conf, "TORSION_ANGLE", "{:.1f}".format(angle_deg))

        # NOTE(review): a failed rotation is only logged; the conformer is
        # still sent on to the energy calculation below - confirm intended
        if not oechem.OESetTorsion(new_conf, dih_atoms[0], dih_atoms[1],
                                   dih_atoms[2], dih_atoms[3], angle):
            self.log.error(
                "Could not rotate conformer {} by {:.1f} on {} at {:%Y-%m-%d %H:%M:%S}"
                .format(
                    last_conf_name,
                    angle_deg,
                    os.environ["HOSTNAME"],
                    datetime.datetime.now(),
                ))

        mol.SetIntData(self.conf_selection_tag, new_conf.GetIdx())
        self.log.info(
            "Sending conformer {} to energy calculation from {} at {:%Y-%m-%d %H:%M:%S}"
            .format(conformer_label, os.environ["HOSTNAME"], datetime.datetime.now()))
        self.to_energy_calc.emit(mol)
    except Exception as e:
        # Any failure while building the next scan point sends the molecule
        # to the failure port
        self.log.error(
            "COuld not drive torsion in conformer {} on {} at {:%Y-%m-%d %H:%M:%S}: {}"
            .format(last_conf_name, os.environ["HOSTNAME"], datetime.datetime.now(), e))
        self.failure.emit(mol)
def SetOptSDTags(Conf, Props, spe=False):
    """
    For one particular conformer, set all available SD tags based on data
    in Props dictionary.

    Warning
    -------
    Most tags are added rather than replaced, so if the exact tag already
    exists there will be duplicate tags with possibly different data
    (NOT recommended).

    TODO: maybe add some kind of checking to prevent duplicate tags added

    Parameters
    ----------
    Conf : Single conformer from OEChem molecule
    Props : Dictionary output from ProcessOutput function.
        Always read: method, basis, package.
        Read when the job finished: finalEnergy, time, and for
        optimizations numSteps and initEnergy. ocEnergy is optional.
    spe : Boolean - are the results of a single point energy calcn?
    """
    # get level of theory for setting SD tags
    method = Props['method']
    basisset = Props['basis']
    pkg = Props['package']

    # check that finalEnergy is there. if not, opt probably did not finish
    # make a note of that in SD tag
    if 'finalEnergy' not in Props:
        if not spe:
            oechem.OEAddSDData(Conf, "Note on opt. %s/%s"
                               % (method, basisset), "JOB DID NOT FINISH")
        else:
            oechem.OEAddSDData(Conf, "Note on SPE %s/%s"
                               % (method, basisset), "JOB DID NOT FINISH")
        return

    # Set new SD tag for conformer's final energy
    if not spe:
        taglabel = "QM %s Final Opt. Energy (Har) %s/%s" % (pkg, method, basisset)
    else:
        taglabel = "QM %s Single Pt. Energy (Har) %s/%s" % (pkg, method, basisset)
    oechem.OEAddSDData(Conf, taglabel, str(Props['finalEnergy']))

    # Set new SD tag for wall-clock time
    if not spe:
        taglabel = "QM %s Opt. Runtime (sec) %s/%s" % (pkg, method, basisset)
    else:
        taglabel = "QM %s Single Pt. Runtime (sec) %s/%s" % (pkg, method, basisset)
    oechem.OEAddSDData(Conf, taglabel, str(Props['time']))

    # Add COSMO energy with outlying charge correction. Turbomole only!
    # The tag is written only for optimizations. For SPE a message is
    # printed and nothing is stored, so the OC energy can never be filed
    # under the runtime label set just above.
    if 'ocEnergy' in Props:
        if not spe:
            taglabel = "QM %s Final Opt. Energy with OC correction (Har) %s/%s" % (
                pkg, method, basisset)
            oechem.OEAddSDData(Conf, taglabel, str(Props['ocEnergy']))
        else:
            print(
                "Extraction of COSMO OC energy from Turbomole not yet supported for SPE calcns"
            )

    if spe:
        return  # stop here if SPE

    # Set new SD tag for original conformer number
    # !! Opt2 files should ALREADY have this !! Opt2 index is NOT orig index !!
    taglabel = "Original omega conformer number"
    already_tagged = oechem.OEHasSDData(Conf, taglabel)
    try:
        newid = str(Conf.GetIdx() + 1)
        if already_tagged:
            # if tag exists, append new conformer ID after the old one
            oldid = oechem.OEGetSDData(Conf, taglabel)
            oechem.OESetSDData(Conf, taglabel, "{}, {}".format(oldid, newid))
        else:
            oechem.OEAddSDData(Conf, taglabel, newid)
    except AttributeError:
        # if not working with confs, will have no GetIdx; the tag is
        # deliberately left untouched in that case
        pass

    # Set new SD tag for numSteps of geom. opt.
    taglabel = "QM %s Opt. Steps %s/%s" % (pkg, method, basisset)
    oechem.OEAddSDData(Conf, taglabel, str(Props['numSteps']))

    # Set new SD tag for conformer's initial energy
    taglabel = "QM %s Initial Opt. Energy (Har) %s/%s" % (pkg, method, basisset)
    oechem.OEAddSDData(Conf, taglabel, str(Props['initEnergy']))