def keep_molecule(mol, max_heavy_atoms = 100, remove_smirks = list(), max_metals = 0, elements = [], check_type = None):
    """
    Decide whether a molecule passes all filters and should be kept.

    Parameters
    ----------
    mol : OEMol
        Molecule to test.
    max_heavy_atoms : int
        Molecules with more heavy atoms than this are rejected.
    remove_smirks : list of str
        Patterns parsed with OEParseSmarts; a molecule matching any of them
        is rejected. Patterns that fail to parse are skipped.
    max_metals : int
        Molecules with more metal atoms than this are rejected.
    elements : object or None
        Handed to read_Elements; the result is passed to check_element.
        Pass None to skip the element check.
    check_type : str or None
        Comma-separated values handed to check_atomtype. None skips the check.

    Returns
    -------
    bool
        False as soon as any filter rejects the molecule, otherwise the
        result of check_valence(mol).
    """
    # The list defaults are kept for interface compatibility; this function
    # only reads them.
    if oechem.OECount(mol, oechem.OEIsMetal()) > max_metals:
        return False

    # Count heavy atoms once and reuse the value for both bounds.
    heavy_atoms = oechem.OECount(mol, oechem.OEIsHeavy())
    if heavy_atoms > max_heavy_atoms:
        return False
    # Remove very small molecules that are not interesting
    if heavy_atoms < 5:
        return False

    for smirks in remove_smirks:
        qmol = oechem.OEQMol()
        if not oechem.OEParseSmarts(qmol, smirks):
            # Best effort: an unparsable pattern is ignored, not an error.
            continue
        ss = oechem.OESubSearch(qmol)
        # Only existence matters, so stop at the first match.
        if any(True for _ in ss.Match(mol, False)):
            return False

    if elements is not None:
        elements_list = read_Elements(elements)
        if not check_element(mol, elements_list):
            return False

    if check_type is not None:
        types = check_type.split(",")
        if not check_atomtype(mol, types):
            return False

    return check_valence(mol)
def _find_torsions_from_smarts(molecule, smarts):
    """
    Run a substructure search with the given SMARTS and collect the matches.

    Parameters
    ----------
    molecule: OEMol
        molecule to search on
    smarts: str
        SMARTS pattern to search for

    Returns
    -------
    tors: list
        one entry per unique match; each entry is the list of target atoms
        of that match
    """
    from openeye import oechem

    # TODO: use MDL aromaticity model
    query = oechem.OEQMol()
    parsed_ok = oechem.OEParseSmarts(query, smarts)
    if not parsed_ok:
        # Only a warning is logged; the search below still runs.
        utils.logger().warning('OEParseSmarts failed')

    searcher = oechem.OESubSearch(query)
    oechem.OEPrepareSearch(molecule, searcher)
    return [
        [pair.target for pair in hit.GetAtoms()]
        for hit in searcher.Match(molecule, True)
    ]
def get_covalent_warhead_atom(molecule, covalent_warhead_type):
    """
    Get tagged atom index in provided tagged SMARTS string, or None if no match found.

    The SMARTS is looked up in ``covalent_warhead_smarts`` by warhead name;
    the atom returned is the one matched to the pattern atom whose map
    index is 1.

    Parameters
    ----------
    molecule : openeye.oechem.OEMol
        The molecule to search
    covalent_warhead_type : str
        Covalent warhead name (key into ``covalent_warhead_smarts``)

    Returns
    -------
    index : int or None
        The atom index in molecule of the covalent atom, or None if SMARTS does not match

    Raises
    ------
    KeyError
        If ``covalent_warhead_type`` is not a key of ``covalent_warhead_smarts``.
    ValueError
        If the SMARTS string cannot be parsed.
    """
    smarts = covalent_warhead_smarts[covalent_warhead_type]
    qmol = oechem.OEQMol()
    if not oechem.OEParseSmarts(qmol, smarts):
        raise ValueError(f"Error parsing SMARTS '{smarts}'")

    substructure_search = oechem.OESubSearch(qmol)
    substructure_search.SetMaxMatches(1)
    for match in substructure_search.Match(molecule):
        # Return the target atom paired with the pattern atom tagged ":1"
        for matched_atom in match.GetAtoms():
            if matched_atom.pattern.GetMapIdx() == 1:
                return matched_atom.target.GetIdx()

    return None
def keep_molecule(mol, remove_smirks = list()):
    """
    Decide whether the molecule should be stored.

    Parameters
    ----------
    mol - OEMol
    remove_smirks - list of SMIRKS strings you don't want in your molecules

    Returns
    -------
    boolean - True when the molecule
        - has no metal atoms
        - has no more than 200 heavy atoms
        - matches none of the SMIRKS in remove_smirks
          (patterns that fail to parse are ignored)
        - passes check_valence
    """
    # Reject on metal content first, then on size
    too_many_metals = oechem.OECount(mol, oechem.OEIsMetal()) > 0
    if too_many_metals or oechem.OECount(mol, oechem.OEIsHeavy()) > 200:
        return False

    # Reject when any parsable pattern is found in the molecule
    for pattern in remove_smirks:
        query = oechem.OEQMol()
        if oechem.OEParseSmarts(query, pattern):
            searcher = oechem.OESubSearch(query)
            hits = list(searcher.Match(mol, False))
            if hits:
                return False

    # Final gate: valency
    return check_valence(mol)
def _tag_fgroups(mol, fgroups_smarts=None):
    """
    Tag the atoms and bonds of functional groups defined in fgroups_smarts.

    fgroups_smarts maps functional group names to their SMARTS pattern. It
    can be supplied by the caller or loaded from the packaged yaml file.

    Parameters
    ----------
    mol: Openeye OEMolGraph
        Matched atoms and bonds get their 'fgroup' data set in place.
    fgroups_smarts: dictionary of functional groups mapped to their smarts pattern.
        Default is None. When empty or None, 'data/fgroup_smarts.yml' from the
        'fragmenter' package is used.

    Returns
    -------
    fgroup_tagged: dict
        maps '<group>_<match index>' to a tuple (atom indices, bond indices)
        of that match in mol
    """
    if not fgroups_smarts:
        # Load yaml file; the context manager closes it even if parsing fails
        fn = resource_filename('fragmenter', os.path.join('data', 'fgroup_smarts.yml'))
        with open(fn, 'r') as f:
            fgroups_smarts = yaml.safe_load(f)

    # The tag does not depend on the group or the match, so look it up once.
    tag = oechem.OEGetTag('fgroup')
    fgroup_tagged = {}
    for f_group in fgroups_smarts:
        qmol = oechem.OEQMol()
        if not oechem.OEParseSmarts(qmol, fgroups_smarts[f_group]):
            # NOTE(review): the search below still runs after a parse failure
            print('OEParseSmarts failed')
        ss = oechem.OESubSearch(qmol)
        oechem.OEPrepareSearch(mol, ss)

        for i, match in enumerate(ss.Match(mol, True)):
            label = '{}_{}'.format(f_group, i)

            fgroup_atoms = set()
            for ma in match.GetAtoms():
                fgroup_atoms.add(ma.target.GetIdx())
                ma.target.SetData(tag, label)

            fgroup_bonds = set()
            for ma in match.GetBonds():
                fgroup_bonds.add(ma.target.GetIdx())
                ma.target.SetData(tag, label)

            fgroup_tagged[label] = (fgroup_atoms, fgroup_bonds)

    return fgroup_tagged
def main(argv=[__name__]):
    """
    Depict every input molecule, aligned to a SMARTS query where it matches.

    Reads -in, -out and -smarts from the command line. Molecules in which the
    substructure is not found get a warning and a plain depiction instead.
    Always returns 0; failures are reported through oechem.OEThrow.Fatal.
    """
    itf = oechem.OEInterface(InterfaceData)
    if not oechem.OEParseCommandLine(itf, argv):
        oechem.OEThrow.Fatal("Unable to interpret command line!")

    in_path = itf.GetString("-in")
    out_path = itf.GetString("-out")
    pattern = itf.GetString("-smarts")

    # Build the query and give it 2D coordinates to align against
    query = oechem.OEQMol()
    if not oechem.OEParseSmarts(query, pattern):
        oechem.OEThrow.Fatal("Invalid SMARTS: %s" % pattern)
    oechem.OEGenerate2DCoordinates(query)

    searcher = oechem.OESubSearch(query)
    if not searcher.IsValid():
        oechem.OEThrow.Fatal("Unable to initialize substructure search!")

    source = oechem.oemolistream()
    if not source.open(in_path):
        oechem.OEThrow.Fatal("Cannot open input molecule file!")

    sink = oechem.oemolostream()
    if not sink.open(out_path):
        oechem.OEThrow.Fatal("Cannot open output file!")
    if not oechem.OEIs2DFormat(sink.GetFormat()):
        oechem.OEThrow.Fatal("Invalid output format for 2D coordinates")

    for target in source.GetOEGraphMols():
        oechem.OEPrepareSearch(target, searcher)
        if not oedepict.OEPrepareAlignedDepiction(target, searcher).IsValid():
            oechem.OEThrow.Warning(
                "Substructure is not found in input molecule!")
            # Fall back to an unaligned depiction
            oedepict.OEPrepareDepiction(target)
        oechem.OEWriteMolecule(sink, target)

    return 0
def _check_nitro(molecule):
    """
    Report whether the molecule contains a nitro group in the ([NX3](=O)=O) form.

    OEGetReasonableTautomers generates this form.

    Parameters
    ----------
    molecule : OEMol
        molecule to search

    Returns
    -------
    bool
        True if the pattern matches at least once, False otherwise
    """
    from openeye import oechem

    nitro_query = oechem.OEQMol()
    if not oechem.OEParseSmarts(nitro_query, '([NX3](=O)=O)'):
        print('OEParseSmarts failed')

    searcher = oechem.OESubSearch(nitro_query)
    oechem.OEPrepareSearch(molecule, searcher)
    return len(list(searcher.Match(molecule))) > 0
def smartsToQmol(self, smarts, messageTag=None):
    """Build a query molecule (OEQMol) from a SMARTS string.

    Args:
        smarts (str): SMARTS query string
        messageTag (str, optional): prefix used in the log messages emitted
            when parsing fails

    Returns:
        object : OEQMol object, or None on failure
    """
    try:
        label = messageTag or ""
        qmol = oechem.OEQMol()
        if not oechem.OEParseSmarts(qmol, smarts):
            # The SMARTS text itself is only written at debug level
            logger.debug("%s parsing failed for SMARTS string %s", label, smarts)
            logger.error("%s parsing failed for SMARTS string", label)
        else:
            return qmol
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return None
oedepict.OEScale_AutoScale) disp = oedepict.OE2DMolDisplay(mol, opts) hstyle = oedepict.OEHighlightStyle_Color hcolor = oechem.OEColor(oechem.OELightBlue) oedepict.OEAddHighlighting(disp, hcolor, hstyle, match) ofs = oechem.oeosstream() oedepict.OERenderMolecule(ofs, 'png', disp) ofs.flush() return Image(data="".join(ofs.str())) # In[4]: Smarts = '[#6X4]-[#6X4]-[#8X2]' qmol = oechem.OEQMol() if not oechem.OEParseSmarts(qmol, Smarts): print('OEParseSmarts failed') ss = oechem.OESubSearch(qmol) # In[5]: fileprefix = 'AlkEthOH_dvrs1' ifs = oechem.oemolistream(fileprefix + '.oeb') # In[6]: mol = oechem.OEMol() for mol in ifs.GetOEMols(): goodMol = True oechem.OEPrepareSearch(mol, ss) unique = True
torlib = oeomega.OETorLib() # @ <SNIPPET-AddTorsionRule-string> # Adding the torsion rule "[O:1]=[C:2]-[O:3][CH3:4] 90" as a string # This takes precedent over previous rule rule = "[O:1]=[C:2]-[O:3][CH3:4] 90" if not torlib.AddTorsionRule(rule): oechem.OEThrow.Fatal("Failed to add torsion rule: %s" % rule) omegaOpts.SetTorLib(torlib) omega.SetOptions(omegaOpts) if omega(mol): oechem.OEWriteMolecule(ofs, mol) # @ </SNIPPET-AddTorsionRule-string> # @ <SNIPPET-AddTorsionRule-OEQMol> # Adding torsion rule "[O:1]=[C:2]-[O:3][CH3:4] 45" as a query # molecule. This takes precedent over default rule qmol = oechem.OEQMol() oechem.OEParseSmarts(qmol, "[O:1]=[C:2]-[O:3][CH3:4]") degrees = oechem.OEIntVector([45]) if not torlib.AddTorsionRule(qmol, degrees): oechem.OEThrow.Fatal("Failed to add torsion rule") omegaOpts.SetTorLib(torlib) omega.SetOptions(omegaOpts) if omega(mol): oechem.OEWriteMolecule(ofs, mol) # @ </SNIPPET-AddTorsionRule-OEQMol>
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. OpenEye claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable OpenEye offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED. OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
# liable for any damages or liability in connection with the Sample Code
# or its use.

# @ <SNIPPET>
from openeye import oechem

# Query screen, built as a SMARTS-type screen from a SMARTS pattern
query = oechem.OEQMol()
oechem.OEParseSmarts(query, "c1cc[o,n,s]c1")
query_screen = oechem.OESubSearchScreen()
oechem.OEMakeSubSearchQueryScreen(
    query_screen, query, oechem.OESubSearchScreenType_SMARTS)

# Target screen, built as an MDL-type screen from a SMILES string
target = oechem.OEGraphMol()
oechem.OEParseSmiles(target, "c1ccoc1")
target_screen = oechem.OESubSearchScreen()
oechem.OEMakeSubSearchTargetScreen(
    target_screen, target, oechem.OESubSearchScreenType_MDL)

# Report whether the two screens are of the same type
same = oechem.OESameSubSearchScreenTypes(query_screen, target_screen)
print("same screen types" if same else "different screen types")
# @ </SNIPPET>
#
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of OpenEye products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. OpenEye claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable OpenEye offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED. OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
# liable for any damages or liability in connection with the Sample Code
# or its use.

from openeye import oechem

# @ <SNIPPET>
query = oechem.OEQMol()
oechem.OEParseSmarts(query, "C1CC[O,N]CC1")

# Screen type constants to test against the query, in reporting order
candidate_types = (
    oechem.OESubSearchScreenType_Molecule,
    oechem.OESubSearchScreenType_MDL,
    oechem.OESubSearchScreenType_SMARTS,
)

for candidate in candidate_types:
    screen = oechem.OEGetSubSearchScreenType(candidate)
    if not oechem.OEIsComplementaryScreenType(query, screen):
        continue
    print(screen.GetName(), 'screen type is complementary')
# @ </SNIPPET>
def main(argv=[__name__]):
    """
    Search a SMARTS substructure-search database with a SMARTS query.

    Exactly one of -count, -titles or -out must be given: the hits are then
    counted, listed by title, or written as molecules to the output file.
    Always returns 0; failures are reported through oechem.OEThrow.Fatal.
    """
    itf = oechem.OEInterface(InterfaceData)
    if not oechem.OEParseCommandLine(itf, argv):
        oechem.OEThrow.Fatal("Unable to interpret command line.")

    # check parameters: exactly one output mode may be selected
    want_count = itf.GetBool("-count")
    want_titles = itf.GetBool("-titles")
    want_output = itf.HasString("-out")
    selected_modes = sum(
        bool(flag) for flag in (want_count, want_titles, want_output))
    if selected_modes != 1:
        oechem.OEThrow.Fatal(
            "Counting (-c) or outputting titles (-t) or molecules (-o) "
            "must be specified and are mutually exclusive.")

    ofs = oechem.oemolostream()
    if want_output:
        ofname = itf.GetString("-out")
        if not ofs.open(ofname):
            oechem.OEThrow.Fatal("Cannot open output file!")

    dbfname = itf.GetString("-db")
    smarts = itf.GetString("-smarts")
    nrthreads = itf.GetUnsignedInt("-nrthreads")
    maxmatches = itf.GetUnsignedInt("-maxmatches")

    # initialize query
    query_mol = oechem.OEQMol()
    if not oechem.OEParseSmarts(query_mol, smarts):
        oechem.OEThrow.Fatal("Unable to parse SMARTS pattern: %s" % smarts)

    # initialize substructure search database
    screentype = oechem.OEGetSubSearchScreenType(
        oechem.OESubSearchScreenType_SMARTS)
    if not oechem.OEIsValidSubSearchDatabase(dbfname, screentype):
        oechem.OEThrow.Fatal(
            "Invalid SMARTS substructure search database file!")

    ssdb = oechem.OESubSearchDatabase(
        oechem.OESubSearchDatabaseType_Default, nrthreads)
    tracer = oechem.OEConsoleProgressTracer()
    if not ssdb.Open(dbfname, tracer):
        oechem.OEThrow.Fatal(
            "Substructure search database can not be initialized!")

    oechem.OEThrow.Info(
        "Using %d processor(s) to search database with '%s'"
        % (ssdb.NumProcessors(), screentype.GetName()))

    # search database
    if want_count:
        oechem.OEThrow.Info("Number of hits: %d" % ssdb.NumMatches(query_mol))
        return 0

    query = oechem.OESubSearchQuery(query_mol, maxmatches)
    result = oechem.OESubSearchResult()
    status = ssdb.Search(result, query)

    summary = (
        ("Search status = ", oechem.OESubSearchStatusToName(status)),
        ("Number of targets = ", result.NumTargets()),
        ("Number of screened = ", result.NumScreened()),
        ("Number of searched = ", result.NumSearched()),
        ("Number of total matches = ", result.NumTotalMatches()),
        ("Number of kept matches = ", result.NumMatches()),
    )
    for label, value in summary:
        print(label, value)

    if want_titles:
        print("Matches:")
        for index in result.GetMatchIndices():
            print(ssdb.GetTitle(index))
    elif want_output:
        hit = oechem.OEGraphMol()
        for index in result.GetMatchIndices():
            if ssdb.GetMolecule(hit, index):
                oechem.OEWriteMolecule(ofs, hit)

    return 0
def generate_torsions(inp_mol, output_path, interval, base_name=None, tar=True):
    """
    Write one PDB structure per torsion and per angle for a 1D torsion drive.

    Torsions come from two sources: oechem.OEGetTorsions(inp_mol) and a SMARTS
    search for terminal torsions ending in a hydrogen. One torsion is kept per
    central bond (the one whose two end atoms have the largest summed atomic
    number). For every kept torsion and every angle in
    range(0, 360, interval), a conformer is created, the torsion is set to
    that angle, and the conformer is written to
    ``<output_path>/<tor_name>/<angle>/<base_name>_<tor_name>_<angle>.pdb``,
    where ``tor_name`` is the four atom indices plus one, joined by '_'.
    This function uses OpenEye.

    Parameters
    ----------
    inp_mol : OEMol
        molecule to generate 1D torsion scans for
    output_path: str
        path to output file directory
    interval: int
        angle (in degrees) of interval for torsion drive
    base_name: str
        base name for file. Default is None. If default, use title in OEMol for base name
    tar: bool
        If true, ``<output_path>.tar.gz`` is written. Note that this changes
        the process working directory to the parent of output_path.
    """
    if not base_name:
        base_name = inp_mol.GetTitle()

    # Torsions reported by OEGetTorsions, as [a, b, c, d] atom lists
    mid_tors = [[tor.a, tor.b, tor.c, tor.d] for tor in oechem.OEGetTorsions(inp_mol)]

    # This smarts should match terminal torsions such as -CH3, -NH2, -NH3+, -OH, and -SH
    smarts = '[*]~[*]-[X2H1,X3H2,X4H3]-[#1]'
    qmol = oechem.OEQMol()
    if not oechem.OEParseSmarts(qmol, smarts):
        warnings.warn('OEParseSmarts failed')
    ss = oechem.OESubSearch(qmol)
    # Work on a copy of the input molecule from here on.
    # NOTE(review): mid_tors holds atoms of inp_mol while h_tors holds atoms
    # of this copy; both are later used with conformers of the copy - confirm
    # this mixing is intended.
    mol = oechem.OEMol(inp_mol)
    h_tors = []
    oechem.OEPrepareSearch(mol, ss)
    unique = True
    for match in ss.Match(mol, unique):
        tor = []
        for ma in match.GetAtoms():
            tor.append(ma.target)
        h_tors.append(tor)

    # Combine middle and terminal torsions
    all_tors = mid_tors + h_tors

    # Sort all_tors so that it's grouped by central bond.
    # Columns: position in all_tors, index of atom 1, index of atom 2.
    central_bonds = np.zeros((len(all_tors), 3), dtype=int)
    for i, tor in enumerate(all_tors):
        central_bonds[i][0] = i
        central_bonds[i][1] = tor[1].GetIdx()
        central_bonds[i][2] = tor[2].GetIdx()

    # NOTE(review): the sort key is only the third column (atom 2); torsions
    # sharing atom 2 but differing in atom 1 are not guaranteed to end up
    # adjacent - verify that the grouping below is still correct then.
    grouped = central_bonds[central_bonds[:, 2].argsort()]
    sorted_tors = [all_tors[i] for i in grouped[:, 0]]

    # Keep only one torsion per rotatable bond
    tors = []
    # Placeholder made of the first atom repeated four times; it is replaced
    # once the first torsion is taken as a new central bond.
    # NOTE(review): sorted_tors[0] raises IndexError when no torsion was found.
    best_tor = [
        sorted_tors[0][0], sorted_tors[0][0], sorted_tors[0][0],
        sorted_tors[0][0]
    ]
    first_pass = True
    for tor in sorted_tors:
        logger().info("Idxs: {} {} {} {}".format(tor[0].GetIdx(), tor[1].GetIdx(), tor[2].GetIdx(), tor[3].GetIdx()))
        logger().info("Atom Numbers: {} {} {} {}".format(
            tor[0].GetAtomicNum(), tor[1].GetAtomicNum(),
            tor[2].GetAtomicNum(), tor[3].GetAtomicNum()))
        if tor[1].GetIdx() != best_tor[1].GetIdx() or tor[2].GetIdx(
        ) != best_tor[2].GetIdx():
            # Central bond changed: store the best torsion of the previous
            # bond (except on the very first pass) and start a new group.
            # NOTE(review): new_tor is assigned but never read in this function.
            new_tor = True
            if not first_pass:
                logger().info("Adding to list: {} {} {} {}".format(
                    best_tor[0].GetIdx(), best_tor[1].GetIdx(),
                    best_tor[2].GetIdx(), best_tor[3].GetIdx()))
                tors.append(best_tor)
            first_pass = False
            best_tor = tor
            best_tor_order = tor[0].GetAtomicNum() + tor[3].GetAtomicNum()
            logger().info(
                "new_tor with central bond across atoms: {} {}".format(
                    tor[1].GetIdx(), tor[2].GetIdx()))
        else:
            # Same central bond: prefer the torsion whose end atoms have the
            # larger summed atomic number.
            logger().info("Not a new_tor but now with end atoms: {} {}".format(
                tor[0].GetIdx(), tor[3].GetIdx()))
            tor_order = tor[0].GetAtomicNum() + tor[3].GetAtomicNum()
            if tor_order > best_tor_order:
                best_tor = tor
                best_tor_order = tor_order
    # The last group is never closed inside the loop, so add it here
    logger().info("Adding to list: {} {} {} {}".format(best_tor[0].GetIdx(),
                                                       best_tor[1].GetIdx(),
                                                       best_tor[2].GetIdx(),
                                                       best_tor[3].GetIdx()))
    tors.append(best_tor)

    logger().info("List of torsion to drive:")
    for tor in tors:
        logger().info("Idx: {} {} {} {}".format(tor[0].GetIdx(), tor[1].GetIdx(), tor[2].GetIdx(), tor[3].GetIdx()))
        logger().info("Atom numbers: {} {} {} {}".format(
            tor[0].GetAtomicNum(), tor[1].GetAtomicNum(),
            tor[2].GetAtomicNum(), tor[3].GetAtomicNum()))

    # Read the coordinates of the first conformer into a flat float array
    conf = mol.GetConfs().next()
    coords = oechem.OEFloatArray(conf.GetMaxAtomIdx() * 3)
    conf.GetCoords(coords)
    # Check whether every coordinate is exactly zero
    values = np.asarray(
        [coords.__getitem__(i) == 0 for i in range(coords.__len__())])
    if values.all():
        # Generate new coordinates.
        mol2 = generate_conformers(mol, max_confs=1)
        conf = mol2.GetConfs().next()
        coords = oechem.OEFloatArray(conf.GetMaxAtomIdx() * 3)
        conf.GetCoords(coords)
        mol2.DeleteConfs()
    # Drop existing conformers; new ones are created from coords below
    mol.DeleteConfs()

    for tor in tors:
        # Folder name: the four atom indices plus one, joined by '_'
        tor_name = str((tor[0].GetIdx()) + 1) + '_' + str(
            (tor[1].GetIdx()) + 1) + '_' + str(
                (tor[2].GetIdx()) + 1) + '_' + str((tor[3].GetIdx()) + 1)
        folder = os.path.join(output_path, tor_name)
        try:
            os.makedirs(folder)
        except FileExistsError:
            logger().info("Overwriting existing directory {}".format(tor_name))
        for angle in range(0, 360, interval):
            angle_folder = os.path.join(folder, str(angle))
            try:
                os.mkdir(angle_folder)
            except FileExistsError:
                # NOTE(review): the message names tor_name, not the angle folder
                logger().info(
                    "Overwriting existing directory {}".format(tor_name))
            # New conformer from the reference coordinates, rotated to angle
            newconf = mol.NewConf(coords)
            oechem.OESetTorsion(newconf, tor[0], tor[1], tor[2], tor[3],
                                radians(angle))
            # NOTE(review): the output stream is not explicitly closed here
            pdb = oechem.oemolostream('{}/{}_{}_{}.pdb'.format(
                angle_folder, base_name, tor_name, angle))
            oechem.OEWritePDBFile(pdb, newconf)
    if tar:
        # tar archive output
        out = tarfile.open('{}.tar.gz'.format(output_path), mode='w:gz')
        # Side effect: the working directory becomes the parent of output_path
        os.chdir(output_path)
        os.chdir('../')
        # NOTE(review): adds an entry named base_name relative to that parent;
        # presumably output_path's last component equals base_name - confirm.
        out.add('{}'.format(base_name))
        out.close()