def prefilterIndex(self, oeQueryMol, idxP, matchOpts="relaxed", skipFeatures=False):
    """Filter the full search index based on minimum chemical formula and feature criteria.

    The element counts of the query molecule are always used. The feature counts
    are used unless skipFeatures is set, and selected feature keys are removed
    from the filter depending on the match options.

    Args:
        oeQueryMol (object): search target molecule (OEMol)
        idxP (object): instance of ChemCompSearchIndexProvider()
        matchOpts (str, optional): search criteria options. Defaults to "relaxed".
        skipFeatures (bool, optional): skip feature filters. Defaults to False.

    Returns:
        (list): list of chemical component identifiers in the filtered search space
    """
    startTime = time.time()
    oemf = OeMoleculeFactory()
    oemf.setOeMol(oeQueryMol, "queryTarget")
    typeCountD = oemf.getElementCounts(useSymbol=True)
    featureCountD = {} if skipFeatures else oemf.getFeatureCounts()
    #
    # Adjust filter according to search options: select the feature keys that
    # must NOT constrain the pre-filter for the requested match type.
    if matchOpts in ("relaxed", "graph-relaxed", "simple", "sub-struct-graph-relaxed"):
        ignoredKeys = ("rings_ar", "at_ar", "at_ch")
    elif matchOpts in (
        "relaxed-stereo",
        "graph-relaxed-stereo",
        "sub-struct-graph-relaxed-stereo",
        "graph-relaxed-stereo-sdeq",
        "sub-struct-graph-relaxed-stereo-sdeq",
    ):
        ignoredKeys = ("rings_ar", "at_ar")
    else:
        # "default", "strict", "graph-strict", "graph-default", "sub-struct-graph-strict"
        # and any unrecognized option keep the full set of feature criteria.
        ignoredKeys = ()
    for ky in ignoredKeys:
        featureCountD.pop(ky, None)
    #
    ccIdL = idxP.filterMinimumFormulaAndFeatures(typeCountD, featureCountD)
    logger.info("Pre-filtering results for formula+feature %d (%.4f seconds)", len(ccIdL), time.time() - startTime)
    return ccIdL
def write(self, filePath, oeMol, constantMol=False, addSdTags=True):
    """Write an oeMol with format type inferred from the filePath extension (e.g. .mol)

    Args:
        filePath (str): filepath with a chemical type extension
        oeMol (object): molecule to write (OEMol)
        constantMol (bool, optional): copies molecule before performing format specific perceptions
        addSdTags (bool, optional): when True, the molecule is passed through
            OeMoleculeFactory.addSdTags() and the resulting molecule is written. Defaults to True.

    Returns:
        bool: True for success or False otherwise
    """
    try:
        molId, ext = os.path.splitext(os.path.basename(filePath))
        fmt = ext[1:].lower()
        #
        if addSdTags:
            oemf = OeMoleculeFactory()
            oemf.setOeMol(oeMol, molId)
            oemf.addSdTags()
            oeMol = oemf.getMol()
        #
        self.__mU.mkdir(os.path.dirname(filePath))
        ofs = oechem.oemolostream()
        if not ofs.open(filePath):
            # Previously an unopened stream was silently written to and True returned.
            logger.error("Unable to open output file %s", filePath)
            return False
        try:
            logger.debug("Writing (fmt=%s) molId %s path %s title %s", fmt, molId, filePath, oeMol.GetTitle())
            if constantMol:
                oechem.OEWriteConstMolecule(ofs, oeMol)
            else:
                oechem.OEWriteMolecule(ofs, oeMol)
        finally:
            # Close explicitly so the output is flushed to disk before the file
            # is re-read below, and so the handle is released on every path.
            ofs.close()
        #
        if fmt.startswith("mol2"):
            # If this is a mol2/mol2h substitute the default substructure id
            with open(filePath, "r", encoding="utf-8") as ifh:
                lines = [line.replace("<0>", molId) for line in ifh]
            with open(filePath, "w", encoding="utf-8") as ofh:
                ofh.writelines(lines)
        return True
    except Exception as e:
        logger.exception("Failing for %s with %s", filePath, str(e))
    return False
def __getMiscFile(self, filePath, suppressHydrogens=False, importType="2D", title=None, largestPart=False):
    """Fetch a miscellaneous chemical file (ccPath) and build OE molecules for comparison.

    Only the first molecule read from the file is used.

    Args:
        filePath (str): path to the chemical file to read
        suppressHydrogens (bool, optional): return the hydrogen-suppressed graph molecule
            (getGraphMolSuppressH()) instead of the full molecule. Defaults to False.
        importType (str, optional): "3D" reads with use3D=True and collects per-atom
            coordinates; any other value skips coordinate collection. Defaults to "2D".
        title (str, optional): when given, replaces the molecule title. Defaults to None.
        largestPart (bool, optional): passed through to OeIoUtils.fileToMols(). Defaults to False.

    Returns:
        (tuple): (ccId, oeMol, fD) where fD holds the keys "Formula", "SMILES",
            "SMILES_STEREO", "InChI", "InChIKey", "xyz" and "OEMOL";
            (None, None, None) on any failure or when no molecule is read.
    """
    try:
        oeioU = OeIoUtils()
        oeMolL = oeioU.fileToMols(filePath, use3D=importType == "3D", largestPart=largestPart)
        logger.info("Read (%d) from %s ", len(oeMolL), filePath)
        if not oeMolL:
            # Report the empty result directly rather than through an IndexError traceback.
            logger.error("No molecules read from %s", filePath)
            return None, None, None
        oeMol = oeMolL[0]
        ccId = title if title else oeMol.GetTitle()
        if title:
            oeMol.SetTitle(ccId)
        #
        oemf = OeMoleculeFactory()
        if not self.__verbose:
            oemf.setQuiet()
        oemf.setOeMol(oeMol, ccId)
        #
        # NOTE(review): the result of this call was overwritten before use in the
        # original code; the call is retained in case it has side effects on oemf
        # -- TODO confirm and remove if it is a pure getter.
        oemf.getOeMoleculeFeatures()
        if self.__verbose:
            logger.info(" Title = %s", title)
            logger.info(" Title OEMF = %s", oemf.getTitle())
            logger.info(" SMILES = %s", oemf.getCanSMILES())
            logger.info(" SMILES (stereo) = %s", oemf.getIsoSMILES())
            logger.info(" Formula (Hill) = %s", oemf.getFormula())
            logger.info(" InChI key = %s", oemf.getInChIKey())
            logger.info(" InChI = %s", oemf.getInChI())
        #
        ccId = oemf.getTitle()
        tMol = oemf.getGraphMolSuppressH() if suppressHydrogens else oemf.getMol()
        #
        molXyzL = []
        if importType == "3D":
            for atm in tMol.GetAtoms():
                xyzL = oechem.OEFloatArray(3)
                tMol.GetCoords(atm, xyzL)
                molXyzL.append(
                    ComponentAtomDetails(
                        atIdx=atm.GetIdx(),
                        atNo=atm.GetAtomicNum(),
                        atName=atm.GetName(),
                        atType=atm.GetType(),
                        x=xyzL[0],
                        y=xyzL[1],
                        z=xyzL[2],
                        atFormalCharge=atm.GetFormalCharge(),
                    )
                )
        fD = {
            "Formula": oemf.getFormula(),
            "SMILES": oemf.getCanSMILES(),
            "SMILES_STEREO": oemf.getIsoSMILES(),
            "InChI": oemf.getInChI(),
            "InChIKey": oemf.getInChIKey(),
            "xyz": molXyzL,
        }
        if self.__verbose:
            # Per-atom diagnostics; coordinates are fetched only when they will be logged.
            for atm in tMol.GetAtoms():
                xyzL = oechem.OEFloatArray(3)
                tMol.GetCoords(atm, xyzL)
                logger.debug("atom  %s %s %s %s %r", atm.GetIdx(), atm.GetAtomicNum(), atm.GetName(), atm.GetType(), xyzL)
        fD["OEMOL"] = tMol
        return (ccId, tMol, fD)
    except Exception as e:
        logger.exception("Failing with %s", str(e))
    return None, None, None