def Reset(self):
    """Recompute the cached OR-combination of the stored vectors.

    Does nothing unless the instance is flagged as needing a reset.
    Otherwise the cached vector is cleared first; if no vectors are
    stored, the method returns at that point and the flag stays set.
    With vectors present, the cache becomes a copy of the first one
    OR-ed with all the others, the bit count is refreshed from it, and
    the flag is cleared.
    """
    if not self.__needReset:
        return

    self.__orVect = None
    if not self.__vects:
        return

    keys = list(iterkeys(self.__vects))
    firstKey, otherKeys = keys[0], keys[1:]

    # Seed from a copy so the stored vector itself is never modified.
    self.__orVect = copy.copy(self.__vects[firstKey])
    self.__numBits = self.__orVect.GetNumBits()
    for key in otherKeys:
        self.__orVect |= self.__vects[key]

    self.__needReset = False
# from __future__ import print_function from rdkit.six.moves import cPickle from rdkit.six import iterkeys from rdkit import DataStructs,Chem from rdkit import Chem similarityMethods={'RDK':DataStructs.ExplicitBitVect, 'AtomPairs':DataStructs.IntSparseIntVect, 'TopologicalTorsions':DataStructs.LongSparseIntVect, 'Pharm2D':DataStructs.SparseBitVect, 'Gobbi2D':DataStructs.SparseBitVect, 'Morgan':DataStructs.UIntSparseIntVect, 'Avalon':DataStructs.ExplicitBitVect, } supportedSimilarityMethods=list(iterkeys(similarityMethods)) class LayeredOptions: loadLayerFlags=0xFFFFFFFF searchLayerFlags=0x7 minPath=1 maxPath=6 fpSize=1024 wordSize=32 nWords=fpSize//wordSize @staticmethod def GetFingerprint(mol,query=True): if query: flags=LayeredOptions.searchLayerFlags else:
def RecapDecompose(mol, allNodes=None, minFragmentSize=0, onlyUseReactions=None):
    """ returns the recap decomposition for a molecule

    Arguments:
      - mol: the molecule to decompose.
      - allNodes: optional dictionary mapping SMILES to hierarchy nodes.
        It is updated in place with every node created here; if the
        SMILES of `mol` is already a key, the stored node is returned
        without doing any work.
      - minFragmentSize: when > 0, a product set is rejected if one of
        its fragments has fewer than this many explicit atoms once the
        '*' characters in its SMILES are discounted.  When it is not
        > 0, a product set is rejected instead if a fragment, with
        '[*]' and empty branches removed, is '', 'C', 'CC' or 'CCC'.
      - onlyUseReactions: optional collection of reaction indices; when
        non-empty, reactions whose index is not in it are skipped.

    Returns the hierarchy node for `mol`; fragments are reachable
    through the `children` of each node.

    Products that cannot be sanitized are left out of the hierarchy.
    """
    mSmi = Chem.MolToSmiles(mol, 1)
    if allNodes is None:
        allNodes = {}
    if mSmi in allNodes:
        return allNodes[mSmi]

    res = RecapHierarchyNode(mol)
    res.smiles = mSmi
    activePool = {mSmi: res}
    allNodes[mSmi] = res
    while activePool:
        # take the oldest pending node
        nSmi = next(iter(activePool))
        node = activePool.pop(nSmi)
        if not node.mol:
            continue
        for rxnIdx, reaction in enumerate(reactions):
            if onlyUseReactions and rxnIdx not in onlyUseReactions:
                continue
            for prodSeq in reaction.RunReactants((node.mol, )):
                seqOk = True
                # we want to disqualify small fragments, so sort the product
                # sequence by size and then look for "forbidden" fragments
                bySize = sorted((prod.GetNumAtoms(onlyExplicit=True), idx)
                                for idx, prod in enumerate(prodSeq))
                # only products that sanitized cleanly end up in here, so the
                # second pass never touches a product without a SMILES
                accepted = []
                for nats, idx in bySize:
                    prod = prodSeq[idx]
                    try:
                        Chem.SanitizeMol(prod)
                    except Exception:
                        continue
                    pSmi = Chem.MolToSmiles(prod, 1)
                    if minFragmentSize > 0:
                        nDummies = pSmi.count('*')
                        if nats - nDummies < minFragmentSize:
                            seqOk = False
                            break
                    # don't forget after replacing dummy atoms to remove any
                    # empty branches:
                    elif pSmi.replace('[*]', '').replace('()', '') in ('', 'C', 'CC', 'CCC'):
                        seqOk = False
                        break
                    # kept for callers that read the SMILES off the molecule
                    prod.pSmi = pSmi
                    accepted.append((prod, pSmi))
                if not seqOk:
                    continue
                for prod, pSmi in accepted:
                    if pSmi not in allNodes:
                        pNode = RecapHierarchyNode(prod)
                        pNode.smiles = pSmi
                        # new fragment: schedule it for further decomposition
                        activePool[pSmi] = pNode
                        allNodes[pSmi] = pNode
                    else:
                        pNode = allNodes[pSmi]
                    # weak reference so parent and child do not keep each
                    # other alive
                    pNode.parents[nSmi] = weakref.proxy(node)
                    node.children[pSmi] = pNode
    return res
def BRICSDecompose(mol, allNodes=None, minFragmentSize=1, onlyUseReactions=None,
                   silent=True, keepNonLeafNodes=False, singlePass=False,
                   returnMols=False):
    """ returns the BRICS decomposition for a molecule

    Arguments:
      - mol: the molecule to decompose.
      - allNodes: optional set of SMILES that have already been seen.  It
        is updated in place; if the SMILES of `mol` is already in it an
        empty set is returned.
      - minFragmentSize: when > 0, a product set is rejected if one of
        its fragments has fewer than this many explicit atoms once the
        '*' characters in its SMILES are discounted.
      - onlyUseReactions: optional collection of (groupIndex,
        reactionIndex) pairs; when non-empty, other reactions are skipped.
      - silent: when false, progress information is printed.
      - keepNonLeafNodes: when true, partially decomposed pieces are
        included in the result.
      - singlePass: when true, products are not decomposed further.
      - returnMols: when true, molecules are returned instead of SMILES.

    Returns a set of SMILES, or the values view of a SMILES->molecule
    dictionary when `returnMols` is set.

    Products that cannot be sanitized are ignored; a product set that
    yields no usable fragment does not count as a match.

    >>> from rdkit import Chem
    >>> m = Chem.MolFromSmiles('CCCOCc1cc(c2ncccc2)ccc1')
    >>> res = list(BRICSDecompose(m))
    >>> sorted(res)
    ['[14*]c1ccccn1', '[16*]c1cccc([16*])c1', '[3*]O[3*]', '[4*]CCC', '[4*]C[8*]']

    >>> res = list(BRICSDecompose(m,returnMols=True))
    >>> res[0]
    <rdkit.Chem.rdchem.Mol object ...>
    >>> smis = [Chem.MolToSmiles(x,True) for x in res]
    >>> sorted(smis)
    ['[14*]c1ccccn1', '[16*]c1cccc([16*])c1', '[3*]O[3*]', '[4*]CCC', '[4*]C[8*]']

    nexavar, an example from the paper (corrected):
    >>> m = Chem.MolFromSmiles('CNC(=O)C1=NC=CC(OC2=CC=C(NC(=O)NC3=CC(=C(Cl)C=C3)C(F)(F)F)C=C2)=C1')
    >>> res = list(BRICSDecompose(m))
    >>> sorted(res)
    ['[1*]C([1*])=O', '[1*]C([6*])=O', '[14*]c1cc([16*])ccn1', '[16*]c1ccc(Cl)c([16*])c1', '[16*]c1ccc([16*])cc1', '[3*]O[3*]', '[5*]NC', '[5*]N[5*]', '[8*]C(F)(F)F']

    it's also possible to keep pieces that haven't been fully decomposed:
    >>> m = Chem.MolFromSmiles('CCCOCC')
    >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True))
    >>> sorted(res)
    ['CCCOCC', '[3*]OCC', '[3*]OCCC', '[3*]O[3*]', '[4*]CC', '[4*]CCC']

    >>> m = Chem.MolFromSmiles('CCCOCc1cc(c2ncccc2)ccc1')
    >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True))
    >>> sorted(res)
    ['CCCOCc1cccc(-c2ccccn2)c1', '[14*]c1ccccn1', '[16*]c1cccc(-c2ccccn2)c1', '[16*]c1cccc(COCCC)c1', '[16*]c1cccc([16*])c1', '[3*]OCCC', '[3*]OC[8*]', '[3*]OCc1cccc(-c2ccccn2)c1', '[3*]OCc1cccc([16*])c1', '[3*]O[3*]', '[4*]CCC', '[4*]C[8*]', '[4*]Cc1cccc(-c2ccccn2)c1', '[4*]Cc1cccc([16*])c1', '[8*]COCCC']

    or to only do a single pass of decomposition:
    >>> m = Chem.MolFromSmiles('CCCOCc1cc(c2ncccc2)ccc1')
    >>> res = list(BRICSDecompose(m,singlePass=True))
    >>> sorted(res)
    ['CCCOCc1cccc(-c2ccccn2)c1', '[14*]c1ccccn1', '[16*]c1cccc(-c2ccccn2)c1', '[16*]c1cccc(COCCC)c1', '[3*]OCCC', '[3*]OCc1cccc(-c2ccccn2)c1', '[4*]CCC', '[4*]Cc1cccc(-c2ccccn2)c1', '[8*]COCCC']

    setting a minimum size for the fragments:
    >>> m = Chem.MolFromSmiles('CCCOCC')
    >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True,minFragmentSize=2))
    >>> sorted(res)
    ['CCCOCC', '[3*]OCC', '[3*]OCCC', '[4*]CC', '[4*]CCC']
    >>> m = Chem.MolFromSmiles('CCCOCC')
    >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True,minFragmentSize=3))
    >>> sorted(res)
    ['CCCOCC', '[3*]OCC', '[4*]CCC']
    >>> res = list(BRICSDecompose(m,minFragmentSize=2))
    >>> sorted(res)
    ['[3*]OCC', '[3*]OCCC', '[4*]CC', '[4*]CCC']

    """
    mSmi = Chem.MolToSmiles(mol, 1)
    if allNodes is None:
        allNodes = set()
    if mSmi in allNodes:
        return set()

    activePool = {mSmi: mol}
    allNodes.add(mSmi)
    foundMols = {mSmi: mol}
    # reaction groups are applied one after another; whatever survives a
    # group is the starting pool for the next one
    for gpIdx, reactionGp in enumerate(reactions):
        newPool = {}
        while activePool:
            matched = False
            # take the oldest pending molecule
            nSmi = next(iter(activePool))
            curMol = activePool.pop(nSmi)
            for rxnIdx, reaction in enumerate(reactionGp):
                if onlyUseReactions and (gpIdx, rxnIdx) not in onlyUseReactions:
                    continue
                if not silent:
                    print('--------')
                    print(smartsGps[gpIdx][rxnIdx])
                ps = reaction.RunReactants((curMol, ))
                if not ps:
                    continue
                if not silent:
                    print(nSmi, '->', len(ps), 'products')
                for prodSeq in ps:
                    seqOk = True
                    # we want to disqualify small fragments, so sort the
                    # product sequence by size
                    bySize = sorted((prod.GetNumAtoms(onlyExplicit=True), idx)
                                    for idx, prod in enumerate(prodSeq))
                    # only products that sanitized cleanly end up in here, so
                    # the second pass never touches one without a SMILES
                    accepted = []
                    for nats, idx in bySize:
                        prod = prodSeq[idx]
                        try:
                            Chem.SanitizeMol(prod)
                        except Exception:
                            continue
                        pSmi = Chem.MolToSmiles(prod, 1)
                        if minFragmentSize > 0:
                            nDummies = pSmi.count('*')
                            if nats - nDummies < minFragmentSize:
                                seqOk = False
                                break
                        # kept for callers that read the SMILES off the molecule
                        prod.pSmi = pSmi
                        accepted.append((prod, pSmi))
                    if not (seqOk and accepted):
                        continue
                    matched = True
                    for prod, pSmi in accepted:
                        if pSmi not in allNodes:
                            if not singlePass:
                                # feed the fragment back in for more cuts
                                activePool[pSmi] = prod
                            allNodes.add(pSmi)
                            foundMols[pSmi] = prod
            # molecules nothing could cut (or everything, when intermediates
            # are wanted) move on to the next reaction group
            if singlePass or keepNonLeafNodes or not matched:
                newPool[nSmi] = curMol
        activePool = newPool

    if singlePass or keepNonLeafNodes:
        res = foundMols.values() if returnMols else allNodes
    else:
        res = activePool.values() if returnMols else set(activePool.keys())
    return res
# from __future__ import print_function from rdkit.six.moves import cPickle from rdkit.six import iterkeys from rdkit import DataStructs, Chem from rdkit import Chem similarityMethods = { 'RDK': DataStructs.ExplicitBitVect, 'AtomPairs': DataStructs.IntSparseIntVect, 'TopologicalTorsions': DataStructs.LongSparseIntVect, 'Pharm2D': DataStructs.SparseBitVect, 'Gobbi2D': DataStructs.SparseBitVect, 'Morgan': DataStructs.UIntSparseIntVect } supportedSimilarityMethods = list(iterkeys(similarityMethods)) class LayeredOptions: loadLayerFlags = 0xFFFFFFFF searchLayerFlags = 0x7 minPath = 1 maxPath = 6 fpSize = 1024 wordSize = 32 nWords = fpSize // wordSize @staticmethod def GetFingerprint(mol, query=True): if query: flags = LayeredOptions.searchLayerFlags
def RecapDecompose(mol, allNodes=None, minFragmentSize=0, onlyUseReactions=None):
    """ returns the recap decomposition for a molecule

    Arguments:
      - mol: the molecule to decompose.
      - allNodes: optional dictionary mapping SMILES to hierarchy nodes.
        It is updated in place with every node created here; if the
        SMILES of `mol` is already a key, the stored node is returned
        without doing any work.
      - minFragmentSize: when > 0, a product set is rejected if one of
        its fragments has fewer than this many explicit atoms once the
        '*' characters in its SMILES are discounted.  When it is not
        > 0, a product set is rejected instead if a fragment, with
        '[*]' and empty branches removed, is '', 'C', 'CC' or 'CCC'.
      - onlyUseReactions: optional collection of reaction indices; when
        non-empty, reactions whose index is not in it are skipped.

    Returns the hierarchy node for `mol`; fragments are reachable
    through the `children` of each node.

    Products that cannot be sanitized are left out of the hierarchy.
    """
    mSmi = Chem.MolToSmiles(mol, 1)
    if allNodes is None:
        allNodes = {}
    if mSmi in allNodes:
        return allNodes[mSmi]

    res = RecapHierarchyNode(mol)
    res.smiles = mSmi
    activePool = {mSmi: res}
    allNodes[mSmi] = res
    while activePool:
        # take the oldest pending node
        nSmi = next(iter(activePool))
        node = activePool.pop(nSmi)
        if not node.mol:
            continue
        for rxnIdx, reaction in enumerate(reactions):
            if onlyUseReactions and rxnIdx not in onlyUseReactions:
                continue
            for prodSeq in reaction.RunReactants((node.mol, )):
                seqOk = True
                # we want to disqualify small fragments, so sort the product
                # sequence by size and then look for "forbidden" fragments
                bySize = sorted((prod.GetNumAtoms(onlyExplicit=True), idx)
                                for idx, prod in enumerate(prodSeq))
                # only products that sanitized cleanly end up in here, so the
                # second pass never touches a product without a SMILES
                accepted = []
                for nats, idx in bySize:
                    prod = prodSeq[idx]
                    try:
                        Chem.SanitizeMol(prod)
                    except Exception:
                        continue
                    pSmi = Chem.MolToSmiles(prod, 1)
                    if minFragmentSize > 0:
                        nDummies = pSmi.count('*')
                        if nats - nDummies < minFragmentSize:
                            seqOk = False
                            break
                    # don't forget after replacing dummy atoms to remove any
                    # empty branches:
                    elif pSmi.replace('[*]', '').replace('()', '') in ('', 'C', 'CC', 'CCC'):
                        seqOk = False
                        break
                    # kept for callers that read the SMILES off the molecule
                    prod.pSmi = pSmi
                    accepted.append((prod, pSmi))
                if not seqOk:
                    continue
                for prod, pSmi in accepted:
                    if pSmi not in allNodes:
                        pNode = RecapHierarchyNode(prod)
                        pNode.smiles = pSmi
                        # new fragment: schedule it for further decomposition
                        activePool[pSmi] = pNode
                        allNodes[pSmi] = pNode
                    else:
                        pNode = allNodes[pSmi]
                    # weak reference so parent and child do not keep each
                    # other alive
                    pNode.parents[nSmi] = weakref.proxy(node)
                    node.children[pSmi] = pNode
    return res
def BRICSDecompose(mol, allNodes=None, minFragmentSize=1, onlyUseReactions=None,
                   silent=True, keepNonLeafNodes=False, singlePass=False,
                   returnMols=False):
    """ returns the BRICS decomposition for a molecule

    Arguments:
      - mol: the molecule to decompose.
      - allNodes: optional set of SMILES that have already been seen.  It
        is updated in place; if the SMILES of `mol` is already in it an
        empty set is returned.
      - minFragmentSize: when > 0, a product set is rejected if one of
        its fragments has fewer than this many explicit atoms once the
        '*' characters in its SMILES are discounted.
      - onlyUseReactions: optional collection of (groupIndex,
        reactionIndex) pairs; when non-empty, other reactions are skipped.
      - silent: when false, progress information is printed.
      - keepNonLeafNodes: when true, partially decomposed pieces are
        included in the result.
      - singlePass: when true, products are not decomposed further.
      - returnMols: when true, molecules are returned instead of SMILES.

    Returns a set of SMILES, or the values view of a SMILES->molecule
    dictionary when `returnMols` is set.

    Products that cannot be sanitized are ignored; a product set that
    yields no usable fragment does not count as a match.

    >>> from rdkit import Chem
    >>> m = Chem.MolFromSmiles('CCCOCc1cc(c2ncccc2)ccc1')
    >>> res = list(BRICSDecompose(m))
    >>> sorted(res)
    ['[14*]c1ccccn1', '[16*]c1cccc([16*])c1', '[3*]O[3*]', '[4*]CCC', '[4*]C[8*]']

    >>> res = list(BRICSDecompose(m,returnMols=True))
    >>> res[0]
    <rdkit.Chem.rdchem.Mol object ...>
    >>> smis = [Chem.MolToSmiles(x,True) for x in res]
    >>> sorted(smis)
    ['[14*]c1ccccn1', '[16*]c1cccc([16*])c1', '[3*]O[3*]', '[4*]CCC', '[4*]C[8*]']

    nexavar, an example from the paper (corrected):
    >>> m = Chem.MolFromSmiles('CNC(=O)C1=NC=CC(OC2=CC=C(NC(=O)NC3=CC(=C(Cl)C=C3)C(F)(F)F)C=C2)=C1')
    >>> res = list(BRICSDecompose(m))
    >>> sorted(res)
    ['[1*]C([1*])=O', '[1*]C([6*])=O', '[14*]c1cc([16*])ccn1', '[16*]c1ccc(Cl)c([16*])c1', '[16*]c1ccc([16*])cc1', '[3*]O[3*]', '[5*]NC', '[5*]N[5*]', '[8*]C(F)(F)F']

    it's also possible to keep pieces that haven't been fully decomposed:
    >>> m = Chem.MolFromSmiles('CCCOCC')
    >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True))
    >>> sorted(res)
    ['CCCOCC', '[3*]OCC', '[3*]OCCC', '[3*]O[3*]', '[4*]CC', '[4*]CCC']

    >>> m = Chem.MolFromSmiles('CCCOCc1cc(c2ncccc2)ccc1')
    >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True))
    >>> sorted(res)
    ['CCCOCc1cccc(-c2ccccn2)c1', '[14*]c1ccccn1', '[16*]c1cccc(-c2ccccn2)c1', '[16*]c1cccc(COCCC)c1', '[16*]c1cccc([16*])c1', '[3*]OCCC', '[3*]OC[8*]', '[3*]OCc1cccc(-c2ccccn2)c1', '[3*]OCc1cccc([16*])c1', '[3*]O[3*]', '[4*]CCC', '[4*]C[8*]', '[4*]Cc1cccc(-c2ccccn2)c1', '[4*]Cc1cccc([16*])c1', '[8*]COCCC']

    or to only do a single pass of decomposition:
    >>> m = Chem.MolFromSmiles('CCCOCc1cc(c2ncccc2)ccc1')
    >>> res = list(BRICSDecompose(m,singlePass=True))
    >>> sorted(res)
    ['CCCOCc1cccc(-c2ccccn2)c1', '[14*]c1ccccn1', '[16*]c1cccc(-c2ccccn2)c1', '[16*]c1cccc(COCCC)c1', '[3*]OCCC', '[3*]OCc1cccc(-c2ccccn2)c1', '[4*]CCC', '[4*]Cc1cccc(-c2ccccn2)c1', '[8*]COCCC']

    setting a minimum size for the fragments:
    >>> m = Chem.MolFromSmiles('CCCOCC')
    >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True,minFragmentSize=2))
    >>> sorted(res)
    ['CCCOCC', '[3*]OCC', '[3*]OCCC', '[4*]CC', '[4*]CCC']
    >>> m = Chem.MolFromSmiles('CCCOCC')
    >>> res = list(BRICSDecompose(m,keepNonLeafNodes=True,minFragmentSize=3))
    >>> sorted(res)
    ['CCCOCC', '[3*]OCC', '[4*]CCC']
    >>> res = list(BRICSDecompose(m,minFragmentSize=2))
    >>> sorted(res)
    ['[3*]OCC', '[3*]OCCC', '[4*]CC', '[4*]CCC']

    """
    mSmi = Chem.MolToSmiles(mol, 1)
    if allNodes is None:
        allNodes = set()
    if mSmi in allNodes:
        return set()

    activePool = {mSmi: mol}
    allNodes.add(mSmi)
    foundMols = {mSmi: mol}
    # reaction groups are applied one after another; whatever survives a
    # group is the starting pool for the next one
    for gpIdx, reactionGp in enumerate(reactions):
        newPool = {}
        while activePool:
            matched = False
            # take the oldest pending molecule
            nSmi = next(iter(activePool))
            curMol = activePool.pop(nSmi)
            for rxnIdx, reaction in enumerate(reactionGp):
                if onlyUseReactions and (gpIdx, rxnIdx) not in onlyUseReactions:
                    continue
                if not silent:
                    print('--------')
                    print(smartsGps[gpIdx][rxnIdx])
                ps = reaction.RunReactants((curMol, ))
                if not ps:
                    continue
                if not silent:
                    print(nSmi, '->', len(ps), 'products')
                for prodSeq in ps:
                    seqOk = True
                    # we want to disqualify small fragments, so sort the
                    # product sequence by size
                    bySize = sorted((prod.GetNumAtoms(onlyExplicit=True), idx)
                                    for idx, prod in enumerate(prodSeq))
                    # only products that sanitized cleanly end up in here, so
                    # the second pass never touches one without a SMILES
                    accepted = []
                    for nats, idx in bySize:
                        prod = prodSeq[idx]
                        try:
                            Chem.SanitizeMol(prod)
                        except Exception:
                            continue
                        pSmi = Chem.MolToSmiles(prod, 1)
                        if minFragmentSize > 0:
                            nDummies = pSmi.count('*')
                            if nats - nDummies < minFragmentSize:
                                seqOk = False
                                break
                        # kept for callers that read the SMILES off the molecule
                        prod.pSmi = pSmi
                        accepted.append((prod, pSmi))
                    if not (seqOk and accepted):
                        continue
                    matched = True
                    for prod, pSmi in accepted:
                        if pSmi not in allNodes:
                            if not singlePass:
                                # feed the fragment back in for more cuts
                                activePool[pSmi] = prod
                            allNodes.add(pSmi)
                            foundMols[pSmi] = prod
            # molecules nothing could cut (or everything, when intermediates
            # are wanted) move on to the next reaction group
            if singlePass or keepNonLeafNodes or not matched:
                newPool[nSmi] = curMol
        activePool = newPool

    if singlePass or keepNonLeafNodes:
        res = foundMols.values() if returnMols else allNodes
    else:
        res = activePool.values() if returnMols else set(activePool.keys())
    return res