def get_fragments(self):
    """Collect Murcko scaffold SMILES of the RECAP children and of the molecule.

    Returns ``None`` when any of ``is_mol()``, ``is_small()`` or
    ``has_2_rings()`` returns ``False``. Otherwise returns a list holding,
    in order, one scaffold SMILES per usable direct RECAP child followed by
    the scaffold SMILES of the molecule itself. A SMILES is only kept when
    RDKit can parse it back into a molecule.
    """
    checks = (self.is_mol(), self.is_small(), self.has_2_rings())
    if False in checks:
        return None

    collected = []

    # 1st: scaffolds of the direct children of the RECAP decomposition.
    decomposition = Recap.RecapDecompose(self.get_mol())
    for node in decomposition.children.values():
        frag_mol = node.mol
        if (frag_mol is None) or (Chem.GetSSSR(frag_mol) < 2):
            continue
        try:
            frag_core = MurckoScaffold.GetScaffoldForMol(frag_mol)
        except ValueError:
            # scaffold calculation not possible for this fragment
            continue
        frag_smiles = Chem.MolToSmiles(frag_core)
        if Chem.MolFromSmiles(frag_smiles) is not None:
            collected.append(frag_smiles)

    # 2nd: scaffold of the molecule itself.
    try:
        own_core = MurckoScaffold.GetScaffoldForMol(self.get_mol())
        own_smiles = Chem.MolToSmiles(own_core)
        if Chem.MolFromSmiles(own_smiles) is not None:
            collected.append(own_smiles)
    except ValueError:
        # scaffold calculation not possible for the parent molecule
        pass

    return collected
def AddMurckoToFrame(frame, molCol='ROMol', MurckoCol='Murcko_SMILES', Generic=False):
    '''
    Add a column with the SMILES of each row's Murcko scaffold to a pandas DataFrame.

    The molecule is read from column `molCol` and the SMILES is written to
    column `MurckoCol` of `frame` (modified in place, nothing is returned).
    With `Generic` set to true the SMILES of the generic framework is stored
    instead.
    '''
    def _row_to_smiles(row):
        scaffold = MurckoScaffold.GetScaffoldForMol(row[molCol])
        if Generic:
            scaffold = MurckoScaffold.MakeScaffoldGeneric(scaffold)
        return Chem.MolToSmiles(scaffold)

    frame[MurckoCol] = frame.apply(_row_to_smiles, axis=1)
def calc_scaffold_similarity(s1: str, s2: str) -> float:
    """Return the Tanimoto similarity between the Murcko scaffolds of two SMILES.

    Both scaffolds are encoded as Morgan fingerprints with radius 3.
    Returns ``-1.0`` when either SMILES cannot be parsed or when any step of
    the scaffold / fingerprint calculation raises.
    """
    parsed = [Chem.MolFromSmiles(s1), Chem.MolFromSmiles(s2)]
    if any(m is None for m in parsed):
        return -1.0

    try:
        cores = [MurckoScaffold.GetScaffoldForMol(m) for m in parsed]
        fp_a, fp_b = [AllChem.GetMorganFingerprint(core, 3) for core in cores]
        return DataStructs.TanimotoSimilarity(fp_a, fp_b)
    except Exception:
        return -1.0
def pipe_sim_filter(stream, query, cutoff=80, summary=None, comp_id="pipe_sim_filter"):
    """Filter for compounds that have a similarity greater or equal
    than `cutoff` (in percent) to the `query` Smiles.

    If the field `FP_b64` (e.g. pre-calculated) is present in a record, it is
    used as the record's fingerprint; otherwise the fingerprint of the Murcko
    scaffold is generated on-the-fly (much slower). Records without a `mol`
    field are skipped. Every yielded record gets a `Sim` field with the
    similarity in percent, rounded to two decimals. When `summary` is given,
    `summary[comp_id]` is updated with the running number of hits.
    """

    def _scaffold_fp(mol):
        # Fingerprint of the Murcko scaffold, type selected by the global USE_FP.
        core = MurckoScaffold.GetScaffoldForMol(mol)
        if USE_FP == "morgan":
            return Desc.rdMolDescriptors.GetMorganFingerprintAsBitVect(core, 2)
        if USE_FP == "avalon":
            return pyAv.GetAvalonFP(core, 1024)
        return FingerprintMols.FingerprintMol(core)

    n_hits = 0
    query_mol = Chem.MolFromSmiles(query)
    if not query_mol:
        print("* {} ERROR: could not generate query from SMILES.".format(
            comp_id))
        return None

    query_fp = _scaffold_fp(query_mol)

    for rec in stream:
        if "mol" not in rec:
            continue

        if "FP_b64" in rec:
            # Use the pre-defined fingerprint if it is present in the stream.
            # NOTE(review): pickle.loads executes arbitrary code on malicious
            # input -- only feed this filter streams from trusted sources.
            rec_fp = pickle.loads(b64.b64decode(rec["FP_b64"]))
        else:
            rec_fp = _scaffold_fp(rec["mol"])

        similarity = DataStructs.FingerprintSimilarity(query_fp, rec_fp)
        if similarity * 100 >= cutoff:
            n_hits += 1
            rec["Sim"] = np.round(similarity * 100, 2)
            if summary is not None:
                summary[comp_id] = n_hits
            yield rec
def GetScaffold(mol, generic_framework=False):
    """
    Calculate the Murcko scaffold of a molecule and return it as SMILES.

    Usage:
        result = GetScaffold(mol)

    Input:
        mol is a molecule object.
        generic_framework is a boolean value. If it is true, the scaffold is
        converted to its generic framework before being written as SMILES.

    Output:
        result is the SMILES string of the molecule's scaffold
        (or of its generic framework).
    """
    scaffold = MurckoScaffold.GetScaffoldForMol(mol)
    # Plain truthiness instead of ``== True`` (PEP 8); identical for bools.
    if generic_framework:
        scaffold = MurckoScaffold.MakeScaffoldGeneric(scaffold)
    return Chem.MolToSmiles(scaffold)
def getScaffold(self, smile):
    """Return the non-isomeric Murcko scaffold SMILES of `smile`.

    An empty string is returned when RDKit cannot build a molecule from
    `smile`.
    """
    mol = Chem.MolFromSmiles(smile)
    if not mol:
        return ''
    core = MurckoScaffold.GetScaffoldForMol(mol)
    return Chem.MolToSmiles(core, isomericSmiles=False)
def get_annotated_murcko_scaffold(mol, scaffold=None, as_mol=True):
    """
    Return an annotated murcko scaffold where side chains are replaced
    with a dummy atom ('*').

    Parameters
    ----------
    mol : rdkit.Chem.rdchem.Mol
    scaffold : rdkit.Chem.rdchem.Mol, optional
        A murcko scaffold already calculated for `mol`, used as the template.
        When omitted (or falsy) the scaffold is computed here.
        The default is None.
    as_mol : bool, optional
        If True return the rdkit.Chem.rdchem.Mol object (which may be None
        when the replacement fails), else return a SMILES string
        ('' when the replacement yields None). The default is True.

    Returns
    -------
    {str, rdkit.Chem.rdchem.Mol}
        Annotated Murcko scaffold.
    """
    template = scaffold if scaffold else MurckoScaffold.GetScaffoldForMol(mol)
    annotated = rdmolops.ReplaceSidechains(mol, template)
    if not as_mol:
        return '' if annotated is None else MolToSmiles(annotated)
    return annotated
def SMILES_2_ECFP(smiles, radius=3, bit_len=4096, index=None):
    """
    Transform a list of SMILES strings into Morgan (ECFP-like) bit vectors
    computed on the Murcko scaffold of each molecule.

    Parameters
    ----------
    smiles: List of SMILES strings to transform
    radius: Morgan fingerprint radius (default 3)
    bit_len: number of bits per fingerprint (default 4096)
    index: optional row index for the result; defaults to `smiles`

    Returns
    -------
    pandas.DataFrame with one row per SMILES and `bit_len` columns. A row is
    left all-zero (and the SMILES is printed) when the scaffold or the
    fingerprint cannot be computed.
    """
    fps = np.zeros((len(smiles), bit_len))
    for i, smile in enumerate(smiles):
        mol = Chem.MolFromSmiles(smile)
        arr = np.zeros((1, ))
        try:
            core = MurckoScaffold.GetScaffoldForMol(mol)
            fp = AllChem.GetMorganFingerprintAsBitVect(core, radius, nBits=bit_len)
            DataStructs.ConvertToNumpyArray(fp, arr)
            fps[i, :] = arr
        except Exception:
            # Was a bare ``except:``; KeyboardInterrupt/SystemExit must not be
            # swallowed. Failed molecules keep an all-zero row.
            print(smile)
            fps[i, :] = 0
    return pd.DataFrame(fps, index=(smiles if index is None else index))
def get_murcko_scaffold(smiles_dict):
    """Group molecule ids by the SMILES of their Murcko scaffold.

    Reads a smile dictionary in this format
    'CHEMBL189352': 'COc1ccc2c(cnn2n1)c3ccnc(Nc4ccc(cc4)C#N)n3'
    and returns a dictionary that maps each scaffold SMILES to the list of
    ids whose molecule has that scaffold.

    Molecules that cannot be parsed, or whose scaffold cannot be computed,
    are reported on stdout and skipped.

    :param smiles_dict: dictionary of id -> SMILES
    :return: dictionary of scaffold SMILES -> list of ids
    """
    scaffolds = {}
    for chembl_id, smiles in smiles_dict.items():
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            print("rdkit could not read {}".format(chembl_id))
            continue
        try:
            core = MurckoScaffold.GetScaffoldForMol(mol)
            scaffold = Chem.MolToSmiles(core)
        except Exception:
            print("rdkit could not read {}".format(chembl_id))
            # Skip the entry: falling through would file this id under the
            # previous molecule's scaffold (or raise UnboundLocalError).
            continue
        scaffolds.setdefault(scaffold, []).append(chembl_id)
    return scaffolds
def extract_murcko_scaffolds(mols, verbose=True):
    """ Extract Bemis-Murcko scaffolds and generic frameworks from molecules.

    :param mols: molecule data set in rdkit mol format; ``None`` entries are skipped.
    :param verbose: {bool} print the number of extracted scaffolds and the elapsed time.
    :return: two parallel lists: scaffold SMILES and generic framework SMILES.
        A molecule that raises ``ValueError`` contributes ``['error']`` to both lists.
    """
    scaf = []
    generic_scaf = []
    start = time.time()
    for mol in mols:
        if mol is None:
            continue
        try:
            core = MurckoScaffold.GetScaffoldForMol(mol)
            fw = MurckoScaffold.MakeScaffoldGeneric(core)
            core_smiles = Chem.MolToSmiles(core, isomericSmiles=True)
            fw_smiles = Chem.MolToSmiles(fw, isomericSmiles=True)
        except ValueError as e:
            print(e)
            scaf.append(['error'])
            generic_scaf.append(['error'])
        else:
            # Append only after both SMILES exist so the lists stay aligned.
            scaf.append(core_smiles)
            generic_scaf.append(fw_smiles)
    if verbose:
        print('Extracted', len(scaf), 'scaffolds in', time.time() - start, 'seconds.')
    return scaf, generic_scaf
def main(name, argv):
    """Write the scaffold of every sulfonyl-fluoride molecule in a SMILES file.

    `argv` must hold exactly two paths: the input file (whitespace-separated
    columns, SMILES first, a second column copied through) and the output
    file. For each molecule matching the sulfonyl fluoride pattern the
    fluorine is temporarily replaced by a tetrazole ring, the Murcko scaffold
    is taken, and the ring is turned back into fluorine (presumably so the
    sulfonyl group is kept in the scaffold -- confirm with the author).
    Output lines are ``scaffold<TAB>smiles<TAB>second column``.

    Lines with fewer than two columns, unparsable SMILES and molecules
    without the pattern are skipped.
    """
    if not len(argv) == 2:
        print_usage(name)
        return

    rxn = rdChemReactions.ReactionFromSmarts(
        '[S:1](=[O:2])(=[O:3])F>>[S:1](=[O:2])(=[O:3])n1nnnn1')
    reactents_smarts = rxn.GetReactants()
    back = rdChemReactions.ReactionFromSmarts('[S:1]n1nnnn1>>[S:1]F')
    back_smarts = back.GetReactants()

    with open(argv[0], 'r') as f:
        records = [line.split() for line in f]

    with open(argv[1], 'w') as f:
        for fields in records:
            if len(fields) < 2:
                # blank or malformed line
                continue
            smiles, tag = fields[0], fields[1]
            mol = Chem.MolFromSmiles(smiles)
            # The original tested a 3-element list against None, which is
            # never true, so invalid SMILES crashed on the next line.
            if mol is None:
                continue
            if not mol.HasSubstructMatch(reactents_smarts[0]):
                continue
            fake_ring = rxn.RunReactants((mol, ))[0][0]
            # Round-trip through SMILES to get a sanitized molecule.
            fake_ring = Chem.MolFromSmiles(Chem.MolToSmiles(fake_ring))
            core = MurckoScaffold.GetScaffoldForMol(fake_ring)
            if core.HasSubstructMatch(back_smarts[0]):
                scaffold = back.RunReactants((core, ))[0][0]
            else:
                scaffold = Chem.MolFromSmiles('S(=O)(=O)F')
            f.write('%s\t%s\t%s\n' %
                    (Chem.MolToSmiles(scaffold), smiles, tag))
def extract_side_chains(mol, remove_duplicates=False, mark='[*]'):
    """ Extract side chains from a smiles string. Core is handled as Murcko scaffold.

    :param mol: {str} smiles string of a molecule.
    :param remove_duplicates: {bool} Keep or remove duplicates.
    :param mark: character to mark attachment points.
    :return: smiles strings of side chains in a list, numbered attachment
        points ``[0*]`` .. ``[19*]`` replaced by `mark`. An empty list is
        returned when the scaffold or the side chains cannot be computed.
    """
    m1 = MolFromSmiles(mol)
    try:
        core = MurckoScaffold.GetScaffoldForMol(m1)
        side_chain = ReplaceCore(m1, core)
        # isomericSmiles adds a number to the dummy atoms.
        smi = MolToSmiles(side_chain, isomericSmiles=True)
    except Exception:
        # Was a bare ``except:``; keep the best-effort behaviour without
        # swallowing KeyboardInterrupt/SystemExit.
        return []

    for label in range(20):
        smi = smi.replace('[' + str(label) + '*]', mark)

    chains = smi.split('.')
    if remove_duplicates:
        return list(set(chains))
    return chains
def __call__(self, smiles, radius=3, bit_len=4096, scaffold=0):
    """Compute Morgan bit-vector fingerprints for a list of SMILES.

    `scaffold` selects a preprocessing step: ``1`` uses the Murcko scaffold
    of the molecule, ``2`` applies ``MakeScaffoldGeneric`` to the molecule,
    any other value uses the molecule as parsed.

    Returns a pandas DataFrame with one row per SMILES and `bit_len` columns.
    When a molecule fails, the exception is appended to
    ``self.builder.errors`` and its row is all zeros.
    """
    n_rows = len(smiles)
    fps = np.zeros((n_rows, bit_len))
    for row, smile in enumerate(smiles):
        mol = Chem.MolFromSmiles(smile)
        arr = np.zeros((1, ))
        try:
            if scaffold == 1:
                mol = MurckoScaffold.GetScaffoldForMol(mol)
            elif scaffold == 2:
                mol = MurckoScaffold.MakeScaffoldGeneric(mol)
            if not mol:
                raise Exception(
                    f'Failed to calculate Morgan fingerprint (creating RDKit instance from smiles failed: {smile})'
                )
            bit_vect = AllChem.GetMorganFingerprintAsBitVect(
                mol, radius, nBits=bit_len)
            DataStructs.ConvertToNumpyArray(bit_vect, arr)
            fps[row, :] = arr
        except Exception as exp:
            # TODO: use a more specific exception related to descriptor errors
            self.builder.errors.append(exp)
            fps[row, :] = [0] * bit_len
    return pd.DataFrame(fps)
def findCluster(self, smiles):
    """Assign `smiles` to a scaffold cluster.

    Returns a tuple ``(cluster_smiles, fingerprint, flag)``:

    * ``("", "", False)`` when the SMILES cannot be parsed or no scaffold
      can be computed;
    * the molecule's own scaffold SMILES with ``False`` when that scaffold
      is already a key of ``self.getFingerprints()``;
    * the most similar known cluster with ``False`` when its Tanimoto
      similarity is at least ``self.minsimilarity``;
    * the molecule's own scaffold SMILES with ``True`` otherwise (no known
      cluster matched).

    The fingerprint is the atom-pair fingerprint of the scaffold.
    """
    failure = ("", "", False)

    mol = Chem.MolFromSmiles(smiles)
    if not mol:
        return failure
    try:
        scaffold = MurckoScaffold.GetScaffoldForMol(mol)
    except Exception:
        # Was a bare ``except:``; do not swallow KeyboardInterrupt/SystemExit.
        return failure
    if not scaffold:
        return failure

    cluster = Chem.MolToSmiles(scaffold, isomericSmiles=False)
    fp = Pairs.GetAtomPairFingerprint(scaffold)  # Change to Tanimoto?

    known = self.getFingerprints()
    if cluster in known:
        return cluster, fp, False

    sims = DataStructs.BulkTanimotoSimilarity(fp, list(known.values()))
    if len(sims) == 0:
        return cluster, fp, True

    closest = np.argmax(sims)
    if sims[closest] >= self.minsimilarity:
        return list(known.keys())[closest], fp, False
    return cluster, fp, True
def generate_molecule_image(path, limit=25):
    """Render the Murcko scaffold of the top hits in `path` as PNG images.

    Only the first `limit` lines of the file are read. Each line is expected
    to hold a SMILES and an id separated by whitespace. For every entry the
    scaffold is drawn, the id is written into the image and the picture is
    saved as ``GUI/images/molecules/<smiles>.png``.

    Nothing happens when `path` does not exist. Lines with fewer than two
    columns and SMILES that RDKit cannot parse are skipped.

    NOTE(review): the SMILES is used verbatim as file name; SMILES containing
    ``/`` or ``\\`` will not map to a plain file in that directory -- confirm
    how the GUI looks the images up before changing this.
    """
    from rdkit.Chem.Scaffolds import MurckoScaffold
    from rdkit.Chem import MolFromSmiles
    from rdkit.Chem.Draw import MolToImage
    from PIL import ImageDraw

    if not os.path.exists(path):
        return

    # Read the hits file
    hits = []
    with open(path, 'r') as top_hits:
        for line_number, line in enumerate(top_hits):
            if line_number >= limit:
                break
            fields = line.split()
            if len(fields) < 2:
                # blank or malformed line
                continue
            # split() also drops the trailing newline the id used to carry
            hits.append((fields[0], fields[1]))

    # Generate scaffold images
    for smile, mid in hits:
        parsed = MolFromSmiles(smile)
        if parsed is None:
            continue
        mol = MurckoScaffold.GetScaffoldForMol(parsed)
        image = MolToImage(mol)
        # Add text to the image
        draw = ImageDraw.Draw(image)
        draw.text((5, 5), mid, fill="black", align="right")
        image.save("GUI/images/molecules/{}.png".format(smile))
def get_scaffolds(compounds):
    """Annotate each compound record with its Murcko scaffold SMILES.

    For every record the molecule is built from ``record["canonical_smiles"]``
    and two keys are written back: ``"scaffold"`` (Murcko scaffold SMILES)
    and ``"generic_scaffold"`` (SMILES of the generic scaffold).
    The same `compounds` object is returned.
    """
    for entry in compounds:
        parsed = Chem.MolFromSmiles(entry["canonical_smiles"])
        core = MurckoScaffold.GetScaffoldForMol(parsed)
        entry["scaffold"] = Chem.MolToSmiles(core)
        generic_core = MurckoScaffold.MakeScaffoldGeneric(core)
        entry["generic_scaffold"] = Chem.MolToSmiles(generic_core)
    return compounds
def compute_scaffold(mol, min_rings=2):
    """Return the Murcko scaffold SMILES of `mol`, or ``None`` if unusable.

    `mol` is first passed through ``get_mol`` (helper defined elsewhere).
    ``None`` is returned when the scaffold SMILES is empty or when
    ``get_n_rings`` reports fewer than `min_rings` rings for the scaffold.
    """
    core = MurckoScaffold.GetScaffoldForMol(get_mol(mol))
    ring_count = get_n_rings(core)
    core_smiles = Chem.MolToSmiles(core)
    if core_smiles == '' or ring_count < min_rings:
        return None
    return core_smiles
def get_scaffold(mol):
    """Compute the Bemis-Murcko scaffold of a molecule.

    Args:
        mol: RDKit Mol.

    Returns:
        Isomeric SMILES string of the scaffold.
    """
    core = MurckoScaffold.GetScaffoldForMol(mol)
    return Chem.MolToSmiles(core, isomericSmiles=True)
def scaffold2smiles(mol, generic=True, return_smiles=True):
    '''Return the Murcko scaffold of a molecule as SMILES or as a molecule.

    With `generic` true the molecule is first made generic (all atoms ->
    carbon and all bonds -> single) and the scaffold of that generic
    molecule is taken; errors in this branch propagate to the caller.
    With `generic` false the plain Murcko scaffold is computed; on failure
    the SMILES is ``''`` and the scaffold is ``np.nan``.

    `return_smiles` selects the result: the SMILES string when true,
    the scaffold object otherwise.
    '''
    if generic:
        # Makes the molecule generic, then reduces it to its scaffold.
        scff = MurckoScaffold.MakeScaffoldGeneric(mol)
        scff = MurckoScaffold.GetScaffoldForMol(scff)
        scff_smiles = MolToSmiles(scff)
    else:
        try:
            scff = MurckoScaffold.GetScaffoldForMol(mol)
            scff_smiles = MolToSmiles(scff)
        except Exception:
            # Was a bare ``except:``; do not swallow KeyboardInterrupt/SystemExit.
            scff_smiles = ''
            scff = np.nan

    return scff_smiles if return_smiles else scff
def compute_scaffold(mol, min_rings=2):
    """Return the Murcko scaffold SMILES of `mol`, or ``None`` if unusable.

    `mol` is normalised with ``get_mol`` (helper defined elsewhere).
    ``None`` is returned when the scaffold calculation raises ``ValueError``
    or ``RuntimeError``, when the scaffold SMILES is empty, or when
    ``get_n_rings`` reports fewer than `min_rings` rings for the scaffold.
    """
    parsed = get_mol(mol)
    try:
        core = MurckoScaffold.GetScaffoldForMol(parsed)
    except (ValueError, RuntimeError):
        return None

    ring_count = get_n_rings(core)
    core_smiles = Chem.MolToSmiles(core)
    unusable = core_smiles == '' or ring_count < min_rings
    return None if unusable else core_smiles
def _getscaffold(mol, stype='Murcko'):
    """
    *Internal used only*

    Return the canonical, non-isomeric SMILES of the Murcko scaffold of
    `mol` (``stype='Murcko'``) or of its generic version (``stype='Carbon'``).
    """
    assert stype in ['Murcko', 'Carbon'], \
        'scaffold type must be a member of "Murcko" or "Carbon"'

    scaffold = MurckoScaffold.GetScaffoldForMol(mol)
    if stype == 'Murcko':
        chosen = scaffold
    else:
        chosen = MurckoScaffold.MakeScaffoldGeneric(scaffold)
    return Chem.MolToSmiles(chosen, isomericSmiles=False, canonical=True)
def _calculate_scaffold(self, smile):
    """Return the non-isomeric Murcko scaffold SMILES of `smile`.

    An empty string is returned when the SMILES cannot be parsed or when the
    scaffold calculation raises ``ValueError``.
    """
    mol = Chem.MolFromSmiles(smile)
    if not mol:
        return ''
    try:
        core = MurckoScaffold.GetScaffoldForMol(mol)
        return Chem.MolToSmiles(core, isomericSmiles=False)
    except ValueError:
        return ''
def get_murckoscf(self):
    """Return the Murcko scaffold SMILES of the molecule, or ``None``.

    ``None`` is returned when any of ``is_mol()``, ``is_small()`` or
    ``has_2_rings()`` returns ``False``, when the scaffold calculation raises
    ``ValueError``, or when it yields ``None``.
    """
    checks = [self.is_mol(), self.is_small(), self.has_2_rings()]
    if False in checks:
        return None

    try:
        scaffold = MurckoScaffold.GetScaffoldForMol(self.get_mol())
    except ValueError:
        # scaffold calculation not possible
        return None

    if scaffold is None:
        return None
    return Chem.MolToSmiles(scaffold)
def extract_murcko_scaffolds(mol):
    """ Extract the Bemis-Murcko scaffold from a smile string.

    :param mol: {str} smiles string of a molecule.
    :return: isomeric smiles string of the scaffold, or '' when the scaffold
        cannot be computed (e.g. the smiles string could not be parsed).
    """
    m1 = MolFromSmiles(mol)
    try:
        core = MurckoScaffold.GetScaffoldForMol(m1)
        return MolToSmiles(core, isomericSmiles=True)
    except Exception:
        # Was a bare ``except:``; do not swallow KeyboardInterrupt/SystemExit.
        return ''
def init_scaffold_tbl():
    """Populate the scaffold table from all stored molecules.

    For every ``Molecule`` the Murcko scaffold of ``mol.structure`` is
    computed. When the scaffold has a non-empty SMILES, the ``Scaffold`` row
    with that SMILES is fetched or created, its ``structure`` is set to the
    scaffold, and the molecule is linked to it; both objects are saved.
    Molecules whose scaffold cannot be computed are skipped.
    """
    for mol in Molecule.objects.all().iterator():
        try:
            core = MurckoScaffold.GetScaffoldForMol(mol.structure)
        except Exception:
            # Was a bare ``except:``; do not swallow KeyboardInterrupt/SystemExit.
            continue

        core_smiles = Chem.MolToSmiles(core)
        if not core_smiles:
            continue

        scaffold, _created = Scaffold.objects.get_or_create(smiles=core_smiles)
        scaffold.structure = core
        mol.scaffold = scaffold
        scaffold.save()
        mol.save()
def computeFramwork(df):
    """Add Murcko and carbon (generic) scaffold SMILES columns to `df`.

    SMILES are read from column ``'can'``. The Murcko scaffold SMILES go to
    column ``'murckos'`` and the generic scaffold SMILES to column
    ``'carbons'``. `df` is modified in place and returned.
    """
    pairs = []
    for smi in df['can']:
        mol = Chem.MolFromSmiles(smi)
        core = MurckoScaffold.GetScaffoldForMol(mol)
        generic = MurckoScaffold.MakeScaffoldGeneric(core)
        # Convert the Murcko scaffold and the carbon scaffold to SMILES.
        pairs.append((Chem.MolToSmiles(core), Chem.MolToSmiles(generic)))

    df['murckos'] = [murcko for murcko, _ in pairs]
    df['carbons'] = [carbon for _, carbon in pairs]
    return df
def get_stripped_core(self, core_smi):
    """Return the core of the molecule with its side chains stripped.

    When `core_smi` is given, it is parsed and used as the core. Otherwise
    the Murcko scaffold of ``self.smi`` is used (``self.smi`` is passed
    straight to RDKit molecule functions, so it is presumably an RDKit Mol
    despite its name -- confirm against the constructor).

    If the Murcko scaffold is identical to the whole molecule, the largest
    direct RECAP fragment (longest SMILES) is returned instead. Previously
    that fragment was assigned and then immediately overwritten, so the
    fallback never took effect. When RECAP yields no fragments, the whole
    molecule is used as the core.

    In all other cases the result of ``Chem.ReplaceSidechains`` on the
    molecule and the core is returned.
    """
    if core_smi is None:
        print('No core provided generating core with Murcko Scaffold')
        core = MurckoScaffold.GetScaffoldForMol(self.smi)
        if Chem.MolToSmiles(core) == Chem.MolToSmiles(self.smi):
            print('Murcko Scaffold failed selecting largest fragment as core')
            fragments = Recap.RecapDecompose(self.smi).children.keys()
            if fragments:
                return Chem.MolFromSmiles(max(fragments, key=len))
    else:
        core = Chem.MolFromSmiles(core_smi)

    return Chem.ReplaceSidechains(self.smi, core)
def __init__(self, smi):
    """Build the generic scaffold of `smi` and initialise the counters.

    The molecule is parsed from `smi`, made generic and reduced to its
    Murcko scaffold; atoms, bonds and SMILES of that scaffold are cached.
    Ring systems are then derived through ``GetRingSystemsofscaf`` and
    ``count_ring_systems`` (defined elsewhere in the class).
    """
    self._smi = smi
    self._mol = Chem.MolFromSmiles(smi)

    generic_mol = MurckoScaffold.MakeScaffoldGeneric(self._mol)
    self._scaf = MurckoScaffold.GetScaffoldForMol(generic_mol)
    self._scaf_atoms = self._scaf.GetAtoms()
    self._scaf_bonds = self._scaf.GetBonds()
    self._scaf_smi = Chem.MolToSmiles(self._scaf)

    # Order matters: both calls run before the attributes below exist.
    self._ring_system = self.GetRingSystemsofscaf()
    self._ring_system_count = self.count_ring_systems()

    self._bin_values = [1, 2, 3, 4, 7]
    # Linkers: [direct bond between rings, linear chain between rings,
    #           branched chain between rings]
    self._linkers = [0, 0, 0]
    self._chain_binning = [0, 0, 0, 0, 0]
def getModeMurckoScaffoldImage(SMILES_list):
    """
    Return the most common Murcko scaffold of a list of SMILES as an
    rdkit image (700 x 700).

    Scaffolds are compared by their canonical SMILES. The original compared
    the Mol objects themselves, which are only equal to themselves, so every
    scaffold counted once and the "mode" was arbitrary. Ties go to the
    scaffold seen first. SMILES that cannot be parsed are skipped.

    Raises ValueError when no scaffold could be derived (e.g. empty list).
    """
    from collections import Counter

    counts = Counter()
    representative = {}
    for smile in SMILES_list:
        m1 = Chem.MolFromSmiles(smile)
        if m1 is None:
            continue
        core = MurckoScaffold.GetScaffoldForMol(m1)
        key = Chem.MolToSmiles(core)
        counts[key] += 1
        # keep one Mol per distinct scaffold for drawing
        representative.setdefault(key, core)

    if not counts:
        raise ValueError('no valid SMILES to derive a Murcko scaffold from')

    mode_key = counts.most_common(1)[0][0]
    return MolToImage(representative[mode_key], size=(700, 700))
def main(name, argv):
    """Print the Murcko scaffold SMILES for the first ten lines of a file.

    `argv` must hold exactly one path. The first whitespace-separated column
    of each of the first ten lines is read as SMILES. Blank lines and SMILES
    that RDKit cannot parse are skipped.
    """
    from itertools import islice

    if not len(argv) == 1:
        print_usage(name)
        return

    with open(argv[0], 'r') as f:
        lines = list(islice(f, 10))

    # Blank lines have no first column; the original raised IndexError on them.
    smiles = [fields[0] for fields in (line.split() for line in lines) if fields]

    for smile in smiles:
        mol = Chem.MolFromSmiles(smile)
        if mol is None:
            continue
        core = MurckoScaffold.GetScaffoldForMol(mol)
        # print() call instead of the Python 2 print statement.
        print(Chem.MolToSmiles(core))