def extract_side_chains(mol, remove_duplicates=False, mark='[*]'):
    """Extract side chains from a SMILES string.

    The core is the Murcko scaffold of the molecule; whatever is left after
    replacing that core is returned as side chains.

    :param mol: {str} SMILES string of a molecule.
    :param remove_duplicates: {bool} if True, every distinct side chain is
        returned only once; otherwise repeated side chains are kept.
    :param mark: {str} text substituted for each numbered dummy atom
        (``[1*]``, ``[2*]``, ...) that marks an attachment point.
    :return: list of side-chain SMILES strings with the attachment points
        replaced by ``mark``. An empty list is returned when scaffold
        extraction, core replacement or SMILES generation raises.
    """
    import re

    m1 = MolFromSmiles(mol)
    try:
        core = MurckoScaffold.GetScaffoldForMol(m1)
        side_chain = ReplaceCore(m1, core)
        # isomericSmiles adds a number to the dummy atoms.
        smi = MolToSmiles(side_chain, isomericSmiles=True)
    except Exception:
        # Best effort: a molecule that cannot be processed yields no side
        # chains. KeyboardInterrupt/SystemExit are deliberately not caught.
        return []

    # Replace every numbered dummy atom in a single pass, whatever its number.
    # A callable replacement keeps backslashes in ``mark`` literal.
    smi = re.sub(r'\[[0-9]+\*\]', lambda _match: mark, smi)

    chains = smi.split('.')
    if remove_duplicates:
        # dict.fromkeys drops repeats; on Python 3.7+ it also keeps the
        # first-occurrence order, which makes the result reproducible.
        return list(dict.fromkeys(chains))
    return chains
def impsmiles(self, m, keepiso=False):
    """Make a SMILES string in which hydrogen counts are written out.

    The molecule is written as a non-canonical SMILES, re-parsed, and the
    string is then walked token by token in step with the atoms of the
    re-parsed molecule.

    :param m: molecule object accepted by ``MolToSmiles``.
    :param keepiso: passed to ``MolToSmiles`` as ``isomericSmiles``. When
        false, ``([H])`` and ``[H]`` are also stripped from the SMILES
        before it is re-parsed.
    :return: {str} SMILES in which every unbracketed atom with a non-zero
        ``GetImplicitValence()`` is rewritten as ``[XH]`` / ``[XHn]``.
        Bracketed atoms are copied unchanged unless ``GetNumImplicitHs()``
        is non-zero, in which case they are rebuilt from the atom symbol,
        the hydrogen count and the formal charge.
    """
    import re

    smi = MolToSmiles(m, canonical=False, isomericSmiles=keepiso)
    if not keepiso:
        smi = smi.replace('([H])', '')
        smi = smi.replace('[H]', '')
    mol = MolFromSmiles(smi)
    atoms = mol.GetAtoms()

    # Split into atom tokens (bracket atoms and organic-subset symbols) and
    # the text between them (bonds, ring digits, parentheses). The capture
    # group keeps the atom tokens in the result. Two-letter symbols are listed
    # before their one-letter prefixes so that "Cl" is not read as "C".
    parts = re.split(r"(\[.*?\]|Cl|Br|F|I|B|C|c|N|n|O|o|S|s|P|p)", smi)

    pieces = []
    iatom = 0  # index of the atom that the next atom token corresponds to
    for p in parts:
        if not p:
            continue
        if p.isalpha():
            # Unbracketed atom: spell out its hydrogens.
            hcount = atoms[iatom].GetImplicitValence()
            if hcount == 0:
                pieces.append(p)
            elif hcount == 1:
                pieces.append("[%sH]" % p)
            else:
                pieces.append("[%sH%d]" % (p, hcount))
            iatom += 1
        elif p.startswith("["):
            # Bracket atom: keep as written unless hydrogens must be added.
            atom = atoms[iatom]
            hcount = atom.GetNumImplicitHs()
            if hcount == 0:
                pieces.append(p)
            elif hcount == 1:
                # The formal charge must be written as a signed charge; it
                # was previously formatted into the hydrogen-count position.
                pieces.append("[%sH%+d]" % (atom.GetSymbol(),
                                            atom.GetFormalCharge()))
            else:
                pieces.append("[%sH%d%+d]" % (atom.GetSymbol(), hcount,
                                              atom.GetFormalCharge()))
            iatom += 1
        else:
            # Bonds, ring-closure digits, branches: copy through unchanged.
            pieces.append(p)
    return "".join(pieces)
def Write(self, degrees, edges, canonical=True):
    """Build a singly-bonded molecule from a degree list and return its SMILES.

    :param degrees: iterable of degree values; each one is looked up in
        ``self.d2atno`` to obtain the atomic number of the atom to add.
    :param edges: iterable of ``(from, to)`` atom-index pairs; each pair is
        added as a single bond.
    :param canonical: if true, a canonical SMILES is produced; otherwise a
        randomised, non-canonical SMILES is requested.
    :return: the SMILES after applying every ``(src, dst)`` pair of
        ``self.replacements`` and upper-casing the result, or ``None`` when
        some value in ``degrees`` is not in ``self.allowed``.
    """
    # Reject anything containing a degree that is not allowed.
    if not set(degrees).issubset(self.allowed):
        return None

    # Assemble the molecule: atoms first, then the single bonds between them.
    molecule = RWMol()
    for degree in degrees:
        molecule.AddAtom(Atom(self.d2atno[degree]))
    for begin, end in edges:
        molecule.AddBond(begin, end, BondType.SINGLE)

    # Export as canonical SMILES or a random SMILES.
    if canonical:
        smiles = MolToSmiles(molecule, canonical=True)
    else:
        smiles = MolToSmiles(molecule, canonical=False, doRandom=True)

    # Apply the configured textual substitutions in order.
    for old, new in self.replacements:
        smiles = smiles.replace(old, new)
    return smiles.upper()
def extract_murcko_scaffolds_marked(mol, mark='[*]'):
    """Extract the Bemis-Murcko scaffold from a SMILES string, marking side chains.

    :param mol: {str} SMILES string of a molecule.
    :param mark: {str} text substituted for each numbered dummy atom
        (``[1*]``, ``[2*]``, ...) that stands in for a removed side chain.
    :return: {str} SMILES of the scaffold with the side chains replaced by
        ``mark``. An empty string is returned when scaffold extraction,
        side-chain replacement or SMILES generation raises.
    """
    import re

    m1 = MolFromSmiles(mol)
    try:
        core = MurckoScaffold.GetScaffoldForMol(m1)
        tmp = ReplaceSidechains(m1, core)
        # isomericSmiles adds a number to the dummy atoms.
        smi = MolToSmiles(tmp, isomericSmiles=True)
    except Exception:
        # Best effort: a molecule that cannot be processed yields no
        # scaffold. KeyboardInterrupt/SystemExit are deliberately not caught.
        return ''

    # Replace every numbered dummy atom in a single pass, whatever its number.
    # A callable replacement keeps backslashes in ``mark`` literal.
    return re.sub(r'\[[0-9]+\*\]', lambda _match: mark, smi)