def extract_side_chains(mol, remove_duplicates=False, mark='[*]'):
    """
    Extract side chains from a smiles string. Core is handled as Murcko scaffold.

    The molecule is parsed, its Murcko scaffold is computed and passed to
    ``ReplaceCore`` as the core; the SMILES of the result is then split on
    '.' into individual side chains.

    :param mol: {str} smiles string of a molecule.
    :param remove_duplicates: {bool} If True, every distinct side chain is
        returned once, in order of first occurrence. If False, all side
        chains are returned, duplicates included.
    :param mark: {str} text that replaces every numbered attachment point
        of the form ``[<digits>*]`` (default ``'[*]'``).
    :return: {list} smiles strings of side chains, attachment points replaced
        by ``mark``. An empty list is returned when scaffold extraction, core
        replacement or SMILES generation raises. Note that an empty SMILES
        string yields ``['']``, because ``''.split('.')`` is ``['']``.
    """
    # Function-scope import keeps this block self-contained.
    import re

    m1 = MolFromSmiles(mol)
    try:
        core = MurckoScaffold.GetScaffoldForMol(m1)
        side_chain = ReplaceCore(m1, core)
        # isomericSmiles adds a number to the dummy atoms.
        smi = MolToSmiles(side_chain, isomericSmiles=True)
    except Exception:
        # Deliberate best effort: a molecule that cannot be processed simply
        # has no side chains to report. ``Exception`` (rather than a bare
        # ``except``) lets KeyboardInterrupt / SystemExit propagate.
        return []

    # Replace every numbered dummy atom in a single pass, whatever its label,
    # instead of only the labels 0-19. A callable replacement makes ``mark``
    # literal, so backslashes in it are not interpreted by ``re.sub``.
    smi = re.sub(r'\[[0-9]+\*\]', lambda _match: mark, smi)

    side_chains = smi.split('.')
    if remove_duplicates:
        # dict.fromkeys de-duplicates while keeping a deterministic order.
        return list(dict.fromkeys(side_chains))
    return side_chains
def retrieve_fragments(mol):
    """
    Split a molecule into its '.'-separated SMILES pieces and parse each one.

    :param mol: molecule object accepted by ``MolToSmiles``
        (presumably an RDKit Mol -- confirm against the callers).
    :return: tuple ``(fragments, fragment_smiles)``. ``fragment_smiles`` is
        the full list obtained by splitting the SMILES of ``mol`` on '.'.
        ``fragments`` holds the ``MolFromSmiles`` result for every non-empty
        piece, in the same order. Empty pieces are kept in
        ``fragment_smiles`` but skipped in ``fragments``, so the two lists
        can differ in length.
    """
    fragment_smiles = MolToSmiles(mol).split(".")

    # Every piece is parsed, empty ones included; only the results that
    # belong to a non-empty piece are kept.
    parsed = [MolFromSmiles(piece) for piece in fragment_smiles]
    fragments = [
        fragment
        for piece, fragment in zip(fragment_smiles, parsed)
        if piece
    ]
    return fragments, fragment_smiles