def SetupRGroupDecompositionParams():
    """Create and populate the R group decomposition parameters object.

    A fresh ``rgd.RGroupDecompositionParameters`` instance is created and each
    of its attributes listed below is set from the matching entry of
    ``OptionsInfo["DecompositionParams"]``.

    Returns:
        The populated ``rgd.RGroupDecompositionParameters`` instance.
    """
    Params = rgd.RGroupDecompositionParameters()
    Options = OptionsInfo["DecompositionParams"]

    # (attribute on the parameters object, key in the options dictionary),
    # applied in the order listed.
    AttributeToOptionKey = (
        ("alignment", "RGroupCoreAlignment"),
        ("chunkSize", "chunkSize"),
        ("matchingStrategy", "RGroupMatching"),
        ("onlyMatchAtRGroups", "matchOnlyAtRGroups"),
        ("removeAllHydrogenRGroups", "removeHydrogenOnlyGroups"),
        ("removeHydrogensPostMatch", "removeHydrogensPostMatch"),
    )
    for AttributeName, OptionKey in AttributeToOptionKey:
        setattr(Params, AttributeName, Options[OptionKey])

    return Params
N - number of rgroups r2 - regression r squared descriptors - set of the descriptors for molecules in the training set used to not enumerate existing molecules """ def __init__(self, rgroups, rgroup_to_descriptor_idx, fitter, r2, descriptors): self.rgroups = rgroups # dictionary 'Core':[core1, core1], 'R1': [rgroup1, rgroup2], ... self.rgroup_to_descriptor_idx = rgroup_to_descriptor_idx # dictionary {smi:descriptor_idx} self.fitter = fitter # fitter rgroup indices -> prediction self.N = len(rgroup_to_descriptor_idx) self.r2 = r2 self.descriptors = set([tuple(d) for d in descriptors]) default_decomp_params = rgd.RGroupDecompositionParameters() # The default decomposition uses the GeneticAlgorithm # fingerprint analysis to break symmetry and make # more consistent rgroup decompositions default_decomp_params.matchingStrategy = rgd.GA # Also the decomposition is allowed to add new rgroups default_decomp_params.onlyMatchAtRGroups = False # use the fingerprint variance method for scoring default_decomp_params.scoreMethod = rgd.RGroupScore.FingerprintVariance def FWDecompose( scaffolds,
def fuzzy_scaffolding(
    mols: List[Chem.rdchem.Mol],
    enforce_subs: List[str] = None,
    n_atom_cuttoff: int = 8,
    additional_templates: List[Chem.rdchem.Mol] = None,
    ignore_non_ring: bool = False,
    mcs_params: Dict[Any, Any] = None,
):
    """Generate fuzzy scaffold with enforceable group that needs to appear
    in the core, forcing to keep the full side chain if required.

    The molecules are first grouped by their generic Murcko scaffold (plus any
    matching additional template). For every group the MCS is computed and used
    as the core of an R-group decomposition; the R-groups attached to non-ring
    core atoms are then trimmed from each molecule to produce its scaffold.

    NOTE(hadim): consider parallelize this (if possible).

    NOTE: `RemoveAllConformers()` is called on the input molecules, so the
    objects passed in `mols` are modified in place.

    Args:
        mols: List of all molecules
        enforce_subs: List of substructure to enforce on the scaffold. Each
            entry is either a SMARTS string or an already built query molecule.
            R-groups matching any of them are never trimmed.
        n_atom_cuttoff: Minimum number of atom a core should have. Groups whose
            MCS is smaller are skipped.
        additional_templates: Additional template to use to generate scaffolds.
        ignore_non_ring: Whether to ignore atom no in murcko ring system, even
            if they are in the framework. When set, every remaining ring
            fragment of the scaffold is used as a separate grouping key.
        mcs_params: Arguments of MCS algorithm. They are merged over the
            defaults used here (`atomCompare`, `bondCompare`,
            `completeRingsOnly`), so any of those can be overridden.

    Returns:
        scaffolds: set
            All found scaffolds in the molecules as valid smiles
        scaffold_infos: dict of dict
            Infos on the scaffold mapping, ignoring any side chain that had to
            be enforced. Key corresponds to generic scaffold smiles.
            Values at ['smarts'] corresponds to smarts representation of the
            true scaffold (from MCS); only set when the MCS has at least
            `n_atom_cuttoff` atoms.
            Values at ['mols'] corresponds to list of molecules matching the
            scaffold
        scaffold_to_group: dict of list
            Map between each generic scaffold and the R-groups decomposition
            row

    Raises:
        ValueError: If a string in `enforce_subs` cannot be parsed as SMARTS.
    """

    if enforce_subs is None:
        enforce_subs = []

    if additional_templates is None:
        additional_templates = []

    if mcs_params is None:
        mcs_params = {}

    # The substructure search below needs query molecules: parse the string
    # entries once here and pass molecule objects through untouched.
    enforced_queries = []
    for sub in enforce_subs:
        if isinstance(sub, str):
            query = Chem.MolFromSmarts(sub)
            if query is None:
                raise ValueError(
                    f"Could not parse enforced substructure as SMARTS: {sub!r}"
                )
        else:
            query = sub
        enforced_queries.append(query)

    rg_params = rdRGroupDecomposition.RGroupDecompositionParameters()
    rg_params.removeAllHydrogenRGroups = True
    rg_params.removeHydrogensPostMatch = True
    rg_params.alignment = rdRGroupDecomposition.RGroupCoreAlignment.MCS
    rg_params.matchingStrategy = rdRGroupDecomposition.RGroupMatching.Exhaustive
    rg_params.rgroupLabelling = rdRGroupDecomposition.RGroupLabelling.AtomMap
    rg_params.labels = rdRGroupDecomposition.RGroupLabels.AtomIndexLabels

    core_query_param = AdjustQueryParameters()
    core_query_param.makeDummiesQueries = True
    core_query_param.adjustDegree = False
    core_query_param.makeBondsGeneric = True

    # The generic scaffold of each template does not depend on the molecule
    # being processed: compute it (and its smiles) only once.
    template_scaffolds = []
    for tmp in additional_templates:
        tmp_scf = MurckoScaffold.MakeScaffoldGeneric(dm.to_mol(tmp))
        template_scaffolds.append((tmp_scf, dm.to_smiles(tmp_scf)))

    # group molecules by they generic Murcko scaffold, allowing
    # side chain that contains cycle (might be a bad idea)
    scf2infos = collections.defaultdict(dict)
    scf2groups = {}
    all_scaffolds = set()

    for m in mols:
        generic_m = MurckoScaffold.MakeScaffoldGeneric(m)
        scf = MurckoScaffold.GetScaffoldForMol(m)
        try:
            scf = MurckoScaffold.MakeScaffoldGeneric(scf)
        except Exception:
            # Best effort: fall back on the non-generic Murcko scaffold.
            pass

        if ignore_non_ring:
            rw_scf = Chem.RWMol(scf)
            # Remove from the highest index down so that the remaining
            # indices stay valid while atoms are deleted.
            non_ring_atoms = sorted(
                (a.GetIdx() for a in rw_scf.GetAtoms() if not a.IsInRing()),
                reverse=True,
            )
            for atom_idx in non_ring_atoms:
                rw_scf.RemoveAtom(atom_idx)
            # Use the smiles of each ring fragment as key, like the other
            # branch does; raw atom-index tuples are not comparable between
            # molecules. Fragments are left unsanitized so that an odd
            # fragment cannot make the whole call fail.
            ring_frags = rdmolops.GetMolFrags(
                rw_scf, asMols=True, sanitizeFrags=False
            )
            scfs = [dm.to_smiles(frag) for frag in ring_frags]
        else:
            scfs = [dm.to_smiles(scf)]

        # add templates mols if exists:
        for tmp_scf, tmp_smiles in template_scaffolds:
            if generic_m.HasSubstructMatch(tmp_scf):
                scfs.append(tmp_smiles)

        # A key can show up several times (e.g. a template identical to the
        # molecule's own scaffold): register the molecule once per key.
        for scf_key in dict.fromkeys(scfs):
            scf2infos[scf_key].setdefault("mols", []).append(m)

    # Defaults of the MCS search, overridable through `mcs_params`.
    mcs_kwargs = {
        "atomCompare": rdFMCS.AtomCompare.CompareAny,
        "bondCompare": rdFMCS.BondCompare.CompareAny,
        "completeRingsOnly": True,
    }
    mcs_kwargs.update(mcs_params)

    for scf, infos in scf2infos.items():
        # cheat by adding murcko as last mol always
        popout = False
        cluster_mols = infos["mols"]
        if len(cluster_mols) < 2:
            cluster_mols = cluster_mols + [
                MurckoScaffold.GetScaffoldForMol(cluster_mols[0])
            ]
            popout = True

        # compute the MCS of the cluster
        mcs = rdFMCS.FindMCS(cluster_mols, **mcs_kwargs)

        mcsM = Chem.MolFromSmarts(mcs.smartsString)
        mcsM.UpdatePropertyCache(False)
        Chem.SetHybridization(mcsM)

        if mcsM.GetNumAtoms() < n_atom_cuttoff:
            continue

        infos["smarts"] = dm.to_smarts(mcsM)
        if popout:
            # drop the Murcko scaffold that was only added for the MCS
            cluster_mols = cluster_mols[:-1]

        core_groups = []
        # generate rgroups based on the mcs core
        success_mols = []
        try:
            rg = rdRGroupDecomposition.RGroupDecomposition(mcsM, rg_params)
            for i, analog in enumerate(cluster_mols):
                analog.RemoveAllConformers()
                res = rg.Add(analog)
                if res >= 0:
                    success_mols.append(i)
            rg.Process()
            core_groups = rg.GetRGroupsAsRows()
        except Exception:
            # Best effort: a failed decomposition leaves `core_groups` empty,
            # so no scaffold is produced for this cluster.
            pass

        # keep only the molecules that were added to the decomposition so
        # that they can be paired with the decomposition rows
        cluster_mols = [cluster_mols[i] for i in success_mols]
        scf2groups[scf] = core_groups

        for mol, gp in zip(cluster_mols, core_groups):
            core = gp["Core"]
            # only R-groups attached to non-ring core atoms can be trimmed
            acceptable_groups = [
                a.GetAtomMapNum()
                for a in core.GetAtoms()
                if (a.GetAtomMapNum() and not a.IsInRing())
            ]

            rgroups = [gp[f"R{k}"] for k in acceptable_groups if f"R{k}" in gp]
            if enforced_queries:
                # R-groups carrying an enforced substructure are kept
                rgroups = [
                    rgp
                    for rgp in rgroups
                    if not any(
                        rgp.HasSubstructMatch(query) for query in enforced_queries
                    )
                ]
            try:
                scaff = trim_side_chain(
                    mol, AdjustQueryProperties(core, core_query_param), rgroups
                )
            except Exception:
                # skip molecules whose side chains could not be trimmed
                continue
            all_scaffolds.add(dm.to_smiles(scaff))

    return all_scaffolds, scf2infos, scf2groups