def SetupRGroupDecompositionParams():
    """Setup R group decomposition parameters.

    Creates a new ``rgd.RGroupDecompositionParameters`` object and copies
    six values from ``OptionsInfo["DecompositionParams"]`` onto it.

    Returns:
        The populated decomposition parameters object.
    """

    Params = rgd.RGroupDecompositionParameters()

    # (attribute on the parameters object, key in
    # OptionsInfo["DecompositionParams"]); applied in the listed order.
    AttributeToOptionKey = (
        ("alignment", "RGroupCoreAlignment"),
        ("chunkSize", "chunkSize"),
        ("matchingStrategy", "RGroupMatching"),
        ("onlyMatchAtRGroups", "matchOnlyAtRGroups"),
        ("removeAllHydrogenRGroups", "removeHydrogenOnlyGroups"),
        ("removeHydrogensPostMatch", "removeHydrogensPostMatch"),
    )

    for AttributeName, OptionKey in AttributeToOptionKey:
        setattr(Params, AttributeName,
                OptionsInfo["DecompositionParams"][OptionKey])

    return Params
Ejemplo n.º 2
0
      N - number of rgroups
      r2 - regression r squared
      descriptors - set of the descriptors for molecules in the training set
                    used to not enumerate existing molecules
    """
    def __init__(self, rgroups, rgroup_to_descriptor_idx, fitter, r2,
                 descriptors):
        """Store the components of a fitted model on the instance.

        Args:
            rgroups: dictionary 'Core': [core1, core1],
                'R1': [rgroup1, rgroup2], ...
            rgroup_to_descriptor_idx: dictionary {smi: descriptor_idx}
            fitter: fitter mapping rgroup indices -> prediction
            r2: regression r squared
            descriptors: iterable of descriptors; each one is converted to a
                tuple and the collection is stored as a set
        """
        self.rgroups = rgroups
        self.rgroup_to_descriptor_idx = rgroup_to_descriptor_idx
        self.fitter = fitter
        # N is the number of entries in the smiles -> descriptor index map.
        self.N = len(rgroup_to_descriptor_idx)
        self.r2 = r2
        # Tuples are hashable, so the descriptors can be stored as a set.
        self.descriptors = {tuple(row) for row in descriptors}


def _make_default_decomp_params():
    """Build the module-level default R-group decomposition parameters."""
    params = rgd.RGroupDecompositionParameters()

    # The default decomposition uses the genetic algorithm: fingerprint
    # analysis breaks symmetry and makes the R-group decompositions more
    # consistent.
    params.matchingStrategy = rgd.GA

    # The decomposition is also allowed to add new R-groups.
    params.onlyMatchAtRGroups = False

    # Score with the fingerprint variance method.
    params.scoreMethod = rgd.RGroupScore.FingerprintVariance

    return params


default_decomp_params = _make_default_decomp_params()


def FWDecompose(
        scaffolds,
Ejemplo n.º 3
0
def fuzzy_scaffolding(
    mols: List[Chem.rdchem.Mol],
    enforce_subs: List[str] = None,
    n_atom_cuttoff: int = 8,
    additional_templates: List[Chem.rdchem.Mol] = None,
    ignore_non_ring: bool = False,
    mcs_params: Dict[Any, Any] = None,
):
    """Generate fuzzy scaffold with enforceable group that needs to appear
    in the core, forcing to keep the full side chain if required.

    The molecules are first clustered by scaffold key, then for every cluster
    an MCS is computed, used as the core of an R-group decomposition, and the
    side chains found by the decomposition are trimmed from each molecule.

    NOTE(hadim): consider parallelize this (if possible).

    NOTE: `RemoveAllConformers()` is called on the molecules passed in
    `mols`, so the caller's objects lose their conformers.

    Args:
        mols: List of all molecules
        enforce_subs: List of substructures to enforce on the scaffold. Each
            item is either a SMARTS string or an already-built query
            molecule. R-groups matching any of them are not trimmed.
        n_atom_cuttoff: Minimum number of atoms a core should have. Clusters
            whose MCS is smaller are skipped.
        additional_templates: Additional templates to use to generate
            scaffolds. A molecule whose generic form contains the generic
            scaffold of a template is also added to that template's cluster.
        ignore_non_ring: Whether to ignore atoms not in a murcko ring system,
            even if they are in the framework. When set, the non-ring atoms
            of the scaffold are removed and every remaining connected
            fragment becomes its own cluster key.
        mcs_params: Extra keyword arguments forwarded to `rdFMCS.FindMCS`.

    Returns:
        scaffolds: set
            All found scaffolds in the molecules as valid smiles
        scaffold_infos: dict of dict
            Infos on the scaffold mapping, ignoring any side chain that had to be enforced.
            Key corresponds to generic scaffold smiles
            Values at ['smarts'] corresponds to smarts representation of the true scaffold (from MCS)
            Values at ['mols'] corresponds to list of molecules matching the scaffold
        scaffold_to_group: dict of list
            Map between each generic scaffold and the R-groups decomposition row

    Raises:
        ValueError: If a string in `enforce_subs` cannot be parsed as SMARTS.
    """

    if enforce_subs is None:
        enforce_subs = []

    if additional_templates is None:
        additional_templates = []

    if mcs_params is None:
        mcs_params = {}

    # The substructure test further down needs query molecules, while the
    # signature advertises strings: parse strings once here and pass
    # already-built molecules through untouched.
    enforce_queries = []
    for sub in enforce_subs:
        if isinstance(sub, str):
            query = Chem.MolFromSmarts(sub)
            if query is None:
                raise ValueError(
                    f"Invalid substructure pattern in enforce_subs: {sub!r}")
        else:
            query = sub
        enforce_queries.append(query)

    rg_params = rdRGroupDecomposition.RGroupDecompositionParameters()
    rg_params.removeAllHydrogenRGroups = True
    rg_params.removeHydrogensPostMatch = True
    rg_params.alignment = rdRGroupDecomposition.RGroupCoreAlignment.MCS
    rg_params.matchingStrategy = rdRGroupDecomposition.RGroupMatching.Exhaustive
    rg_params.rgroupLabelling = rdRGroupDecomposition.RGroupLabelling.AtomMap
    rg_params.labels = rdRGroupDecomposition.RGroupLabels.AtomIndexLabels

    core_query_param = AdjustQueryParameters()
    core_query_param.makeDummiesQueries = True
    core_query_param.adjustDegree = False
    core_query_param.makeBondsGeneric = True

    # The generic scaffold of each template does not depend on the molecule
    # being processed, so build it (and its smiles key) once up front.
    template_scaffolds = []
    for tmp in additional_templates:
        tmp_scf = MurckoScaffold.MakeScaffoldGeneric(dm.to_mol(tmp))
        template_scaffolds.append((tmp_scf, dm.to_smiles(tmp_scf)))

    # group molecules by their generic Murcko scaffold, allowing
    # side chain that contains cycle (might be a bad idea)
    scf2infos = collections.defaultdict(dict)
    scf2groups = {}
    all_scaffolds = set()

    for m in mols:
        generic_m = MurckoScaffold.MakeScaffoldGeneric(m)
        scf = MurckoScaffold.GetScaffoldForMol(m)
        try:
            scf = MurckoScaffold.MakeScaffoldGeneric(scf)
        except Exception:
            # Best effort: fall back to the non-generic Murcko scaffold.
            pass

        if ignore_non_ring:
            rw_scf = Chem.RWMol(scf)
            # Collect every index before removing anything, and remove from
            # the highest index down so the pending indices stay valid.
            non_ring_atoms = sorted(
                (a.GetIdx() for a in rw_scf.GetAtoms() if not a.IsInRing()),
                reverse=True,
            )
            for atom_idx in non_ring_atoms:
                rw_scf.RemoveAtom(atom_idx)
            # Key each remaining fragment by its smiles, like the other
            # branch does. asMols=False would return tuples of atom indices,
            # which are meaningless as keys shared between molecules.
            # Sanitization is skipped so that a fragment of a non-generic
            # fallback scaffold cannot abort the whole run.
            ring_systems = rdmolops.GetMolFrags(
                rw_scf, asMols=True, sanitizeFrags=False)
            # dict.fromkeys drops repeated ring systems (order preserved) so
            # a molecule is listed once per key.
            scfs = list(
                dict.fromkeys(dm.to_smiles(frag) for frag in ring_systems))
        else:
            scfs = [dm.to_smiles(scf)]

        # add templates mols if exists:
        for tmp_scf, tmp_smiles in template_scaffolds:
            if generic_m.HasSubstructMatch(tmp_scf):
                scfs.append(tmp_smiles)

        for scf_key in scfs:
            scf2infos[scf_key].setdefault("mols", []).append(m)

    for scf, infos in scf2infos.items():
        cluster = infos["mols"]

        # cheat by adding murcko as last mol always: FindMCS needs at least
        # two molecules, so a singleton cluster is paired with its own
        # Murcko scaffold for the MCS step only.
        if len(cluster) < 2:
            mcs_input = cluster + [MurckoScaffold.GetScaffoldForMol(cluster[0])]
        else:
            mcs_input = cluster

        # compute the MCS of the cluster
        mcs = rdFMCS.FindMCS(
            mcs_input,
            atomCompare=rdFMCS.AtomCompare.CompareAny,
            bondCompare=rdFMCS.BondCompare.CompareAny,
            completeRingsOnly=True,
            **mcs_params,
        )

        mcsM = Chem.MolFromSmarts(mcs.smartsString)
        mcsM.UpdatePropertyCache(False)
        Chem.SetHybridization(mcsM)

        if mcsM.GetNumAtoms() < n_atom_cuttoff:
            continue

        infos["smarts"] = dm.to_smarts(mcsM)

        core_groups = []
        # generate rgroups based on the mcs core
        success_mols = []
        try:
            rg = rdRGroupDecomposition.RGroupDecomposition(mcsM, rg_params)
            for i, analog in enumerate(cluster):
                analog.RemoveAllConformers()
                # Add() returns a negative value when the molecule is rejected.
                if rg.Add(analog) >= 0:
                    success_mols.append(i)
            rg.Process()
            core_groups = rg.GetRGroupsAsRows()
        except Exception:
            # Best effort: a failed decomposition leaves core_groups empty,
            # so this cluster contributes no scaffold below.
            pass

        scf2groups[scf] = core_groups
        matched_mols = [cluster[i] for i in success_mols]
        for mol, gp in zip(matched_mols, core_groups):
            core = gp["Core"]
            # Only R-groups attached to a mapped, non-ring core atom are
            # candidates for trimming.
            acceptable_groups = [
                a.GetAtomMapNum() for a in core.GetAtoms()
                if (a.GetAtomMapNum() and not a.IsInRing())
            ]

            rgroups = [
                gp[f"R{k}"] for k in acceptable_groups if f"R{k}" in gp
            ]
            if enforce_queries:
                # Keep (do not trim) any R-group containing an enforced
                # substructure.
                rgroups = [
                    rgp for rgp in rgroups
                    if not any(
                        rgp.HasSubstructMatch(query)
                        for query in enforce_queries)
                ]
            try:
                scaff = trim_side_chain(
                    mol, AdjustQueryProperties(core, core_query_param),
                    rgroups)
            except Exception:
                # Skip molecules whose side chains cannot be trimmed.
                continue
            all_scaffolds.add(dm.to_smiles(scaff))

    return all_scaffolds, scf2infos, scf2groups