Esempio n. 1
0
    def test6Options(self):
        smis = ["C1OC1Cc1ccccc1"]
        ms = [Chem.MolFromSmiles(x) for x in smis]
        params = rdScaffoldNetwork.ScaffoldNetworkParams()
        net = rdScaffoldNetwork.CreateScaffoldNetwork(ms, params)
        self.assertEqual(len(net.nodes), 9)
        self.assertEqual(len(net.edges), 8)

        params = rdScaffoldNetwork.ScaffoldNetworkParams()
        params.keepOnlyFirstFragment = False
        net = rdScaffoldNetwork.CreateScaffoldNetwork(ms, params)
        self.assertEqual(len(net.nodes), 19)
        self.assertEqual(len(net.edges), 23)

        params = rdScaffoldNetwork.ScaffoldNetworkParams()
        params.includeGenericScaffolds = False
        net = rdScaffoldNetwork.CreateScaffoldNetwork(ms, params)
        self.assertEqual(len(net.nodes), 5)
        self.assertEqual(len(net.edges), 4)

        params = rdScaffoldNetwork.ScaffoldNetworkParams()
        params.includeGenericBondScaffolds = True
        net = rdScaffoldNetwork.CreateScaffoldNetwork(ms, params)
        self.assertEqual(len(net.nodes), 11)
        self.assertEqual(len(net.edges), 10)
Esempio n. 2
0
 def test7Github3177(self):
     smis = ["C1OC1Cc1ccccc1"]
     ms = [Chem.MolFromSmiles(x) for x in smis]
     ms.append(None)
     params = rdScaffoldNetwork.ScaffoldNetworkParams()
     with self.assertRaises(ValueError):
         net = rdScaffoldNetwork.CreateScaffoldNetwork(ms, params)
Esempio n. 3
0
    def test4Str(self):
        smis = ["c1ccccc1CC1NC(=O)CCC1"]
        ms = [Chem.MolFromSmiles(x) for x in smis]
        params = rdScaffoldNetwork.ScaffoldNetworkParams()

        net = rdScaffoldNetwork.CreateScaffoldNetwork(ms, params)
        self.assertEqual(len(net.nodes), 9)
        self.assertEqual(len(net.edges), 8)
        self.assertEqual(str(net.edges[0]),
                         "NetworkEdge( 0->1, type:Fragment )")
Esempio n. 4
0
 def test5FragmentationReactions(self):
     smis = ["c1c(CC2CC2)cc(NC2CC2)cc1OC1CC1"]
     ms = [Chem.MolFromSmiles(x) for x in smis]
     params = rdScaffoldNetwork.ScaffoldNetworkParams([
         "[!#0;R:1]-!@[O:2]>>[*:1]-[#0].[#0]-[*:2]",
         "[!#0;R:1]-!@[N:2]>>[*:1]-[#0].[#0]-[*:2]"
     ])
     params.includeScaffoldsWithoutAttachments = False
     params.includeGenericScaffolds = False
     net = rdScaffoldNetwork.CreateScaffoldNetwork(ms, params)
     self.assertEqual(len(net.nodes), 5)
     self.assertEqual(len(net.edges), 7)
Esempio n. 5
0
    def test3Update(self):
        smis = ["c1ccccc1CC1NC(=O)CCC1", "c1cccnc1CC1NC(=O)CCC1"]
        ms = [Chem.MolFromSmiles(x) for x in smis]
        params = rdScaffoldNetwork.ScaffoldNetworkParams()
        net = rdScaffoldNetwork.ScaffoldNetwork()
        rdScaffoldNetwork.UpdateScaffoldNetwork(ms[0:1], net, params)
        self.assertEqual(len(net.nodes), 9)
        self.assertEqual(len(net.edges), 8)
        self.assertEqual(len(net.counts), len(net.nodes))
        self.assertEqual(list(net.counts).count(1), len(net.counts))
        rdScaffoldNetwork.UpdateScaffoldNetwork(ms[1:2], net, params)
        self.assertEqual(len(net.nodes), 12)
        self.assertEqual(len(net.edges), 13)
        self.assertEqual(len(net.counts), len(net.nodes))
        self.assertEqual(
            len([
                x for x in net.edges
                if x.type == rdScaffoldNetwork.EdgeType.Fragment
            ]), 4)
        self.assertEqual(
            len([
                x for x in net.edges
                if x.type == rdScaffoldNetwork.EdgeType.Generic
            ]), 6)
        self.assertEqual(
            len([
                x for x in net.edges
                if x.type == rdScaffoldNetwork.EdgeType.RemoveAttachment
            ]), 3)

        net = rdScaffoldNetwork.CreateScaffoldNetwork(ms[0:1], params)
        rdScaffoldNetwork.UpdateScaffoldNetwork(ms[1:2], net, params)
        self.assertEqual(len(net.nodes), 12)
        self.assertEqual(len(net.edges), 13)
        self.assertEqual(
            len([
                x for x in net.edges
                if x.type == rdScaffoldNetwork.EdgeType.Fragment
            ]), 4)
        self.assertEqual(
            len([
                x for x in net.edges
                if x.type == rdScaffoldNetwork.EdgeType.Generic
            ]), 6)
        self.assertEqual(
            len([
                x for x in net.edges
                if x.type == rdScaffoldNetwork.EdgeType.RemoveAttachment
            ]), 3)
Esempio n. 6
0
    def test1Pickle(self):
        smis = ["c1ccccc1CC1NC(=O)CCC1", "c1cccnc1CC1NC(=O)CCC1"]
        ms = [Chem.MolFromSmiles(x) for x in smis]
        params = rdScaffoldNetwork.ScaffoldNetworkParams()
        params.includeScaffoldsWithoutAttachments = False
        net = rdScaffoldNetwork.CreateScaffoldNetwork(ms, params)
        self.assertEqual(len(net.nodes), 7)
        self.assertEqual(len(net.edges), 7)

        pkl = pickle.dumps(net)
        net2 = pickle.loads(pkl)
        self.assertEqual(len(net2.nodes), 7)
        self.assertEqual(len(net2.edges), 7)
        self.assertEqual(list(net2.nodes), list(net.nodes))
        self.assertEqual([str(x) for x in net2.edges],
                         [str(x) for x in net.edges])
Esempio n. 7
0
 def test2Basics(self):
     smis = ["c1ccccc1CC1NC(=O)CCC1", "c1cccnc1CC1NC(=O)CCC1"]
     ms = [Chem.MolFromSmiles(x) for x in smis]
     params = rdScaffoldNetwork.ScaffoldNetworkParams()
     params.includeScaffoldsWithoutAttachments = False
     net = rdScaffoldNetwork.CreateScaffoldNetwork(ms, params)
     self.assertEqual(len(net.nodes), 7)
     self.assertEqual(len(net.edges), 7)
     self.assertEqual(
         len([
             x for x in net.edges
             if x.type == rdScaffoldNetwork.EdgeType.Fragment
         ]), 4)
     self.assertEqual(
         len([
             x for x in net.edges
             if x.type == rdScaffoldNetwork.EdgeType.Generic
         ]), 3)
class ScaffoldFoldAssign(object):
    priority_cols = [
        "num_rings_delta",
        "has_macrocyle",
        "num_rbonds",
        "num_bridge",
        "num_spiro",
        "has_unusual_ring_size",
        "num_hrings",
        "num_arings",
        "node_smiles",
    ]
    priority_asc = [True, False, True, False, False, False, False, True, True]
    assert len(priority_cols) == len(
        priority_asc), "priority_cols and priorty_asc must have same length"
    nrings_target = 3

    # rdScaffoldNetwork.ScaffoldNetworkParams are hardwired, since the heuristcs are not guaranteed to work with different setup here
    snparams = rdScaffoldNetwork.ScaffoldNetworkParams()
    snparams.flattenIsotopes = True
    snparams.includeGenericBondScaffolds = False
    snparams.includeGenericScaffolds = False
    snparams.includeScaffoldsWithAttachments = False  # this needs to be hardwired to False, as we start from Murcko, which has no attachment information
    snparams.includeScaffoldsWithoutAttachments = True  # this needs to hardwred to True,  as we start from Murcko, which has no attachment information
    snparams.pruneBeforeFragmenting = True

    # default constructor expecting all attributes passed as keyword arguments
    def __init__(self, secret, nfolds=5, verbosity=0):
        """Function to create and initialize a SaccolFoldAssign Calculator'

        Args:
            secret: secret key used (for fold hashing)
            nfolds: desired number of folds
            verbosity: controlls verbosity
        """

        self.nfolds = nfolds

        self.secret = secret.encode()
        self.verbosity = verbosity

    # methods required to pickle.
    # rdScaffoldNetwork.ScaffoldNetworkParams() canbnot be pickled and need to be initialized a new each time
    # def __getstate__(self):
    #    return self.secret, self.nfolds, self.verbosity

    # def __setstate__(self, secret, nfolds, verbosity):
    #    self.__init__(secret, nfolds, verbosity)

    @classmethod
    def from_param_dict(cls, secret, method_param_dict, verbosity=0):
        """Function to create and initialize a SaccolFoldAssign Calculator

        Args:
            secret: secret key used (for fold hashing)
            verbosity (int): controlls verbosity
            par_dict(dict): dictionary of method parameters
        """
        return cls(secret=secret, **method_param_dict, verbosity=verbosity)

    @staticmethod
    def murcko_scaff_smiles(mol_smiles):
        """Function to clauclate the Murcko scaffold, wrapper around rdkit MurckoScaffold.GetScaffoldForMol

        Args:
            mol_smiles(str): valid smiles of a molecule

        Returns:
            str: smiles string of the Murcko Scaffold

        """
        mol = Chem.MolFromSmiles(mol_smiles)
        if mol is not None:
            murcko_smiles = Chem.MolToSmiles(
                MurckoScaffold.GetScaffoldForMol(mol))
            if murcko_smiles == "":
                return None
            else:
                return murcko_smiles
        else:
            raise ValueError("could not parse smiles {}".format(mol_smiles))

    @staticmethod
    def has_unusual_ringsize(mol):
        """Function to check for ringsizes different than 5 or 6

        Args:
            mol(rdkit.Chem.rdchem.Mol): molecule

        Returns:
            bool: boolean indicating whether usnusally sized ring is present

        """

        return (len([
            len(x)
            for x in mol.GetRingInfo().AtomRings() if len(x) > 6 or len(x) < 5
        ]) > 0)

    @staticmethod
    def has_macrocycle(mol):
        """Function to check for macrocycles with rinsize > 9

        Args:
            mol(rdkit.Chem.rdchem.Mol): molecule

        Returns:
            bool: boolean indicating whether macrocycle is present

        """

        return len(
            [len(x) for x in mol.GetRingInfo().AtomRings() if len(x) > 9]) > 0

    def sn_scaff_smiles(self, murcko_smiles):
        """Function to exctract the preferred scaffold based on Scaffold Tree rules from the scaffold network created from a Murcko scaffold

        Args:
            murcko_smiles(str): valdi smiles string of a Murcko scaffold

        Returns:
            str: smiles string of the preferred scaffold

        """

        if murcko_smiles is None:
            return None
        mol = Chem.MolFromSmiles(murcko_smiles)
        if mol is not None:
            # if the murcko scaffold has less or equal than the targeted number of rings, then the Murcko scaffold is already the sn_scaffold,
            # so no further decomposition is needed
            if Chem.rdMolDescriptors.CalcNumRings(mol) <= self.nrings_target:
                return murcko_smiles
            # otherwise start decomposition
            try:
                sn = rdScaffoldNetwork.CreateScaffoldNetwork([mol],
                                                             self.snparams)
            except:
                raise ValueError(
                    "failed to calculate scaffold network for {}".format(
                        murcko_smiles))
            # create data fram with n ode smiles
            node_df = pd.DataFrame({"node_smiles": [str(n) for n in sn.nodes]})
            PandasTools.AddMoleculeColumnToFrame(node_df,
                                                 "node_smiles",
                                                 "mol",
                                                 includeFingerprints=False)
            node_df["num_rings"] = node_df["mol"].apply(
                Chem.rdMolDescriptors.CalcNumRings)
            node_df["num_rings_delta"] = (node_df["num_rings"] -
                                          self.nrings_target).abs()
            node_df["num_rbonds"] = node_df["mol"].apply(
                Chem.rdMolDescriptors.CalcNumRotatableBonds, strict=False)
            node_df["num_hrings"] = node_df["mol"].apply(
                Chem.rdMolDescriptors.CalcNumHeterocycles)
            node_df["num_arings"] = node_df["mol"].apply(
                Chem.rdMolDescriptors.CalcNumAromaticRings)
            node_df["num_bridge"] = node_df["mol"].apply(
                Chem.rdMolDescriptors.CalcNumBridgeheadAtoms)
            node_df["num_spiro"] = node_df["mol"].apply(
                Chem.rdMolDescriptors.CalcNumSpiroAtoms)
            node_df["has_macrocyle"] = node_df["mol"].apply(
                self.has_macrocycle)
            node_df["has_unusual_ring_size"] = node_df["mol"].apply(
                self.has_unusual_ringsize)
            node_df.sort_values(self.priority_cols,
                                ascending=self.priority_asc,
                                inplace=True)
            return node_df.iloc[0]["node_smiles"]
        else:
            raise ValueError("murcko_smiles {} cannot be read by rdkit".format(
                murcko_smiles))

    def hashed_fold_scaffold(self, sn_smiles):
        """applies hashing to assign scaffold sn_smiles to a fold

        Args:
            sn_smiles(str): smiles of the scaffold network scaffold

        Returns:
            int: fold id
        """
        scaff = str(sn_smiles).encode("ASCII")
        h = hmac.new(self.secret, msg=scaff, digestmod=hashlib.sha256)
        random.seed(h.digest(), version=2)
        return random.randint(0, self.nfolds - 1)

    # this function contaisn  the key functionality
    def calculate_single(self, smiles):
        """Function to calculate a sn_scaffold and fold_id from an individual smiles

        Args:
            smiles (str) : standardized smiles

        Returns:
            Tuple(str, str, int, bool, str) : a tuple of murcko_smiles, sn_scaffold_smiles, fold_id, Success_flag, error_message
        """
        try:
            murcko_smiles = self.murcko_scaff_smiles(smiles)
            sn_smiles = self.sn_scaff_smiles(murcko_smiles)
            fold_id = self.hashed_fold_scaffold(sn_smiles)
        except ValueError as err:
            return None, None, None, False, str(err)
        return murcko_smiles, sn_smiles, fold_id, True, None