Esempio n. 1
0
def __fragment_mol(mol, radius=3, return_ids=True, keep_stereo=False, protected_ids=None):
    """
    Fragment a molecule and return every (context, core) combination found.

    INPUT:
        mol - Mol
        radius - integer, number of bonds to cut context
        return_ids - bool, report ids of core atoms; forced to True when protected_ids is given
        keep_stereo - bool, keep or discard information about stereoconfiguration
        protected_ids - set/list/tuple of atom ids which cannot be present in core fragments

    OUTPUT:
        list of tuples (env_smi, core_smi, tuple of core atom ids)
        ('C[*:1].C[*:2]', 'CC(C(=O)O)c1ccc(CC([*:1])[*:2])c(Br)c1', (1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))
        ('Cc(c)c(cc)[*:1]', '[H][*:1]', (25,))

    If input mol has explicit hydrogens the output will contain also fragments where core = [H][*:1].
    Smiles of fragments with heavy atoms will contain only heavy atoms
    """

    hydrogen_smi = '[H][*:1]'
    no_ids = tuple()

    def collect_ids(part, prop="Index"):
        # atoms with atomic number 0 (attachment points) are skipped
        return tuple(sorted(a.GetIntProp(prop) for a in part.GetAtoms() if a.GetAtomicNum()))

    if protected_ids:
        return_ids = True

    # remember original atom indices, they are needed after fragmentation
    if return_ids:
        for atom in mol.GetAtoms():
            atom.SetIntProp("Index", atom.GetIdx())

    # cuts between heavy atoms
    heavy_cuts = rdMMPA.FragmentMol(mol, pattern="[!#1]!@!=!#[!#1]", maxCuts=4, resultsAsMols=True, maxCutBonds=30)
    # cuts of bonds to hydrogens
    hydrogen_cuts = rdMMPA.FragmentMol(mol, pattern="[#1]!@!=!#[!#1]", maxCuts=1, resultsAsMols=True, maxCutBonds=100)

    output = []
    for core, chains in heavy_cuts + hydrogen_cuts:
        if core is not None:
            # multiple cuts: H can be present only in single cuts, so no H check is needed
            env, frag = get_canon_context_core(chains, core, radius, keep_stereo)
            output.append((env, frag, collect_ids(core) if return_ids else no_ids))
            continue

        # single cut: both parts are tried as context, the other part being the core
        parts = Chem.GetMolFrags(chains, asMols=True)
        first, second = parts[0], parts[1]
        first_ids = collect_ids(first) if return_ids else no_ids
        second_ids = collect_ids(second) if return_ids else no_ids
        for context, core_part, core_ids in ((first, second, second_ids), (second, first, first_ids)):
            if Chem.MolToSmiles(context) != hydrogen_smi:  # context cannot be H
                env, frag = get_canon_context_core(context, core_part, radius, keep_stereo)
                output.append((env, frag, core_ids))

    if protected_ids:
        protected = set(protected_ids)
        output = [entry for entry in output if protected.isdisjoint(entry[2])]

    return output  # list of tuples (env smiles, core smiles, tuple of atom ids)
Esempio n. 2
0
 def test5(self):
   # ALPHA-CONOTOXIN SI
   smiles = "CC[C@H](C)[C@@H](C(=O)N[C@H]1CSSC[C@H]2C(=O)NCC(=O)N3CCC[C@H]3C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@@H](CSSC[C@@H](C(=O)N[C@H](C(=O)N4CCC[C@H]4C(=O)N[C@H](C(=O)N2)C)CC(=O)N)NC1=O)C(=O)N)CO)Cc5ccc(cc5)O)CCCC[NH3+])N"
   mol = Chem.MolFromSmiles(smiles)
   # with the default settings no fragmentation is expected for this molecule
   default_frags = rdMMPA.FragmentMol(mol, resultsAsMols=False)
   self.assertFalse(len(default_frags))
   # explicit limits on the number of cuts and cuttable bonds give a fixed number of results
   limited_frags = rdMMPA.FragmentMol(mol, maxCuts=2, maxCutBonds=21, resultsAsMols=False)
   self.assertEqual(len(limited_frags), 231)
Esempio n. 3
0
def mmpa_frag(
    mol,
    pattern: Optional[str] = None,
    max_cut: int = 1,
    max_bond_cut: int = 20,
    h_split: bool = False,
) -> Set[tuple]:
    """Fragment molecule on specific bonds suitable for a MMPA analysis.

    Args:
        mol: Molecule to fragment.
        pattern: Bond pattern to split on. Will use default rdkit pattern
            '[#6+0;!$(*=,#[!#6])]!@!=!#[*]' if not provided (`None`).
            An empty string skips the pattern-based fragmentation, so only
            the hydrogen splits (if `h_split` is set) are returned.
        max_cut: Maximum number of cuts, forwarded as `maxCuts`.
        max_bond_cut: Maximum number of bond to cut. Default to 20.
        h_split:  Whether to split at hydrogen position too.
            This is equivalent to enabling the addition of new fragments.

    Returns:
        Set of unique `(core, chains)` tuples as returned by
        `rdMMPA.FragmentMol(..., resultsAsMols=False)`, i.e. SMILES strings
        rather than molecules. The set is empty when nothing was produced;
        `None` is never returned.
    """

    # Collect straight into a set: duplicates are dropped and the result type
    # does not depend on which of the branches below were taken.
    frags = set()
    if pattern is None:
        frags.update(
            rdMMPA.FragmentMol(
                mol,
                maxCuts=max_cut,
                resultsAsMols=False,
                maxCutBonds=max_bond_cut,
            )
        )
    elif pattern:
        frags.update(
            rdMMPA.FragmentMol(
                mol,
                pattern=pattern,
                maxCuts=max_cut,
                resultsAsMols=False,
                maxCutBonds=max_bond_cut,
            )
        )

    if h_split:
        # Hydrogens have to be explicit to be matched by the [#1] pattern.
        mol = Chem.AddHs(mol)
        frags.update(
            rdMMPA.FragmentMol(
                mol,
                pattern="[#1]!@!=!#[!#1]",
                maxCuts=1,
                resultsAsMols=False,
                maxCutBonds=max_bond_cut,
            )
        )
    return frags
Esempio n. 4
0
    def test8(self):
        mol = Chem.MolFromSmiles('Cc1ccccc1NC(=O)C(C)[NH+]1CCCC1')  # ZINC00000051
        query = Chem.MolFromSmarts("[#6+0;!$(*=,#[!#6])]!@!=!#[*]")
        # turn every matched atom pair into the index of the bond between the two atoms
        bond_ids = [
            mol.GetBondBetweenAtoms(begin, end).GetIdx()
            for begin, end in mol.GetSubstructMatches(query)
        ]

        # no bonds given, bonds given as a list, bonds given as a tuple
        by_default = rdMMPA.FragmentMol(mol, resultsAsMols=False)
        by_list = rdMMPA.FragmentMol(mol, bond_ids, resultsAsMols=False)
        by_tuple = rdMMPA.FragmentMol(mol, tuple(bond_ids), resultsAsMols=False)
        self.assertEqual(by_default, by_list)
        self.assertEqual(by_list, by_tuple)
Esempio n. 5
0
    def test1(self):
        mol = Chem.MolFromSmiles('c1ccccc1OC')
        results = rdMMPA.FragmentMol(mol)
        self.assertEqual(len(results), 3)
        for pair in results:
            self.assertEqual(len(pair), 2)
        cut_a, cut_b, cut_c = sorted(results, key=natoms)

        # the first two results have no core, the third one has
        self.assertEqual(cut_a[0], None)
        self.assertEqual(cut_b[0], None)
        self.assertNotEqual(cut_c[0], None)
        # chains are present in every result
        for _, chains in (cut_a, cut_b, cut_c):
            self.assertNotEqual(chains, None)

        # chains of the core-less results hold two atoms more than the input molecule
        expected_size = mol.GetNumAtoms() + 2
        self.assertEqual(cut_a[1].GetNumAtoms(), expected_size)
        self.assertEqual(cut_b[1].GetNumAtoms(), expected_size)

        # (fragmented molecule, SMILES expected for each of its disconnected parts)
        expectations = (
            (cut_a[1], ('c1ccc([*:1])cc1', 'CO[*:1]')),
            (cut_b[1], ('c1ccc(O[*:1])cc1', 'C[*:1]')),
            (cut_c[0], ('O([*:1])[*:2]',)),
            (cut_c[1], ('c1ccc([*:1])cc1', 'C[*:2]')),
        )
        for fragmented, expected_smiles in expectations:
            parts = Chem.GetMolFrags(fragmented, asMols=True)
            self.assertEqual(len(parts), len(expected_smiles))
            for part, smi in zip(parts, expected_smiles):
                self.assertEqual(Chem.MolToSmiles(part, True), smi)
Esempio n. 6
0
def fragment_mol(smi, cid, pattern="[#6+0;!$(*=,#[!#6])]!@!=!#[*]"):
    """Return the double-cut fragmentations of a SMILES as CSV-like lines.

    Args:
        smi: SMILES string of the molecule.
        cid: Identifier written next to the SMILES in every output line.
        pattern: SMARTS of the bonds to cut, forwarded to
            `rdMMPA.FragmentMol`.

    Returns:
        Set of strings 'smi,cid,core,chains', one per unique fragmentation
        with exactly two cuts. If the molecule gives no fragmentation the set
        holds the single line 'smi,cid,,'. If the SMILES cannot be parsed a
        message is written to stderr and an empty set is returned.
    """
    mol = Chem.MolFromSmiles(smi)

    # different cuts can give the same fragments, the set removes such duplicates
    outlines = set()

    if mol is None:
        sys.stderr.write("Can't generate mol for: %s\n" % (smi))
        return outlines

    frags = rdMMPA.FragmentMol(mol,
                               minCuts=2,
                               maxCuts=2,
                               maxCutBonds=100,
                               pattern=pattern,
                               resultsAsMols=False)
    outlines.update('%s,%s,%s,%s' % (smi, cid, core, chains)
                    for core, chains in frags)
    if not outlines:
        # for molecules with no cuts, output the parent molecule itself
        outlines.add('%s,%s,,' % (smi, cid))

    return outlines
Esempio n. 7
0
def fragment_mol(mol, query, max_cuts, keep_stereo, radius):
    """
    Fragment a molecule and name every core fragment for a set of context radii.

    INPUT:
        mol - Mol; NO2 groups are converted by replace_no2 and the result is sanitized
        query - SMARTS pattern of bonds to cut, passed to rdMMPA.FragmentMol
        max_cuts - integer, maximum number of cuts made at once
        keep_stereo - bool, forwarded to get_canon_context_core
        radius - iterable of integers, context radii; each radius adds one part to a fragment name

    OUTPUT:
        list of tuples (frag_name, tuple of ids of heavy atoms of the core)
        frag_name consists of parts joined by '||', one part per radius:
        'core_smi' for radius 0 and 'core_smi|env_smi' for other radii.
        Empty list is returned if the molecule fails sanitization.
    """

    def get_atom_prop(molecule, prop="Index", only_heavy=True):
        res = []
        for a in molecule.GetAtoms():
            # skip H and attachment points only if heavy atoms were requested
            # (previously nothing at all was collected for only_heavy=False)
            if only_heavy and a.GetAtomicNum() <= 1:
                continue
            try:
                res.append(a.GetIntProp(prop))
            except KeyError:
                continue
        return tuple(sorted(res))

    def get_frag_name(context, core, radius, keep_stereo):
        line = []
        for r in radius:
            env_smi, core_smi = get_canon_context_core(context, core, r, keep_stereo)
            if r == 0:  # for radius = 0 there is no env (empty string)
                line.append(core_smi)
            elif env_smi and core_smi:
                line.append('%s|%s' % (core_smi, env_smi))
        return '||'.join(line) if line else None

    # modify representation of NO2 groups to charged version
    mol = replace_no2(mol)
    err = Chem.SanitizeMol(mol, catchErrors=True)
    if err != 0:
        # a molecule without a name must not hide the sanitization error behind a missing-property error
        mol_name = mol.GetProp("_Name") if mol.HasProp("_Name") else ''
        print('Molecule %s failed to sanitize due to: ' % mol_name + str(err))
        return []

    output = []

    # set original atom idx to keep them in fragmented mol
    for atom in mol.GetAtoms():
        atom.SetIntProp("Index", atom.GetIdx())

    frags = rdMMPA.FragmentMol(mol, pattern=query, maxCuts=max_cuts, resultsAsMols=True, maxCutBonds=30)

    for core, chains in frags:
        if core is None:  # single cut
            components = list(Chem.GetMolFrags(chains, asMols=True))
            ids_0 = get_atom_prop(components[0])
            ids_1 = get_atom_prop(components[1])
            # each component is tried as context, the other one being the core
            for context, core_part, core_ids in ((components[0], components[1], ids_1),
                                                 (components[1], components[0], ids_0)):
                if Chem.MolToSmiles(context) == '[H][*:1]':  # context cannot be H
                    continue
                frag_name = get_frag_name(context, core_part, radius, keep_stereo)
                if frag_name:
                    output.append((frag_name, core_ids))
        else:   # multiple cuts
            # there are no checks for H needed because H can be present only in single cuts
            frag_name = get_frag_name(chains, core, radius, keep_stereo)
            if frag_name:
                output.append((frag_name, get_atom_prop(core)))

    return output
Esempio n. 8
0
    def test3(self):
        mol = Chem.MolFromSmiles('c1ccccc1OC')
        # a custom pattern restricts cutting to the bond between aromatic carbon and oxygen
        results = rdMMPA.FragmentMol(mol, resultsAsMols=False, pattern='cO')
        self.assertEqual(len(results), 1)
        for pair in results:
            self.assertEqual(len(pair), 2)
        core_smi, chains_smi = sorted(results)[0]
        self.assertEqual(core_smi, '')
        self.assertNotEqual(chains_smi, '')

        self.assertEqual(chains_smi, 'CO[*:1].c1ccc(cc1)[*:1]')
Esempio n. 9
0
def mmps_cutting(mol, pattern="[#6+0;!$(*=,#[!#6])]!@!=!#[*]", dummy=True, filtering=True):
    """Enumerate double-cut fragmentations of a molecule as "FMQ" strings.

    Every fragmentation with exactly two cuts gives a linker (core) and two
    side fragments. A fragmentation is kept when the linker has at least 2
    heavy atoms and a length of at least 1, both side fragments have at
    least 5 heavy atoms, and the side fragments together are not smaller
    than the linker.

    Args:
        mol: Molecule to fragment.
        pattern: SMARTS of the bonds to cut, forwarded to `rdMMPA.FragmentMol`.
        dummy: If True the side fragments keep their attachment points,
            otherwise they are passed through `remove_dummys`.
        filtering: If True the linker and both side fragments additionally
            have to pass the project-level `filter(..., type="frags")` helper.

    Returns:
        List of strings 'L_<linker_length>.<linker>.<chains>><smi>', where
        linker_length is the number of atoms on the shortest path between the
        two attachment points of the linker minus 2. An empty list is
        returned if any step raises.
    """
    FMQs = []
    try:
        smi = Chem.MolToSmiles(mol)
        bricks = rdMMPA.FragmentMol(mol, minCuts=2, maxCuts=2, maxCutBonds=100,
                                    pattern=pattern, resultsAsMols=False)

        for linker, chains in bricks:
            linker_mol = Chem.MolFromSmiles(linker)
            linker_size = linker_mol.GetNumHeavyAtoms()
            # attachment points are the atoms with atomic number 0
            linker_site_idxs = [atom.GetIdx() for atom in linker_mol.GetAtoms() if atom.GetAtomicNum() == 0]
            linker_length = len(Chem.rdmolops.GetShortestPath(linker_mol,
                                                              linker_site_idxs[0], linker_site_idxs[1])) - 2

            if linker_size < 2 or linker_length < 1:
                continue

            chain_parts = chains.split(".")
            frag1_mol = Chem.MolFromSmiles(chain_parts[0])
            frag2_mol = Chem.MolFromSmiles(chain_parts[1])
            frag1_size = frag1_mol.GetNumHeavyAtoms()
            frag2_size = frag2_mol.GetNumHeavyAtoms()

            # both side fragments count here (the sum used to add frag1_size twice)
            if frag1_size < 5 or frag2_size < 5 or (frag1_size + frag2_size) < linker_size:
                continue

            if filtering:
                # NOTE: `filter` must be the project helper, the builtin takes no `type` keyword
                action = filter(linker_mol, type="frags") & filter(frag1_mol, type="frags") \
                         & filter(frag2_mol, type="frags")
                if not action:
                    # rejected fragmentations are skipped; previously None or the
                    # string of an earlier fragmentation was appended for them
                    continue

            side_chains = chains if dummy else remove_dummys(chains)
            FMQs.append("L_%s.%s.%s>%s" % (linker_length, linker, side_chains, smi))
    except Exception:
        # best effort: any failure discards the results of this molecule, but
        # KeyboardInterrupt/SystemExit are no longer swallowed
        print("error")
        FMQs = []

    return FMQs
Esempio n. 10
0
    def test7(self):
        mol = Chem.MolFromSmiles("Oc1ccccc1N")
        # settings shared by all three fragmentations
        shared = dict(maxCutBonds=21, resultsAsMols=False)

        single_cuts = rdMMPA.FragmentMol(mol, minCuts=1, maxCuts=1, **shared)
        double_cuts = rdMMPA.FragmentMol(mol, minCuts=2, maxCuts=2, **shared)
        up_to_two_cuts = rdMMPA.FragmentMol(mol, maxCuts=2, **shared)

        # results with exactly one and exactly two cuts together equal results with up to two cuts
        self.assertEqual(set(single_cuts + double_cuts), set(up_to_two_cuts))
Esempio n. 11
0
 def test4(self):
   mol = Chem.MolFromSmiles('Cc1ccccc1NC(=O)C(C)[NH+]1CCCC1')  # ZINC00000051
   results = rdMMPA.FragmentMol(mol, resultsAsMols=False)
   # the first item of every result is the core SMILES
   core_smiles = {pair[0] for pair in results}
   self.assertTrue('C([*:1])([*:2])[*:3]' in core_smiles)
   # FIX: this needs to be investigated, it's not currently passing
   #self.assertTrue('O=C(N[*:3])C([*:1])[*:2]' in core_smiles)
   self.assertEqual(len(results), 18)
   for pair in results:
     self.assertEqual(len(pair), 2)
Esempio n. 12
0
def fragment_mol(smi, smi_id='', mode=0):
    """
    Fragment a SMILES and return the fragmentations as CSV-like lines.

    smi - SMILES string of a molecule
    smi_id - identifier written to every output line
    mode - 0: cuts between heavy atoms only, 1: heavy atom and hydrogen cuts, 2: hydrogen cuts only

    Returns a set of lines 'smi,smi_id,core,chains' terminated by a newline. If SMILES cannot be parsed
    a message is written to stderr and an empty set is returned.
    """
    outlines = set()

    mol = Chem.MolFromSmiles(smi)
    if mol is None:
        sys.stderr.write("Can't generate mol for: %s\n" % smi)
        return outlines

    def as_line(core, chains):
        return '%s,%s,%s,%s\n' % (smi, smi_id, core, chains)

    # heavy atoms
    if mode in (0, 1):
        heavy_frags = rdMMPA.FragmentMol(mol,
                                         pattern="[!#1]!@!=!#[!#1]",
                                         maxCuts=4,
                                         resultsAsMols=False,
                                         maxCutBonds=30)
        # NOTE(review): the second pass with maxCuts=3 looks redundant with maxCuts=4 - confirm why it is needed
        heavy_frags += rdMMPA.FragmentMol(mol,
                                          pattern="[!#1]!@!=!#[!#1]",
                                          maxCuts=3,
                                          resultsAsMols=False,
                                          maxCutBonds=30)
        for core, chains in set(heavy_frags):
            outlines.add(as_line(core, chains))

    # hydrogen splitting
    if mode in (1, 2):
        mol = Chem.AddHs(mol)
        hydrogen_count = mol.GetNumAtoms() - mol.GetNumHeavyAtoms()
        # molecules with 60 or more hydrogens are not split
        if hydrogen_count < 60:
            hydrogen_frags = rdMMPA.FragmentMol(mol,
                                                pattern="[#1]!@!=!#[!#1]",
                                                maxCuts=1,
                                                resultsAsMols=False,
                                                maxCutBonds=100)
            for core, chains in hydrogen_frags:
                outlines.add(as_line(core, chains))

    return outlines
Esempio n. 13
0
    def test9(self):
        """Invalid minCuts/maxCuts combinations are rejected with a ValueError."""
        m = Chem.MolFromSmiles("Oc1ccccc1N")

        # assertRaises reports a clear "ValueError not raised" failure instead of
        # the uninformative "False is not true" of a manual try/except
        with self.assertRaises(ValueError) as ctx:
            rdMMPA.FragmentMol(m,
                               minCuts=1,
                               maxCuts=0,
                               maxCutBonds=21,
                               resultsAsMols=False)
        self.assertEqual(str(ctx.exception), "supplied maxCuts is less than minCuts")

        with self.assertRaises(ValueError) as ctx:
            rdMMPA.FragmentMol(m,
                               minCuts=0,
                               maxCuts=0,
                               maxCutBonds=21,
                               resultsAsMols=False)
        self.assertEqual(str(ctx.exception), "minCuts must be greater than 0")
Esempio n. 14
0
    def test6(self):
        # ALPHA-CONOTOXIN SI
        smiles = "CC[C@H](C)[C@@H](C(=O)N[C@H]1CSSC[C@H]2C(=O)NCC(=O)N3CCC[C@H]3C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@H](C(=O)N[C@@H](CSSC[C@@H](C(=O)N[C@H](C(=O)N4CCC[C@H]4C(=O)N[C@H](C(=O)N2)C)CC(=O)N)NC1=O)C(=O)N)CO)Cc5ccc(cc5)O)CCCC[NH3+])N"
        mol = Chem.MolFromSmiles(smiles)
        # with the default settings no fragmentation is expected for this molecule
        self.assertFalse(len(rdMMPA.FragmentMol(mol, resultsAsMols=False)))

        # settings shared by the three fragmentations below
        shared = dict(maxCutBonds=21, resultsAsMols=False)
        single_cuts = rdMMPA.FragmentMol(mol, minCuts=1, maxCuts=1, **shared)
        double_cuts = rdMMPA.FragmentMol(mol, minCuts=2, maxCuts=2, **shared)
        up_to_two_cuts = rdMMPA.FragmentMol(mol, maxCuts=2, **shared)

        # single and double cuts together give all results with up to two cuts ...
        self.assertEqual(set(single_cuts + double_cuts), set(up_to_two_cuts))
        # ... and they have no result in common
        self.assertEqual(set(single_cuts).intersection(set(double_cuts)), set())
Esempio n. 15
0
 def fragment(self, molecule_pair):
     """Split every molecule of the pair with single cuts.

     Each item of ``molecule_pair`` must expose a ``smiles`` attribute.
     Returns two lists of molecules, built from the first and from the
     second part of every single-cut result; the ``[*:1]`` attachment
     label is rewritten to ``[1*]`` before parsing.
     """
     graph_cores, graph_sidechains = [], []
     for molecule in molecule_pair:
         parent = Chem.MolFromSmiles(molecule.smiles)
         cuts = rdMMPA.FragmentMol(parent, maxCuts=1, resultsAsMols=False)
         if not len(cuts):
             # nothing could be cut in this molecule
             continue
         # only the second item of a result is used
         for _, chains in cuts:
             core, sidechain = chains.split(".")
             core_mol = Chem.MolFromSmiles(core.replace("[*:1]", "[1*]"))
             sidechain_mol = Chem.MolFromSmiles(sidechain.replace("[*:1]", "[1*]"))
             graph_cores.append(core_mol)
             graph_sidechains.append(sidechain_mol)
     return graph_cores, graph_sidechains
Esempio n. 16
0
 def fragmentate(
     self, molecule_pair: Tuple[Chem.Mol, Chem.Mol]
 ) -> Tuple[List[Chem.Mol], List[Chem.Mol]]:
     """Split every molecule of the pair with single cuts.

     Args:
         molecule_pair: The two molecules to fragment.

     Returns:
         Two lists of molecules, built from the first and from the second
         part of every single-cut result; the ``[*:1]`` attachment label is
         rewritten to ``[1*]`` before parsing. A molecule that cannot be cut
         contributes nothing to either list.
     """
     molecule_cores = []
     molecule_sidechains = []
     for molecule in molecule_pair:
         molecule_frags = rdMMPA.FragmentMol(molecule,
                                             maxCuts=1,
                                             resultsAsMols=False)
         # Iterating the results directly (instead of transposing them with
         # zip) keeps a molecule without any fragmentation from raising a
         # ValueError; only the second item of a result is used.
         for _, chains in molecule_frags:
             core, sidechain = chains.split(".")
             molecule_cores.append(
                 Chem.MolFromSmiles(core.replace("[*:1]", "[1*]")))
             molecule_sidechains.append(
                 Chem.MolFromSmiles(sidechain.replace("[*:1]", "[1*]")))
     return molecule_cores, molecule_sidechains
Esempio n. 17
0
    def test2(self):
        mol = Chem.MolFromSmiles('c1ccccc1OC')
        results = rdMMPA.FragmentMol(mol, resultsAsMols=False)
        self.assertEqual(len(results), 3)
        for pair in results:
            self.assertEqual(len(pair), 2)
        first, second, third = sorted(results)

        # results with an empty core sort first, the result with a core comes last
        self.assertEqual(first[0], '')
        self.assertEqual(second[0], '')
        self.assertNotEqual(third[0], '')
        # chains are never empty
        for _, chains_smi in (first, second, third):
            self.assertNotEqual(chains_smi, '')

        self.assertEqual(first[1], 'CO[*:1].c1ccc(cc1)[*:1]')
        self.assertEqual(second[1], 'C[*:1].c1ccc(cc1)O[*:1]')
        self.assertEqual(third[0], 'O([*:1])[*:2]')
        self.assertEqual(third[1], 'C[*:2].c1ccc([*:1])cc1')
Esempio n. 18
0
def fragment_mol(smi, id):
    """Return the fragmentations of a SMILES as CSV-like lines.

    Args:
        smi: SMILES string of the molecule.
        id: Identifier written next to the SMILES in every output line
            (the name shadows the builtin but is kept for existing callers).

    Returns:
        Set of strings 'smi,id,core,chains', one per unique fragmentation.
        If the SMILES cannot be parsed a message is written to stderr and an
        empty set is returned.
    """
    mol = Chem.MolFromSmiles(smi)

    # different cuts can give the same fragments, the set removes such duplicates
    outlines = set()

    if mol is None:
        sys.stderr.write("Can't generate mol for: %s\n" % (smi))
        return outlines

    frags = rdMMPA.FragmentMol(mol,
                               pattern="[#6+0;!$(*=,#[!#6])]!@!=!#[*]",
                               resultsAsMols=False)
    outlines.update('%s,%s,%s,%s' % (smi, id, core, chains)
                    for core, chains in frags)
    return outlines
Esempio n. 19
0
def __fragment_mol_link(mol1, mol2, radius=3, keep_stereo=False, protected_ids_1=None, protected_ids_2=None,
                        return_ids=True):
    """
    Cut one hydrogen from each of two molecules and combine all such cuts pairwise.

    INPUT:
        mol1, mol2 - Mol
        radius - integer, forwarded to get_canon_context_core
        keep_stereo - bool, keep or discard information about stereoconfiguration
        protected_ids_1, protected_ids_2 - ids of atoms used to filter cuts of mol1 and mol2 respectively
        return_ids - bool, store atom ids in the "Index" property before cutting; forced to True
                     if any protected ids are given

    OUTPUT:
        list of tuples (env_smi, '[H][*:1].[H][*:2]', ids_1, ids_2)
        ids_1 and ids_2 are lists holding the "Index" value of the hydrogen cut from mol1 and mol2
    """

    hydrogen_pattern = "[#1]!@!=!#[!#1]"

    def filter_frags(frags, protected_ids):
        # a cut is kept once for every non-hydrogen neighbor of an attachment point
        # whose index is not among the protected ids
        protected = set(protected_ids)
        kept = []
        for _, chains in frags:
            for atom in chains.GetAtoms():
                if atom.GetAtomicNum() != 0:
                    continue
                for neighbor in atom.GetNeighbors():
                    if neighbor.GetAtomicNum() != 1 and neighbor.GetIdx() not in protected:
                        kept.append((None, chains))
        return kept

    def prep_frags(frags, keep_stereo=False):
        # frags is a list of tuples [(None, frag_mol_1), (None, frag_mol_2), ...]
        # returns a list of [smiles of the part which is not a hydrogen, [id of the hydrogen]]
        prepared = []
        for _, chains in frags:
            ids = []
            for atom in chains.GetAtoms():
                if atom.GetAtomicNum() == 0:
                    for neighbor in atom.GetNeighbors():
                        if neighbor.GetAtomicNum() == 1:
                            ids = [neighbor.GetIntProp('Index')]
                if ids:
                    break   # only one such occurrence can be
            left, right = Chem.MolToSmiles(chains, isomericSmiles=keep_stereo).split('.')
            prepared.append([right if left == '[H][*:1]' else left, ids])
        return prepared

    if protected_ids_1 or protected_ids_2:
        return_ids = True

    if return_ids:
        for molecule in (mol1, mol2):
            for atom in molecule.GetAtoms():
                atom.SetIntProp("Index", atom.GetIdx())

    frags_1 = rdMMPA.FragmentMol(mol1, pattern=hydrogen_pattern, maxCuts=1, resultsAsMols=True, maxCutBonds=100)
    frags_2 = rdMMPA.FragmentMol(mol2, pattern=hydrogen_pattern, maxCuts=1, resultsAsMols=True, maxCutBonds=100)

    if protected_ids_1:
        frags_1 = filter_frags(frags_1, protected_ids_1)

    if protected_ids_2:
        frags_2 = filter_frags(frags_2, protected_ids_2)

    frags_1 = prep_frags(frags_1, keep_stereo)
    frags_2 = prep_frags(frags_2, keep_stereo)

    # fragments of the first molecule get the second attachment point label
    for entry in frags_1:
        entry[0] = entry[0].replace('*:1', '*:2')

    fake_core = '[*:1]C[*:2]'
    output = []

    for (fr1, ids_1), (fr2, ids_2) in product(frags_1, frags_2):
        chains = '%s.%s' % (fr1, fr2)
        # only the environment is used, the reported core is always two hydrogens
        env, _ = get_canon_context_core(chains, fake_core, radius=radius, keep_stereo=keep_stereo)
        output.append((env, '[H][*:1].[H][*:2]', ids_1, ids_2))

    return output  # list of tuples (env smiles, core smiles, atom ids of mol1, atom ids of mol2)
Esempio n. 20
0
File: crem.py Progetto: DrrDom/crem
def __fragment_mol(mol,
                   radius=3,
                   return_ids=True,
                   keep_stereo=False,
                   protected_ids=None,
                   symmetry_fixes=False):
    """
    Fragment a molecule and return every unique (context, core) combination found.

    INPUT:
        mol - Mol
        radius - integer, number of bonds to cut context
        return_ids - bool, report ids of core atoms; forced to True when protected_ids is given
        keep_stereo - bool, keep or discard information about stereoconfiguration
        protected_ids - set/list/tuple of atom ids which cannot be present in core fragments
        symmetry_fixes - if set, then duplicated fragments having different ids will be returned (useful when one
                         wants to alter only small part of a molecule and it is important atoms with which ids will be
                         replaced)

    OUTPUT:
        list of tuples (env_smi, core_smi, tuple of core atom ids)
        ('C[*:1].C[*:2]', 'CC(C(=O)O)c1ccc(CC([*:1])[*:2])c(Br)c1', (1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))
        ('Cc(c)c(cc)[*:1]', '[H][*:1]', (25,))

    If input mol has explicit hydrogens the output will contain also fragments where core = [H][*:1].
    Smiles of fragments with heavy atoms will contain only heavy atoms
    """
    hydrogen_smi = '[H][*:1]'
    heavy_pattern = "[!#1]!@!=!#[!#1]"
    no_ids = tuple()

    def collect_ids(part, prop="Index"):
        # atoms with atomic number 0 (attachment points) are skipped
        return tuple(
            sorted(a.GetIntProp(prop) for a in part.GetAtoms()
                   if a.GetAtomicNum()))

    if protected_ids:
        return_ids = True

    # remember original atom indices, they are needed after fragmentation
    if return_ids:
        for atom in mol.GetAtoms():
            atom.SetIntProp("Index", atom.GetIdx())

    # heavy atoms
    # NOTE(review): the pass with maxCuts=3 looks redundant with maxCuts=4 - presumably
    # related to the rdkit issue mentioned below; confirm
    frags = rdMMPA.FragmentMol(mol,
                               pattern=heavy_pattern,
                               maxCuts=4,
                               resultsAsMols=True,
                               maxCutBonds=30)
    frags += rdMMPA.FragmentMol(mol,
                                pattern=heavy_pattern,
                                maxCuts=3,
                                resultsAsMols=True,
                                maxCutBonds=30)
    # hydrogen atoms
    frags += rdMMPA.FragmentMol(mol,
                                pattern="[#1]!@!=!#[!#1]",
                                maxCuts=1,
                                resultsAsMols=True,
                                maxCutBonds=100)

    # due to the bug https://github.com/rdkit/rdkit/issues/3040
    # outputs of rdMMPA.FragmentMol calls will contain duplicated fragments,
    # they are removed by collecting the results in a set
    output = set()

    for core, chains in frags:
        if core is not None:
            # multiple cuts: H can be present only in single cuts, so no H check is needed
            env, frag = get_canon_context_core(chains, core, radius,
                                               keep_stereo)
            output.add((env, frag, collect_ids(core) if return_ids else no_ids))
            continue

        # single cut: both parts are tried as context, the other part being the core
        parts = Chem.GetMolFrags(chains, asMols=True)
        first, second = parts[0], parts[1]
        first_ids = collect_ids(first) if return_ids else no_ids
        second_ids = collect_ids(second) if return_ids else no_ids
        for context, core_part, core_ids in ((first, second, second_ids),
                                             (second, first, first_ids)):
            if Chem.MolToSmiles(context) != hydrogen_smi:  # context cannot be H
                env, frag = get_canon_context_core(context, core_part, radius,
                                                   keep_stereo)
                output.add((env, frag, core_ids))

    if symmetry_fixes:
        extended_output = __extend_output_by_equivalent_atoms(mol, output)
        if extended_output:
            output.update(extended_output)

    if protected_ids:
        protected = set(protected_ids)
        output = [entry for entry in output if protected.isdisjoint(entry[2])]

    # list of tuples (env smiles, core smiles, tuple of atom ids)
    return list(output)