Example #1
0
 def test_phenylene(self):
     """Split a conjoined ring system into two overlapping parts and check that merging restores it.

     The molecule is cut twice along different sets of three bonds; one
     fragment of each cut is kept, the two are merged with ``Monster`` and the
     SMILES of the merged molecule is compared with the SMILES of the input.
     """
     # make carboxy and amide benzenes that overlap so that the end result is a phenylene where one ring is oxazine
     # Raw string: the SMILES uses a backslash as a bond-direction marker and
     # '\C' in a normal literal is an invalid escape sequence.
     conjoined = Chem.MolFromSmiles(r'c3c1cccc2\C(=O)O/C(-N)c(c12)cc3')
     before = Chem.MolToSmiles(conjoined)  # structure from wiki is not canonical
     AllChem.EmbedMolecule(conjoined)

     def fragment_at(atom_pairs, which):
         """Cut the bonds joining ``atom_pairs`` and return sanitized fragment number ``which``."""
         bonds = [
             conjoined.GetBondBetweenAtoms(a, b).GetIdx()
             for a, b in atom_pairs
         ]
         fragged = Chem.FragmentOnBonds(conjoined, bonds, addDummies=False)
         part = Chem.GetMolFrags(fragged, asMols=True,
                                 sanitizeFrags=False)[which]
         Chem.SanitizeMol(part)
         return part

     fore = fragment_at([(0, 1), (12, 11), (8, 9)], 1)
     aft = fragment_at([(2, 1), (12, 5), (8, 6)], 0)
     # merge them
     mol = Monster([fore, aft]).combine().positioned_mol
     after = Chem.MolToSmiles(mol)
     self.assertEqual(before, after)
def get_ring_ring_splits(input_mol, labels=False, do_comb_index=False):
    """
    Break every non-ring bond whose two atoms sit in two different rings.

    Each qualifying bond is fragmented on its own; the pieces are reported as
    SMILES in which the dummy atoms are rewritten to ``Xe``. A bond is reported
    once, even when one of its atoms belongs to several rings.

    :param input_mol: RDKit molecule to split.
    :param labels: if true, the two dummy atoms are labelled with the begin and
        end atom indices of the broken bond.
    :param do_comb_index: only consulted when ``labels`` is false; both dummy
        atoms are labelled with ``get_comb_index(begin_idx, end_idx)``.
    :return: a list with one entry per broken bond, each entry being the list
        of fragment SMILES produced by that split.
    """
    # TODO Fix for fused e.g.s
    ring_info = input_mol.GetRingInfo()
    atom_rings = ring_info.AtomRings()
    # Set lookup instead of scanning a flat list for every bond.
    ring_bond_ids = {idx for ring in ring_info.BondRings() for idx in ring}

    linker_bonds = []
    for bond in input_mol.GetBonds():
        if bond.GetIdx() in ring_bond_ids:
            continue
        begin_idx = bond.GetBeginAtomIdx()
        end_idx = bond.GetEndAtomIdx()
        begin_rings = [ring for ring in atom_rings if begin_idx in ring]
        end_rings = [ring for ring in atom_rings if end_idx in ring]
        # One hit is enough: the bond is recorded once no matter how many
        # distinct ring pairs it connects.
        if any(first != second for first in begin_rings
               for second in end_rings):
            linker_bonds.append(bond)

    out_mols = []
    for bond in linker_bonds:
        begin_idx = bond.GetBeginAtomIdx()
        end_idx = bond.GetEndAtomIdx()
        if labels:
            dummy_labels = [(begin_idx, end_idx)]
            wildcard = "*"
        elif do_comb_index:
            comb_index = get_comb_index(begin_idx, end_idx)
            dummy_labels = [(comb_index, comb_index)]
            wildcard = "*"
        else:
            dummy_labels = [(1, 1)]
            # The label "1" is removed together with the wildcard, so the
            # dummy atoms come out as unlabelled Xe.
            wildcard = "1*"
        fragmented = Chem.FragmentOnBonds(input_mol, [bond.GetIdx()],
                                          dummyLabels=dummy_labels)
        smiles = Chem.MolToSmiles(fragmented, isomericSmiles=True)
        out_mols.append(
            [part.replace(wildcard, "Xe") for part in smiles.split(".")])

    return out_mols
Example #3
0
def detriangulate_tets():
    """Build an embedded cyclopropane and cut it in two different ways.

    ``partA`` is the first fragment left after removing bonds 0 and 1 and
    ``partB`` is the molecule with bond 2 removed. Nothing is returned.
    """
    ## test triangle
    triangle = Chem.MolFromSmiles('C1CC1')
    AllChem.EmbedMolecule(triangle)
    two_cuts = Chem.FragmentOnBonds(triangle, [0, 1], addDummies=False)
    pieces = Chem.GetMolFrags(two_cuts, asMols=True)
    partA = pieces[0]
    partB = Chem.FragmentOnBonds(triangle, [2], addDummies=False)
Example #4
0
def desquarify_test():
    """Build an embedded, named cyclobutane and cut it in two different ways.

    ``partA`` (named 'small') is the first fragment left after removing bonds
    0 and 2; ``partB`` (named 'big') is the molecule with bonds 1 and 3
    removed. Nothing is returned.
    """
    square = Chem.MolFromSmiles('C1CCC1')
    AllChem.EmbedMolecule(square)
    square.SetProp('_Name', 'cyclobutane')

    opposite_cuts = Chem.FragmentOnBonds(square, [0, 2], addDummies=False)
    pieces = Chem.GetMolFrags(opposite_cuts, asMols=True)
    partA = pieces[0]
    partA.SetProp('_Name', 'small')

    partB = Chem.FragmentOnBonds(square, [1, 3], addDummies=False)
    partB.SetProp('_Name', 'big')
Example #5
0
 def make_pair_by_split(self, conjoined: Chem.Mol,
                        atom_idx: int) -> Tuple[Chem.Mol, Chem.Mol]:
     """Return two overlapping molecules obtained by splitting ``conjoined``.

     ``fore`` is the first fragment after cutting the bond between
     ``atom_idx`` and ``atom_idx + 1``; ``aft`` is the second fragment after
     cutting the bond between ``atom_idx - 1`` and ``atom_idx``.
     """
     # make overlapping mols by getting a single molecule, and split it
     # this gives more control over Chem.rdMolAlign.AlignMol as this may overlap other atoms.
     # negative weights does not work...

     def split_between(first: int, second: int):
         """Cut the bond joining the two atoms and return the fragments as mols."""
         cut = conjoined.GetBondBetweenAtoms(first, second).GetIdx()
         opened = Chem.FragmentOnBonds(conjoined, [cut], addDummies=False)
         return Chem.GetMolFrags(opened, asMols=True)

     fore = split_between(atom_idx, atom_idx + 1)[0]
     aft = split_between(atom_idx - 1, atom_idx)[1]
     return fore, aft
Example #6
0
def map_atoms(reactant, product):
    """Return the atom ordering of ``product`` that matches ``reactant``.

    Both molecules are deep-copied, kekulized with aromatic flags cleared and
    passed through ``change_mol`` before matching. If the reactant contains a
    ``[CX4;H0;R]-[CX4;H1;R]`` bond, that bond is removed from the reactant
    before it is matched against the product.
    """
    reac, prod = (copy.deepcopy(mol) for mol in (reactant, product))
    for mol in (reac, prod):
        Chem.Kekulize(mol, clearAromaticFlags=True)
    reac, prod = change_mol(reac), change_mol(prod)

    # Break Bond in reactant, in order to compare to product.
    ring_bond_query = Chem.MolFromSmarts('[CX4;H0;R]-[CX4;H1;R]')
    match = reac.GetSubstructMatch(ring_bond_query)
    if not match:
        return prod.GetSubstructMatch(reac)

    bond = reac.GetBondBetweenAtoms(match[0], match[1])
    opened_reac = Chem.FragmentOnBonds(reac, [bond.GetIdx()],
                                       addDummies=False)
    # find new atom order for product
    return prod.GetSubstructMatch(opened_reac)
Example #7
0
def fragment_recursive(mol, frags):
    """Peel fragments off ``mol`` along its BRICS bonds, appending them to ``frags``.

    At each step the BRICS bond with the lowest bond index is cut; the first
    resulting piece is appended to ``frags`` and the second piece is processed
    recursively. A molecule without BRICS bonds is appended as is.

    Any exception raised while fragmenting stops the process (best effort) and
    is recorded at DEBUG level instead of being dropped without trace.

    Returns:
        The ``frags`` list that was passed in, on every path.
    """
    import logging

    try:
        bonds = list(BRICS.FindBRICSBonds(mol))

        if not bonds:
            frags.append(mol)
            return frags

        # Only the lowest bond index is ever used, so take the minimum
        # directly instead of sorting all of them.
        first_bond = min(
            mol.GetBondBetweenAtoms(a1, a2).GetIdx() for (a1, a2), _ in bonds)

        broken = Chem.FragmentOnBonds(mol,
                                      bondIndices=[first_bond],
                                      dummyLabels=[(0, 0)])
        head, tail = Chem.GetMolFrags(broken, asMols=True)
        print(mol_to_smiles(head), mol_to_smiles(tail))
        frags.append(head)

        fragment_recursive(tail, frags)
    except Exception:
        logging.getLogger(__name__).debug(
            "fragment_recursive stopped early", exc_info=True)
    return frags
Example #8
0
def obtain_rings(smi):
    '''Collect the ring-containing pieces of a molecule given as SMILES.

    The molecule is cut at the bonds reported by ``get_rot_bonds_posn`` and
    every resulting SMILES piece that contains the character '1' is kept.

    Examples:
    >>> obtain_rings('CCC1=CC=CC=C1')
    ['c1ccccc1']
    >>> obtain_rings('C1=CC=C(C=C1)C1=CC=CC=C1')
    ['c1ccccc1', 'c1ccccc1']
    >>> obtain_rings('C1=CC2=C(C=C1)C=CC=C2')
    (None, None)

    Parameters:
    smi (string) : SMILES string of a molecule

    Returns
    (list)       : ring-containing SMILES pieces, or the tuple (None, None)
                   when no rotatable bond position is found
    '''
    mol = Chem.MolFromSmiles(smi)
    rotatable = get_rot_bonds_posn(mol)  # rotatable bond positions

    if len(rotatable) == 0:
        return None, None

    cut_ids = get_bond_indeces(mol, rotatable)
    cut_mol = Chem.FragmentOnBonds(mol, cut_ids, addDummies=False)
    pieces = Chem.MolToSmiles(cut_mol).split(".")
    return [piece for piece in pieces if '1' in piece]
Example #9
0
def c2apbp(info_pds_smiles):
    '''
    Split a molecule at the reaction site whose atoms carry atom-map numbers
    1 and 4 and return the fragments as canonical SMILES.

    Every bond joining two atoms mapped '1' or '4' is broken. The numeric
    labels RDKit puts on the resulting dummy atoms are stripped, so each
    attachment point is a plain ``*``.

    The input SMILES is returned unchanged when exactly one such atom exists
    or when no bond joins two of them.
    '''
    mol = Chem.MolFromSmiles(info_pds_smiles)
    mapped_atoms = [
        atom.GetIdx() for atom in mol.GetAtoms()
        if atom.HasProp('molAtomMapNumber')
        and atom.GetProp('molAtomMapNumber') in ('1', '4')
    ]
    if len(mapped_atoms) == 1:
        return info_pds_smiles

    # Each unordered pair is looked up once; (x, y) and (y, x) name the same
    # bond.
    bond_ids = set()
    for position, x in enumerate(mapped_atoms):
        for y in mapped_atoms[position + 1:]:
            bond = mol.GetBondBetweenAtoms(x, y)
            if bond is not None:
                bond_ids.add(bond.GetIdx())
    if not bond_ids:
        return info_pds_smiles

    frags = Chem.FragmentOnBonds(mol, sorted(bond_ids))
    frags_smiles = Chem.MolToSmiles(frags, canonical=True)
    # Raw pattern: '\[' and '\*' are regex escapes, not string escapes.
    frags_smiles_sub = re.sub(r'\[([0-9]+)\*\]', '*', frags_smiles)

    reparsed = Chem.MolFromSmiles(frags_smiles_sub)
    if reparsed is None:
        # Fall back to SMARTS parsing when the text is not valid SMILES.
        return Chem.MolToSmiles(Chem.MolFromSmarts(frags_smiles_sub),
                                canonical=True)
    return Chem.MolToSmiles(reparsed, canonical=True)
def cut_ring(mol):
    """Randomly cut two ring bonds of ``mol``, retrying up to ten times.

    On each attempt one of two patterns is chosen with equal probability:
    a path of four ring atoms (its first and last bond are cut) or a ring atom
    that is not of degree two together with two ring neighbours (both bonds to
    the middle atom are cut). Dummy atoms labelled 1 mark the cut ends.

    Returns:
        The two fragments as molecules when a cut yields exactly two pieces;
        ``None`` when the chosen pattern has no match, when the fragments
        cannot be extracted, or when all ten attempts fail.
    """
    # Compile the patterns once instead of on every attempt.
    four_atom_path = Chem.MolFromSmarts("[R]@[R]@[R]@[R]")
    branched_ring_atom = Chem.MolFromSmarts("[R]@[R;!D2]@[R]")

    for _ in range(10):
        if random.random() < 0.5:
            matches = mol.GetSubstructMatches(four_atom_path)
            if not matches:
                return None
            hit = random.choice(matches)
            atom_pairs = ((hit[0], hit[1]), (hit[2], hit[3]))
        else:
            matches = mol.GetSubstructMatches(branched_ring_atom)
            if not matches:
                return None
            hit = random.choice(matches)
            atom_pairs = ((hit[0], hit[1]), (hit[1], hit[2]))

        bond_ids = [
            mol.GetBondBetweenAtoms(x, y).GetIdx() for x, y in atom_pairs
        ]
        fragments_mol = Chem.FragmentOnBonds(
            mol, bond_ids, addDummies=True, dummyLabels=[(1, 1), (1, 1)]
        )

        try:
            fragments = Chem.GetMolFrags(fragments_mol, asMols=True)
        except Exception:
            # Best effort: fragments that cannot be built count as "no cut",
            # but KeyboardInterrupt/SystemExit are no longer swallowed.
            return None

        if len(fragments) == 2:
            return fragments

    return None
Example #11
0
def frag_on_bonds(m, atom_ids):
  """Cut every bond that crosses the boundary of the atom set ``atom_ids``.

  A bond is broken when exactly one of its two atoms is in ``atom_ids``.
  The fragmentation is applied to a deep copy of ``m`` and that result is
  returned.
  """
  boundary_bonds = [
    bond.GetIdx() for bond in m.GetBonds()
    if (bond.GetBeginAtom().GetIdx() in atom_ids)
    != (bond.GetEndAtom().GetIdx() in atom_ids)
  ]
  return Chem.FragmentOnBonds(copy.deepcopy(m), boundary_bonds)
Example #12
0
def break_on_bond(mol, bond, min_length=3):
    """Break ``mol`` at bond index ``bond`` and return the unsanitized fragments.

    When the atom count minus ``bond`` is at most ``min_length`` the molecule
    is left intact and returned as a one-element list. Otherwise the bond is
    cut with dummy atoms labelled 0 and the fragments are returned as
    molecules.
    """
    leave_intact = mol.GetNumAtoms() - bond <= min_length
    if leave_intact:
        return [mol]

    return Chem.GetMolFrags(
        Chem.FragmentOnBonds(mol, bondIndices=[bond], dummyLabels=[(0, 0)]),
        asMols=True,
        sanitizeFrags=False,
    )
def remove_bonds(mol, list_of_atomiso_bondsets_to_remove):
    """
    Break bonds in an rdkit mol, each bond being identified by the isotope
    labels of its two atoms. Isotope labels are used because atom indices
    can change between operations.

    Inputs:
    :param rdkit.Chem.rdchem.Mol mol: any rdkit mol
    :param list list_of_atomiso_bondsets_to_remove: a list of 2-element
                                        collections, each holding the isotope
                                        labels of the two atoms of one bond
                                        to break; empty entries are skipped
    Returns:
    :returns: rdkit.Chem.rdchem.Mol new_mol: a copy of the input with the
                                            listed bonds broken (no dummy
                                            atoms added), or None if mol is
                                            None, the list is empty, a listed
                                            atom pair is not bonded, or
                                            fragmentation/sanitization fails
    Raises:
    :raises TypeError: if mol is not an rdkit Mol/RWMol, or an entry does not
                       contain exactly two labels
    """
    # None's often end up in a pipeline use of RDKit so we handle this data type as return None
    # instead of raise TypeError
    if mol is None:
        return None

    # If mol is wrong data type (excluding None) raise TypeError
    if not isinstance(mol,
                      (rdkit.Chem.rdchem.Mol, rdkit.Chem.rdchem.RWMol)):
        printout = "mol is the wrong data type. \n"
        printout = printout + "Input should be a rdkit.Chem.rdchem.Mol\n"
        printout = printout + "Input mol was {} type.".format(type(mol))
        raise TypeError(printout)

    if len(list_of_atomiso_bondsets_to_remove) == 0:
        return None

    new_mol = copy.deepcopy(mol)
    for atomiso_bondsets in list_of_atomiso_bondsets_to_remove:
        if len(atomiso_bondsets) == 0:
            continue
        if len(atomiso_bondsets) != 2:
            printout = "list_of_atomiso_bondsets_to_remove needs to be 2 isolabels for the atoms"
            raise TypeError(printout)

        atom_1_idx = int(get_atom_w_iso_num(new_mol, atomiso_bondsets[0]))
        atom_2_idx = int(get_atom_w_iso_num(new_mol, atomiso_bondsets[1]))

        # FragmentOnBonds expects BOND indices, so resolve the bond joining
        # the two atoms rather than passing the atom indices through.
        bond = new_mol.GetBondBetweenAtoms(atom_1_idx, atom_2_idx)
        if bond is None:
            return None

        try:
            new_mol = Chem.FragmentOnBonds(new_mol, [bond.GetIdx()],
                                           addDummies=False)
        except Exception:
            return None

        new_mol = MOH.check_sanitization(new_mol)
        if new_mol is None:
            return None

    # Also covers the case where every entry was empty and nothing has been
    # sanitized yet.
    new_mol = MOH.check_sanitization(new_mol)
    if new_mol is None:
        return None
    return new_mol
Example #14
0
def GetFragments(
    smiles: str,
    mol: Chem.rdchem.Mol,
    neighbor_ids: list,
    atomic_nums: list,
    bond_id_matrix: list,
    bond_type_matrix: list,
) -> Tuple[list, list, list]:
    """Fragment the molecule with isolated carbons method, see
    Lian and Yalkowsky, JOURNAL OF PHARMACEUTICAL SCIENCES 103:2710-2723.

    The bonds to break are chosen by ``FindBreakingBonds`` from the carbon
    atoms, their neighbours and the ids/types of the bonds between them. If
    fragmentation raises, the unfragmented molecule is used and the SMILES is
    logged at INFO level.

    Returns:
        ``(faids, fsmiles, fsmarts)``: per fragment, the list of atom ids, the
        fragment SMILES and the fragment SMARTS.
    """

    # carbons
    cids = [i for (i, x) in enumerate(atomic_nums) if x == 6]

    # carbon neighbor ids
    cnids = NeighborIDs(neighbor_ids, atomic_nums, cids)

    # bond ids
    bids = [[bond_id_matrix[cid][nid] for nid in nids]
            for (cid, nids) in zip(cids, cnids)]

    # bond types
    bts = [[bond_type_matrix[cid][nid] for nid in nids]
           for (cid, nids) in zip(cids, cnids)]

    # broken bond ids
    bbids = FindBreakingBonds(cnids, bids, bts, atomic_nums)

    # break bonds, get fragments
    try:
        fmol = Chem.FragmentOnBonds(
            mol, UniqueElements(list(itertools.chain.from_iterable(bbids))))
    except Exception:
        # Best effort: fall back to the whole molecule as a single fragment.
        fmol = mol
        logging.info("fragmentation exception: %s", smiles)

    # draw fragments, debugging only, expensive
    # Draw.MolToFile(fmol,'fmol.png')

    # fragment atom ids
    faids = [list(x) for x in Chem.rdmolops.GetMolFrags(fmol)]

    # fragment smiles
    fsmiles = [
        Chem.rdmolfiles.MolFragmentToSmiles(fmol, frag) for frag in faids
    ]

    # fragment smarts
    fsmarts = [
        Chem.rdmolfiles.MolFragmentToSmarts(fmol, frag) for frag in faids
    ]

    return faids, fsmiles, fsmarts
Example #15
0
def all_fragment_on_bond(mol,
                         asMols=False,
                         max_num_action=float("Inf"),
                         break_aromatic=True):
    """Fragment all possible bond in a molecule and return the set of resulting fragments
    This is similar to `random_bond_cut`, but is not stochastic as it does not return a random fragment
    but all the fragments resulting from all potential bond break in the molecule.

    The input molecule is not modified: kekulization is done on a copy.

    .. note::
        This will always be a subset of all_bond_remove, the main difference being that all_bond_remove, allow decreasing
        bond count, while this one will always break a molecule into two.

    Args:
        mol: <Chem.Mol>
            input molecule
        asMols: bool, optional
            Whether to return results as mols or smiles
        max_num_action: float, optional
            Maximum number of action to reduce complexity
        break_aromatic: bool, optional
            Whether to attempt to break even aromatic bonds
            (Default: True)

    Returns:
        set of fragments

    """
    # Result unused; kept from the original.
    # NOTE(review): presumably a check that ring info is available - confirm.
    mol.GetRingInfo().AtomRings()
    fragment_set = set()

    # Work on a private copy so that kekulizing (which clears aromatic flags)
    # does not alter the caller's molecule.
    work_mol = Chem.Mol(mol)
    bonds = list(work_mol.GetBonds())
    if not bonds:
        return fragment_set

    if break_aromatic:
        Chem.Kekulize(work_mol, clearAromaticFlags=True)

    for bond in bonds:
        if not break_aromatic and bond.GetIsAromatic():
            continue
        truncate = Chem.FragmentOnBonds(work_mol, [bond.GetIdx()],
                                        addDummies=False)
        truncate = dm.sanitize_mol(truncate)
        if truncate is None:
            continue
        for frag in rdmolops.GetMolFrags(truncate, asMols=True):
            frag = dm.sanitize_mol(frag)
            if frag:
                if not asMols:
                    frag = dm.to_smiles(frag)
                fragment_set.add(frag)
            # Stop as soon as the budget is exceeded.
            if len(fragment_set) > max_num_action:
                return fragment_set
    return fragment_set
Example #16
0
def break_bonds(mol_dict, num_bonds):
    """Break bonds ``0 .. num_bonds - 1`` one at a time in every molecule.

    Each single-bond fragmentation (no dummy atoms) is keyed by its
    non-isomeric SMILES; only the first molecule producing a given SMILES is
    kept.

    Returns:
        ``(frag_smiles, mol_frags)``: the distinct SMILES in order of first
        appearance, and a dict mapping each SMILES to its fragmented molecule.
    """
    mol_frags = {}
    for mol in mol_dict.values():
        for bond_idx in range(num_bonds):
            fragmented = Chem.FragmentOnBonds(mol, [bond_idx],
                                              addDummies=False)
            key = Chem.MolToSmiles(fragmented, isomericSmiles=False)
            if key in mol_frags:
                continue
            mol_frags[key] = fragmented

    # Dict keys keep insertion order, i.e. order of first appearance.
    frag_smiles = list(mol_frags)
    return frag_smiles, mol_frags
Example #17
0
def cut(mol):
    """Cut ``mol`` at one randomly chosen single bond that is not in a ring.

    The cut ends are capped with dummy atoms labelled 1.

    Returns:
        The fragments as molecules, or ``None`` when the molecule has no such
        bond or the fragments cannot be extracted.
    """
    # single bond not in ring; compiled once and reused
    acyclic_single = Chem.MolFromSmarts('[*]-;!@[*]')
    matches = mol.GetSubstructMatches(acyclic_single)
    if not matches:
        return None

    bis = random.choice(matches)
    bs = [mol.GetBondBetweenAtoms(bis[0], bis[1]).GetIdx()]

    fragments_mol = Chem.FragmentOnBonds(mol, bs, addDummies=True,
                                         dummyLabels=[(1, 1)])

    try:
        return Chem.GetMolFrags(fragments_mol, asMols=True)
    except Exception:
        # Best effort, but KeyboardInterrupt/SystemExit are no longer
        # swallowed.
        return None
Example #18
0
def get_fragments(input_mol, iso_labels=True, get_index_iso_map=False):
    """
    Find the fragments for a given molecule by cutting every bond matched by
    SMARTS_PATTERN.

    :param input_mol: molecule to fragment
    :param iso_labels: if true, the dummy atoms of each cut bond are labelled
        with a running counter starting at 100; otherwise with
        ``get_comb_index`` of the two matched atom indices
    :param get_index_iso_map: if true, also return the map from counter label
        to ``[bond index]``
    :return: the fragment list from ``get_frag_list``, optionally paired with
        the label map
    """
    # NOTE(review): when matches exist and iso_labels is false, only the
    # fragment list is returned even if get_index_iso_map is true.
    index_isotope_map = {}
    matched_pairs = input_mol.GetSubstructMatches(
        Chem.MolFromSmarts(SMARTS_PATTERN))

    if matched_pairs:
        cut_bonds = []
        dummy_labels = []
        if iso_labels:
            for label, pair in enumerate(matched_pairs, start=100):
                bond_idx = input_mol.GetBondBetweenAtoms(pair[0],
                                                         pair[1]).GetIdx()
                # Every label is new, so each map entry holds one bond index.
                index_isotope_map[label] = [bond_idx]
                dummy_labels.append((label, label))
                cut_bonds.append(bond_idx)
            input_mol = Chem.FragmentOnBonds(input_mol, cut_bonds,
                                             dummyLabels=dummy_labels)
        else:
            for pair in matched_pairs:
                bond = input_mol.GetBondBetweenAtoms(pair[0], pair[1])
                cut_bonds.append(bond.GetIdx())
                comb_index = get_comb_index(pair[0], pair[1])
                dummy_labels.append((comb_index, comb_index))
            input_mol = Chem.FragmentOnBonds(input_mol, cut_bonds,
                                             dummyLabels=dummy_labels)
            return get_frag_list(str_find="*", input_mol=input_mol)

    frag_list = get_frag_list(str_find="*", input_mol=input_mol)
    if get_index_iso_map:
        return frag_list, index_isotope_map
    return frag_list
Example #19
0
    def enumerate(self, mol, cuts):
        """
        List the ways of slicing a molecule with a fixed number of cuts.

        Every combination of ``cuts`` matched atom pairs is cut at once. The
        dummy atoms created by a cut get their isotope reset to 0 and their
        label stored as atom-map number instead.
        :param mol: A mol object with the molecule to slice.
        :param cuts: The number of cuts to perform.
        :return : A list with the (scaffold, decorations) pairs, as SlicedMol
                  objects, that pass ``self._filter``.
        """

        def count_attachment_points(frag):
            return len([
                atom for atom in frag.GetAtoms()
                if atom.GetSymbol() == ATTACHMENT_POINT_TOKEN
            ])

        sliced_mols = set()
        candidate_pairs = self._get_matches(mol)
        for atom_pairs_to_cut in itertools.combinations(candidate_pairs, cuts):
            to_cut_bonds = sorted(
                mol.GetBondBetweenAtoms(first, second).GetIdx()
                for first, second in atom_pairs_to_cut)
            attachment_labels = [(n, n) for n in range(len(to_cut_bonds))]
            cut_mol = rkc.FragmentOnBonds(mol,
                                          bondIndices=to_cut_bonds,
                                          dummyLabels=attachment_labels)

            # Move the cut label from the isotope to the atom-map number.
            for atom in cut_mol.GetAtoms():
                if atom.GetSymbol() != ATTACHMENT_POINT_TOKEN:
                    continue
                label = atom.GetIsotope()
                atom.SetIsotope(0)
                atom.SetProp("molAtomMapNumber", str(label))

            cut_mol.UpdatePropertyCache()
            fragments = rkc.GetMolFrags(cut_mol,
                                        asMols=True,
                                        sanitizeFrags=True)

            if cuts == 1:
                # With a single cut either side can act as the scaffold.
                first, second = fragments[0], fragments[1]
                sliced_mols.add(SlicedMol(first, [second]))
                sliced_mols.add(SlicedMol(second, [first]))
                continue

            # The scaffold is the first fragment carrying as many attachment
            # points as there are cuts; the rest are decorations.
            scaffold = None
            decorations = []
            for frag in fragments:
                if count_attachment_points(frag) == cuts and not scaffold:
                    scaffold = frag
                else:
                    decorations.append(frag)
            if scaffold:
                sliced_mols.add(SlicedMol(scaffold, decorations))

        return list(filter(self._filter, sliced_mols))
Example #20
0
def delete_bonds(mol, bonds, ftype, hac):
  """ Cut ``mol`` at the given bonds and hand the pieces to ``select_fragments``.

  ``bonds`` holds the atom-index arguments identifying each bond. The result
  is whatever ``select_fragments(fragments, ftype, hac)`` returns. """

  # Replace the given bonds with attachment points (B1-B2 -> B1-[*].[*]-B2)
  cut_ids = []
  for bond in bonds:
    cut_ids.append(mol.GetBondBetweenAtoms(*bond).GetIdx())
  zero_labels = [(0, 0) for _ in cut_ids]
  modifiedMol = Chem.FragmentOnBonds(mol, cut_ids, dummyLabels=zero_labels)

  # should be able to get away without sanitising mol as the valencies should be okay
  # do not do a full sanitization, but do find rings and calculate valences:
  partial_ops = (Chem.SanitizeFlags.SANITIZE_PROPERTIES |
                 Chem.SanitizeFlags.SANITIZE_SYMMRINGS)
  Chem.SanitizeMol(modifiedMol, partial_ops)

  pieces = Chem.GetMolFrags(modifiedMol, asMols=True, sanitizeFrags=False)
  return select_fragments(pieces, ftype, hac)
Example #21
0
def spf(mol, split_id):
    """Split ``mol`` at the first bond accepted by ``okToBreak``.

    The two dummy atoms created by the cut get atomic number ``split_id``.

    Returns:
        The fragments of the cut molecule, or ``[mol]`` when no bond may be
        broken.
    """
    for bond_idx, bond in enumerate(mol.GetBonds()):
        if not okToBreak(bond):
            continue
        cut_mol = Chem.FragmentOnBonds(mol, [bond_idx],
                                       addDummies=True,
                                       dummyLabels=[(0, 0)])
        # Dummy atoms are always added last
        last = cut_mol.GetNumAtoms() - 1
        for dummy_idx in (last, last - 1):
            cut_mol.GetAtomWithIdx(dummy_idx).SetAtomicNum(split_id)
        return Chem.rdmolops.GetMolFrags(cut_mol, asMols=True)

    # If the molecule could not been split, return original molecule
    return [mol]
 def merge(self, scaffold: Chem.Mol, fragmentanda: Chem.Mol,
           anchor_index: int, attachment_details: List[Dict]) -> Chem.Mol:
     """Graft the anchor-containing part of ``fragmentanda`` onto ``scaffold``.

     For each attachment detail, the fragmentanda bond between
     ``anchor_index`` and ``detail['idx_F']`` is cut, the piece holding the
     anchor is appended to the scaffold, and the appended anchor atom is
     bonded to scaffold atom ``detail['idx_S']`` with bond type
     ``detail['type']``. The combined molecule is the scaffold for the next
     detail and is what gets returned.

     Raises ``Exception`` if no piece contains the anchor atom.
     """
     for detail in attachment_details:
         attachment_index = detail['idx_F']  # fragmentanda attachment_index
         scaffold_attachment_index = detail['idx_S']
         bond_type = detail['type']

         cut_bond = fragmentanda.GetBondBetweenAtoms(anchor_index,
                                                     attachment_index)
         f = Chem.FragmentOnBonds(fragmentanda, [cut_bond.GetIdx()],
                                  addDummies=False)
         frag_split = []
         fragmols = Chem.GetMolFrags(f,
                                     asMols=True,
                                     fragsMolAtomMapping=frag_split,
                                     sanitizeFrags=False)
         if self._debug_draw:
             print(frag_split)

         # Get the fragment of interest.
         owner = None
         for position, member_indices in enumerate(frag_split):
             if anchor_index in member_indices:
                 owner = position
                 break
         if owner is None:
             raise Exception
         frag = fragmols[owner]
         frag_anchor_index = frag_split[owner].index(anchor_index)
         if self._debug_draw:
             self.draw_nicely(frag)

         combo = Chem.RWMol(rdmolops.CombineMols(scaffold, frag))
         # Fragment atoms follow the scaffold atoms in the combined mol.
         scaffold_anchor_index = frag_anchor_index + scaffold.GetNumAtoms()
         if self._debug_draw:
             print(scaffold_anchor_index, scaffold_attachment_index,
                   anchor_index, scaffold.GetNumAtoms())
             self.draw_nicely(combo)
         combo.AddBond(scaffold_anchor_index, scaffold_attachment_index,
                       bond_type)
         sanitize_ops = (Chem.rdmolops.SanitizeFlags.SANITIZE_ADJUSTHS +
                         Chem.rdmolops.SanitizeFlags.SANITIZE_SETAROMATICITY)
         Chem.SanitizeMol(combo, sanitizeOps=sanitize_ops, catchErrors=True)
         if self._debug_draw:
             self.draw_nicely(combo)
         scaffold = combo
     return scaffold
Example #23
0
def fragment_on_bonds_and_label(mol, bonds):
    """Cut ``mol`` at the given bond indices, labelling dummies by atom type.

    For every bond, the atom types of its begin and end atoms (from
    ``common.type_atom``) are turned into integer labels via
    ``index_for_atom_type``; those labels go onto the two dummy atoms.

    Returns:
        ``(smi, name, index_to_atom_type)``: SMILES of the fragmented
        molecule, the molecule name, and the reversed label dictionary.
    """
    atom_type_to_index = {}
    dummy_labels = []
    for bond_idx in bonds:
        bond = mol.GetBondWithIdx(bond_idx)
        end_atoms = (mol.GetAtomWithIdx(bond.GetBeginAtomIdx()),
                     mol.GetAtomWithIdx(bond.GetEndAtomIdx()))
        begin_type, end_type = [common.type_atom(atom) for atom in end_atoms]
        # get or create dictionary keys for those atom types
        # (begin atom first, then end atom)
        dummy_labels.append(
            (index_for_atom_type(atom_type_to_index, begin_type),
             index_for_atom_type(atom_type_to_index, end_type)))

    fragmented = Chem.FragmentOnBonds(mol, bonds, dummyLabels=dummy_labels)
    smi = Chem.MolToSmiles(fragmented)
    name = get_name(mol)
    return (smi, name, dict_reverse_binding(atom_type_to_index))
Example #24
0
def fragment_mol(mol, frag_label):
    """Cut ``mol`` at its breakable bonds and number the resulting pieces.

    A bond is broken unless it is aromatic, involves a hydrogen, or has both
    atoms inside a match of one of the excluded SMARTS patterns below.

    Returns:
        ``(frag_dict, frags_as_idx)``: ``frag_dict`` maps each atom index to
        ``{'resname': frag_label, 'resid': n}`` with ``n`` the 1-based number
        of its fragment; ``frags_as_idx`` is the atom-index grouping returned
        by ``Chem.GetMolFrags``.
    """
    excl_smarts = [
        '[CX3](=[OX1])[OX1]',
        '[SX4](=[OX1])=[OX1]',
        '[NX3](=[OX1])[OX1]',
        '[OX1]=C[NX3][*]',
        '[#6r5]~[#7r6]~[#6r6]~[#7r6]~[#6r6]~[#6r5]',  # Special for aspire
        '[SX3](=[OX1])'
    ]
    # Set of atom indices covered by any excluded pattern; a set keeps the
    # per-bond membership tests cheap.
    excluded_atoms = set()
    for smarts in excl_smarts:
        for match in mol.GetSubstructMatches(Chem.MolFromSmarts(smarts)):
            excluded_atoms.update(match)

    sigma_bond_idx = []
    for bnd in mol.GetBonds():
        if bnd.GetIsAromatic():
            continue
        if 1 in (bnd.GetBeginAtom().GetAtomicNum(),
                 bnd.GetEndAtom().GetAtomicNum()):
            continue
        if (bnd.GetEndAtomIdx() in excluded_atoms
                and bnd.GetBeginAtomIdx() in excluded_atoms):
            continue
        sigma_bond_idx.append(bnd.GetIdx())

    fragmentation = Chem.FragmentOnBonds(mol, sigma_bond_idx, addDummies=False)
    frags_as_idx = Chem.GetMolFrags(fragmentation)

    frag_dict = {}
    for resid, indices in enumerate(frags_as_idx, start=1):
        for idx in indices:
            frag_dict[idx] = {'resname': frag_label, 'resid': resid}

    return frag_dict, frags_as_idx
Example #25
0
def fragment_molecule_on_explicit_hydrogens(smiles):
    """Build one ``Fragmentation`` per explicit-hydrogen cut of ``smiles``.

    Every match of ``_hydrogen_cut_pat`` is cut on its own; the piece that is
    not the ``[*][H]`` stub becomes the variable part of the fragmentation.

    Raises:
        AssertionError: if a cut does not yield two pieces, or neither piece
            is ``[*][H]``.
    """
    hydrogen_stub = "[*][H]"  # Hard-coded

    num_heavies = get_num_heavies_from_smiles(smiles)
    Chem.CanonSmiles(smiles)  # return value is not used
    # use santize=False to preserve explicit hydrogens
    input_mol = Chem.MolFromSmiles(smiles, sanitize=False)
    Chem.SanitizeMol(input_mol, Chem.SANITIZE_ALL)

    fragmentations = []
    for cut_pair in input_mol.GetSubstructMatches(_hydrogen_cut_pat):
        bond_idx = input_mol.GetBondBetweenAtoms(*cut_pair).GetIdx()
        fragmented_mol = Chem.FragmentOnBonds(input_mol, [bond_idx],
                                              dummyLabels=[(0, 0)])
        new_smiles = Chem.MolToSmiles(fragmented_mol, isomericSmiles=True)

        pieces = new_smiles.partition(".")
        assert pieces[1] == ".", new_smiles
        left, right = pieces[0], pieces[2]

        if left == hydrogen_stub:
            cut_smiles = right
        elif right == hydrogen_stub:
            cut_smiles = left
        else:
            raise AssertionError("did not split hydrogen correctly: %r %r" %
                                 (smiles, new_smiles))

        # If there were multiple [H] atoms, then we cut on one but others remain.
        # Recanonicalize to remove them.
        if "[H]" in cut_smiles:
            cut_smiles = Chem.CanonSmiles(cut_smiles)

        fragmentations.append(
            Fragmentation(1, EnumerationLabel.NO_ENUMERATION, 0, "1",
                          "[*][H]", "0", num_heavies, "1", cut_smiles, None))

    return fragmentations
Example #26
0
    def encode(self, smiles, subs):
        """Encode molecules and their sub-structures as integer row tables.

        For every SMILES in ``smiles`` and the matching entry of ``subs``, the
        atoms and bonds are turned into 5-tuples
        ``(token, position, partner position, bond type, fragment number)``.

        :param smiles: sequence of molecule SMILES.
        :param subs: sequence of sub-structure SMILES, same length as
            ``smiles``.
        :return: integer array of shape ``(len(smiles), self.max_len, 5)``:
            the first ``self.max_len - self.n_frags - 1`` rows per molecule
            hold the ``m + n`` rows, the remaining ``self.n_frags + 1`` rows
            hold the ``c`` rows.
        """
        # `np.long` was removed in NumPy 1.24; it was an alias of the builtin
        # int, which is used here instead.
        output = np.zeros([len(smiles), self.max_len - self.n_frags - 1, 5],
                          dtype=int)
        connect = np.zeros([len(smiles), self.n_frags + 1, 5], dtype=int)
        for i, s in enumerate(smiles):
            mol = Chem.MolFromSmiles(s)
            sub = Chem.MolFromSmiles(subs[i])
            # Chem.Kekulize(sub)
            sub_idxs = mol.GetSubstructMatches(sub)
            # Keep the first match whose bonds to the rest of the molecule
            # (split_bond) all have integer bond type 1; if none qualifies,
            # the values of the last match are used.
            # NOTE(review): with no match at all, sub_idx and split_bond are
            # never bound and the code below raises NameError.
            for sub_idx in sub_idxs:
                sub_bond = [
                    mol.GetBondBetweenAtoms(
                        sub_idx[b.GetBeginAtomIdx()],
                        sub_idx[b.GetEndAtomIdx()]).GetIdx()
                    for b in sub.GetBonds()
                ]
                sub_atom = [mol.GetAtomWithIdx(ix) for ix in sub_idx]
                split_bond = {
                    b.GetIdx()
                    for a in sub_atom for b in a.GetBonds()
                    if b.GetIdx() not in sub_bond
                }
                single = sum([
                    int(mol.GetBondWithIdx(b).GetBondType())
                    for b in split_bond
                ])
                if single == len(split_bond): break
            frags = Chem.FragmentOnBonds(mol, list(split_bond))

            # MolToSmiles is called for its side effect of setting the
            # '_smilesAtomOutputOrder' property read on the next line.
            Chem.MolToSmiles(frags)
            # NOTE(review): eval() on a property string; the value is produced
            # by RDKit here, but a literal parser would be safer.
            rank = eval(frags.GetProp('_smilesAtomOutputOrder'))
            # Sub-structure atoms first, then the remaining original atoms in
            # SMILES output order (dummy atoms have idx >= mol.GetNumAtoms()).
            mol_idx = list(sub_idx) + [
                idx for idx in rank
                if idx not in sub_idx and idx < mol.GetNumAtoms()
            ]
            # 1-based fragment number of `sub`, repeated once per atom.
            frg_idx = [
                i + 1 for i, f in enumerate(Chem.GetMolFrags(sub)) for _ in f
            ]

            Chem.Kekulize(mol)
            # m, n and c collect the rows for sub-structure bonds, other
            # bonds and connecting bonds respectively.
            m, n, c = [(self.tk2ix['GO'], 0, 0, 0, 1)], [], [(self.tk2ix['GO'],
                                                              0, 0, 0, 0)]
            mol2sub = {ix: i for i, ix in enumerate(mol_idx)}
            for j, idx in enumerate(mol_idx):
                atom = mol.GetAtomWithIdx(idx)
                bonds = sorted(atom.GetBonds(),
                               key=lambda x: mol2sub[x.GetOtherAtomIdx(idx)])
                # Only bonds to atoms placed earlier in mol_idx.
                bonds = [
                    b for b in bonds if j > mol2sub[b.GetOtherAtomIdx(idx)]
                ]
                n_split = sum(
                    [1 if b.GetIdx() in split_bond else 0 for b in bonds])
                tk = self.get_atom_tk(atom)
                for k, bond in enumerate(bonds):
                    ix2 = mol2sub[bond.GetOtherAtomIdx(idx)]
                    is_split = bond.GetIdx() in split_bond
                    if idx in sub_idx:
                        is_connect = is_split
                    elif len(bonds) == 1:
                        is_connect = False
                    elif n_split == len(bonds):
                        is_connect = is_split and k != 0
                    else:
                        is_connect = False
                    if bond.GetIdx() in sub_bond:
                        bin, f = m, frg_idx[j]
                    elif is_connect:
                        bin, f = c, 0
                    else:
                        bin, f = n, 0
                    # The atom token is emitted once; afterwards '*' is used.
                    if bond.GetIdx() in sub_bond or not is_connect:
                        tk2 = tk
                        tk = self.tk2ix['*']
                    else:
                        tk2 = self.tk2ix['*']
                    bin.append((tk2, j, ix2, int(bond.GetBondType()), f))
                # Atom token not emitted with any bond: add a self row.
                if tk != self.tk2ix['*']:
                    bin, f = (m, frg_idx[j]) if idx in sub_idx else (n, f)
                    bin.append((tk, j, j, 0, f))
            output[i, :len(m + n), :] = m + n
            if len(c) > 0:
                connect[i, :len(c)] = c
        return np.concatenate([output, connect], axis=1)
Example #27
0
def maximum_curvature(smi):
    """Collect the largest inter-plane angle of every fragment of a molecule.

    The SMILES is parsed, hydrogens are added and one 3D conformer is
    embedded with a fixed random seed.  The molecule is then cut at every
    single bond whose end atom is not a hydrogen, each resulting fragment is
    matched back onto the parent molecule, and for every match the largest
    angle between any two planes returned by ``atom_plane_segmentation`` is
    recorded.

    :param smi: SMILES string of the molecule to analyse.
    :return: list with one maximum plane angle per fragment match that
        yielded at least two planes (may be empty); ``None`` when the SMILES
        cannot be parsed or the molecule has no eligible single bond.
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        # MolFromSmiles reports a parse failure by returning None; bail out
        # the same way as the "no single bond" case below instead of
        # crashing inside AddHs.
        print('invalid SMILES, return')
        return

    m3d = Chem.AddHs(m)
    AllChem.EmbedMolecule(m3d, randomSeed=1)

    # find all single bonds without 'H' as end atom
    single_bonds_id = [
        bond.GetIdx() for bond in m3d.GetBonds()
        if str(bond.GetBondType()) == 'SINGLE'
        and bond.GetEndAtom().GetSymbol() != 'H'
    ]
    if len(single_bonds_id) == 0:
        print('no single bond, return')
        return

    # NOTE(review): the bond indices come from the H-added copy but are
    # applied to the H-free molecule; this presumably relies on AddHs
    # appending the new H bonds after the existing ones -- confirm.
    frags = Chem.FragmentOnBonds(m, single_bonds_id)
    frag_smiles = Chem.MolToSmiles(frags).split('.')

    # frags_ids holds, per distinct fragment, the list of atom-index tuples
    # where that fragment matches the parent molecule, e.g.
    # [[(14, 15, 16), (17, 26, 27)], [(4, 5, 6, 28)], ...]
    frags_ids = []
    for frag_smi in frag_smiles:
        patt = Chem.MolFromSmarts(frag_smi)
        frag_ids = list(m.GetSubstructMatches(patt))
        if not frag_ids:
            print("molecular m do not contain group: ", frag_smi)
            continue
        # identical fragments produce identical match lists; keep one copy
        if frag_ids not in frags_ids:
            frags_ids.append(frag_ids)

    frag_angle = []
    for frag_ids in frags_ids:
        for atomids in frag_ids:
            atom_planes = atom_plane_segmentation(atomids, m3d)
            # An angle needs a pair of planes; with fewer than two there is
            # nothing to compare and max() would fail on an empty list.
            if atom_planes is None or len(atom_planes) < 2:
                continue
            plane_angles = [
                angle_of_two_planes(atom_planes[i], atom_planes[j])
                for i in range(len(atom_planes))
                for j in range(i + 1, len(atom_planes))
            ]
            frag_angle.append(max(plane_angles))

    if frag_angle:
        print('max angle:', max(frag_angle))
    else:
        # no fragment produced two or more planes; return the empty list
        print('no plane angle found')
    return frag_angle
Example #28
0
    def __decompose_mol(self, Mol):

        ### Mol: Molecule that will be decompsed. Must be an instance of
        ###      Rdkit molecule object.

        ### Holds bond indices of broken bonds at r/l anchor
        ranc_bond_idcs = list()
        lanc_bond_idcs = list()

        ### Holds atom indices of broken bonds at r/l anchor
        ranc_atom_idcs = list()
        lanc_atom_idcs = list()

        ### Holds atom indices of bonds at terminal r/l anchor.
        ### They won't be broken.
        ranc_atom_idcs_t = list()
        lanc_atom_idcs_t = list()

        ### Holds atom indices of broken bonds at connectors
        rconn_atom_idcs = list()
        lconn_atom_idcs = list()

        ### Holds atom indices of bonds between fragment and
        ### terminal connectors. They won't be broken.
        rconn_atom_idcs_t = list()
        lconn_atom_idcs_t = list()

        ### Holds permutation parity of atoms involved in broken bonds
        ranc_parity = list()
        lanc_parity = list()

        ### Stores if anchor atom is chiral or not
        ranc_chiral = list()
        lanc_chiral = list()

        ### Stores bond type information for bonds between fragment and connector/cap
        ranc_bondtype = list()
        lanc_bondtype = list()

        ### Stores the connector index of the bond
        rconn_idcs = list()
        lconn_idcs = list()

        ### Stores the connector index of the bond with terminal anchor
        rconn_idcs_t = list()
        lconn_idcs_t = list()

        ### Stores atom indices of all anchor atoms in molecule
        ranc_mol = list()
        lanc_mol = list()

        ### Stores atom indices of all terminal anchor atoms in molecule
        ranc_mol_t = list()
        lanc_mol_t = list()

        ### List that holds all atom indices
        Mol_atm_idxs = range(Mol.GetNumAtoms())

        if self.verbose:
            print("Start analyzing bonds...")

        ### Begin loop over all connectors
        for connector_idx in range(self.__connector_count):

            conn_obj = self.connectors[connector_idx]

            connector = conn_obj.get_connector()
            lanc = conn_obj.get_lanc()
            ranc = conn_obj.get_ranc()
            ring = conn_obj.get_ring()

            ranc_map = conn_obj.get_ranc_map()
            lanc_map = conn_obj.get_lanc_map()

            connector_mol_matches = Mol.GetSubstructMatches(connector,
                                                            useChirality=True)
            connector_mol_match_count = len(connector_mol_matches)

            connector_mol_match_exclude = list()

            if self.verbose:
                print(
                    "Decomposing with connector ID %d (%s)..." %
                    (connector_idx, self.connectors[connector_idx].get_name()))
                print("Found %d matches." % connector_mol_match_count)

            if connector_mol_match_count == 0:
                if self.verbose:
                    print("No matches found for current connector.", )
                    print("No decomposition.")
                continue

            ### Find the corresponding atom indices of the anchor atoms
            lanc_mol_matches = list()
            ranc_mol_matches = list()
            for idx1 in range(connector_mol_match_count):
                ranc_mol_matches.append(list())
                lanc_mol_matches.append(list())
                for r_i in ranc:
                    ranc_mol_matches[-1].append(
                        connector_mol_matches[idx1][r_i])
                for l_i in lanc:
                    lanc_mol_matches[-1].append(
                        connector_mol_matches[idx1][l_i])

            ### Filter substructure matches that do not unambigously
            ### identify connectors, i.e. they have the same connector
            ### atoms except the anchor atoms. Also, identify all
            ### connectors, that overlap with other, previously matched,
            ### connectors.
            ### Add those connector matches to an exclcude list.
            for idx1 in range(connector_mol_match_count):
                for idx2 in range(connector_mol_match_count):
                    if idx1 < idx2:
                        for idx1_match in connector_mol_matches[idx1]:
                            idx1_match_1 = -1
                            idx1_match_2 = -1
                            if idx1_match in lanc_mol_matches[idx1]:
                                idx1_match_1 = lanc_mol_matches[idx1].index(
                                    idx1_match)
                            if idx1_match in lanc_mol_matches[idx2]:
                                idx1_match_2 = lanc_mol_matches[idx2].index(
                                    idx1_match)
                            if idx1_match_1 > -1 and idx1_match_1 == idx1_match_2:
                                if lanc_map[idx1_match_1] == -1:
                                    continue
                                if idx1 not in connector_mol_match_exclude:
                                    connector_mol_match_exclude.append(idx1)
                                if idx2 not in connector_mol_match_exclude:
                                    connector_mol_match_exclude.append(idx2)
                                if self.verbose:
                                    print("Excluded connector match due to", )
                                    print("ambigious matching on lanc atoms", )
                                    print("%s (match id %d,%d)." %
                                          (idx1_match, idx1, idx2))
                                continue

                            idx1_match_1 = -1
                            idx1_match_2 = -1
                            if idx1_match in ranc_mol_matches[idx1]:
                                idx1_match_1 = ranc_mol_matches[idx1].index(
                                    idx1_match)
                            if idx1_match in ranc_mol_matches[idx2]:
                                idx1_match_2 = ranc_mol_matches[idx2].index(
                                    idx1_match)
                            if idx1_match_1 > -1 and idx1_match_1 == idx1_match_2:
                                if ranc_map[idx1_match_1] == -1:
                                    continue
                                if idx1 not in connector_mol_match_exclude:
                                    connector_mol_match_exclude.append(idx1)
                                if idx2 not in connector_mol_match_exclude:
                                    connector_mol_match_exclude.append(idx2)
                                if self.verbose:
                                    print("Excluded connector match due to", )
                                    print("ambigious matching on ranc atoms", )
                                    print("%s (match id %d,%d)." %
                                          (idx1_match, idx1, idx2))
                                continue

            if self.verbose:
                print("Excluded connector matches: ",
                      connector_mol_match_exclude)

            ### Begin loop over connector_mol_matches
            for idx1 in range(connector_mol_match_count):
                connector_mol_match = connector_mol_matches[idx1]
                ranc_mol_match = ranc_mol_matches[idx1]
                lanc_mol_match = lanc_mol_matches[idx1]

                if conn_obj.get_terminal():
                    for term_anc_idx in conn_obj.get_terminal_anc():
                        term_anc_atm = Mol.GetAtomWithIdx(
                            connector_mol_match[term_anc_idx])
                        for term_neighbor in term_anc_atm.GetNeighbors():
                            term_neighbor_idx = term_neighbor.GetIdx()
                            if term_neighbor_idx not in connector_mol_match:
                                connector_mol_match_exclude.append(idx1)
                                if self.verbose:
                                    print("Excluded current connector match", )
                                    print(
                                        "due to terminal anchor atom %d at non-terminal"
                                        % connector_mol_match[term_anc_idx], )
                                    print("position connected to atom %d." %
                                          term_neighbor_idx)

                if self.verbose:
                    print("Current connector_mol_match ID", idx1)
                    print("Current connector:", connector_mol_match)

                ranc_bond_idcs_tmp, ratom_idcs_tmp = get_frag_bonds(
                    Mol, ranc_mol_match, connector_mol_match, ring,
                    self.verbose)

                lanc_bond_idcs_tmp, latom_idcs_tmp = get_frag_bonds(
                    Mol, lanc_mol_match, connector_mol_match, ring,
                    self.verbose)

                _check_ranc_atom_idcs = ranc_atom_idcs + ranc_atom_idcs_t
                _check_lanc_atom_idcs = lanc_atom_idcs + lanc_atom_idcs_t
                _check_rconn_atom_idcs = rconn_atom_idcs + rconn_atom_idcs_t
                _check_lconn_atom_idcs = lconn_atom_idcs + lconn_atom_idcs_t
                _check_rconn_idcs = rconn_idcs + rconn_idcs_t
                _check_lconn_idcs = lconn_idcs + lconn_idcs_t
                _check_ranc_mol = ranc_mol + ranc_mol_t
                _check_lanc_mol = lanc_mol + lanc_mol_t
                ### Sanity checking for ranc
                for _anc_atom, _anc_bond in zip(ratom_idcs_tmp,
                                                ranc_bond_idcs_tmp):
                    for list_idx in range(len(_check_ranc_mol)):
                        _ranc_mol_match = _check_ranc_mol[list_idx]
                        _lanc_mol_match = _check_lanc_mol[list_idx]
                        rconnector_idx = _check_rconn_idcs[list_idx]
                        lconnector_idx = _check_lconn_idcs[list_idx]

                        if _check_ranc_atom_idcs[list_idx] in ranc_mol_match \
                        and _check_rconn_atom_idcs[list_idx] in ranc_mol_match:
                            if self.verbose:
                                print("Excluded current connector match", )
                                print(
                                    "due to ranc overlap with ranc of connector",
                                    rconnector_idx,
                                )
                                print(
                                    self.connectors[rconnector_idx].get_name())
                            if idx1 not in connector_mol_match_exclude:
                                connector_mol_match_exclude.append(idx1)

                        elif _check_lanc_atom_idcs[list_idx] in ranc_mol_match \
                        and _check_lconn_atom_idcs[list_idx] in ranc_mol_match:
                            if self.verbose:
                                print("Excluded current connector match", )
                                print(
                                    "due to ranc overlap with lanc of connector",
                                    lconnector_idx,
                                )
                                print(
                                    self.connectors[lconnector_idx].get_name())
                            if idx1 not in connector_mol_match_exclude:
                                connector_mol_match_exclude.append(idx1)

                        elif (_check_ranc_atom_idcs[list_idx]==_anc_atom[0] \
                        and _check_rconn_atom_idcs[list_idx]==_anc_atom[1]) \
                        or (_check_ranc_atom_idcs[list_idx]==_anc_atom[1] \
                        and _check_rconn_atom_idcs[list_idx]==_anc_atom[0]):
                            if self.verbose:
                                print("Excluded current connector match", )
                                print(
                                    "due to ranc overlap with ranc of connector",
                                    rconnector_idx,
                                )
                                print(
                                    self.connectors[rconnector_idx].get_name())
                            if idx1 not in connector_mol_match_exclude:
                                connector_mol_match_exclude.append(idx1)

                        elif (_check_lanc_atom_idcs[list_idx]==_anc_atom[0] \
                        and _check_lconn_atom_idcs[list_idx]==_anc_atom[1]) \
                        or (_check_lanc_atom_idcs[list_idx]==_anc_atom[1] \
                        and _check_lconn_atom_idcs[list_idx]==_anc_atom[0]):
                            if self.verbose:
                                print("Excluded current connector match", )
                                print(
                                    "due to ranc overlap with lanc of connector",
                                    lconnector_idx,
                                )
                                print(
                                    self.connectors[lconnector_idx].get_name())
                            if idx1 not in connector_mol_match_exclude:
                                connector_mol_match_exclude.append(idx1)

                        elif _anc_atom[0] in _ranc_mol_match \
                        and _anc_atom[1] in _ranc_mol_match:
                            if self.verbose:
                                print("Excluded current connector match", )
                                print(
                                    "due to ranc overlap with ranc of connector",
                                    rconnector_idx,
                                )
                                print(
                                    self.connectors[rconnector_idx].get_name())
                            if idx1 not in connector_mol_match_exclude:
                                connector_mol_match_exclude.append(idx1)

                        elif _anc_atom[0] in _lanc_mol_match \
                        and _anc_atom[1] in _lanc_mol_match:
                            if self.verbose:
                                print("Excluded current connector match", )
                                print(
                                    "due to ranc overlap with lanc of connector",
                                    lconnector_idx,
                                )
                                print(
                                    self.connectors[lconnector_idx].get_name())
                            if idx1 not in connector_mol_match_exclude:
                                connector_mol_match_exclude.append(idx1)

                    if self.verbose and idx1 not in connector_mol_match_exclude:
                        print(
                            "Ranc decompostion of bond %d of anc atom %d - conn atom %d."
                            % (_anc_bond, _anc_atom[0], _anc_atom[1]))

                ### Sanity checking for lanc
                for _anc_atom, _anc_bond in zip(latom_idcs_tmp,
                                                lanc_bond_idcs_tmp):
                    for list_idx in range(len(lanc_mol)):
                        _ranc_mol_match = _check_ranc_mol[list_idx]
                        _lanc_mol_match = _check_lanc_mol[list_idx]
                        rconnector_idx = _check_rconn_idcs[list_idx]
                        lconnector_idx = _check_lconn_idcs[list_idx]

                        if _check_ranc_atom_idcs[list_idx] in ranc_mol_match \
                        and _check_rconn_atom_idcs[list_idx] in ranc_mol_match:
                            if self.verbose:
                                print("Excluded current connector match", )
                                print(
                                    "due to lanc overlap with ranc of connector",
                                    rconnector_idx,
                                )
                                print(
                                    self.connectors[rconnector_idx].get_name())
                            if idx1 not in connector_mol_match_exclude:
                                connector_mol_match_exclude.append(idx1)

                        elif _check_lanc_atom_idcs[list_idx] in ranc_mol_match \
                        and _check_lconn_atom_idcs[list_idx] in ranc_mol_match:
                            if self.verbose:
                                print("Excluded current connector match", )
                                print(
                                    "due to lanc overlap with lanc of connector",
                                    lconnector_idx,
                                )
                                print(
                                    self.connectors[lconnector_idx].get_name())
                            if idx1 not in connector_mol_match_exclude:
                                connector_mol_match_exclude.append(idx1)

                        elif (_check_ranc_atom_idcs[list_idx]==_anc_atom[0] \
                        and _check_rconn_atom_idcs[list_idx]==_anc_atom[1]) \
                        or (_check_ranc_atom_idcs[list_idx]==_anc_atom[1] \
                        and _check_rconn_atom_idcs[list_idx]==_anc_atom[0]):
                            if self.verbose:
                                print("Excluded current connector match", )
                                print(
                                    "due to lanc overlap with ranc of connector",
                                    rconnector_idx,
                                )
                                print(
                                    self.connectors[rconnector_idx].get_name())
                            if idx1 not in connector_mol_match_exclude:
                                connector_mol_match_exclude.append(idx1)

                        elif (_check_lanc_atom_idcs[list_idx]==_anc_atom[0] \
                        and _check_lconn_atom_idcs[list_idx]==_anc_atom[1]) \
                        or (_check_lanc_atom_idcs[list_idx]==_anc_atom[1] \
                        and _check_lconn_atom_idcs[list_idx]==_anc_atom[0]):
                            if self.verbose:
                                print("Excluded current connector match", )
                                print(
                                    "due to lanc overlap with lanc of connector",
                                    lconnector_idx,
                                )
                                print(
                                    self.connectors[lconnector_idx].get_name())
                            if idx1 not in connector_mol_match_exclude:
                                connector_mol_match_exclude.append(idx1)

                        elif _anc_atom[0] in _ranc_mol_match \
                        and _anc_atom[1] in _ranc_mol_match:
                            if self.verbose:
                                print("Excluded current connector match", )
                                print(
                                    "due to lanc overlap with ranc of connector",
                                    rconnector_idx,
                                )
                                print(
                                    self.connectors[rconnector_idx].get_name())
                            if idx1 not in connector_mol_match_exclude:
                                connector_mol_match_exclude.append(idx1)

                        elif _anc_atom[0] in _lanc_mol_match \
                        and _anc_atom[1] in _lanc_mol_match:
                            if self.verbose:
                                print("Excluded current connector match", )
                                print(
                                    "due to lanc overlap with lanc of connector",
                                    lconnector_idx,
                                )
                                print(
                                    self.connectors[lconnector_idx].get_name())
                            if idx1 not in connector_mol_match_exclude:
                                connector_mol_match_exclude.append(idx1)

                    if self.verbose and idx1 not in connector_mol_match_exclude:
                        print(
                            "Lanc decompostion of bond %d of anc atom %d - conn atom %d."
                            % (_anc_bond, _anc_atom[0], _anc_atom[1]))

                if len(ranc_bond_idcs_tmp)==0 and \
                   len(lanc_bond_idcs_tmp)==0:
                    connector_mol_match_exclude.append(idx1)

                if idx1 in connector_mol_match_exclude:
                    if self.verbose:
                        print("Current connector_mol_match excluded.")
                    continue

                if self.verbose:
                    print("R anchor atom indices in molecule:", ranc_mol_match)
                    print("L anchor atom indices in molecule:", lanc_mol_match)

                ### Keep track of bonds that will be broken (r site)
                for bond_idx,\
                    atom_idx in zip(ranc_bond_idcs_tmp,
                                    ratom_idcs_tmp):

                    anc_idx, conn_idx = atom_idx

                    atm_anc = Mol.GetAtomWithIdx(anc_idx)
                    atm_conn = Mol.GetAtomWithIdx(conn_idx)

                    if conn_obj.get_terminal():
                        ranc_atom_idcs_t.append(anc_idx)
                        ranc_mol_t.append(ranc_mol_match)
                        rconn_atom_idcs_t.append(conn_idx)
                        rconn_idcs_t.append(connector_idx)

                    else:
                        ranc_bond_idcs.append(bond_idx)

                        ranc_atom_idcs.append(anc_idx)
                        ranc_mol.append(ranc_mol_match)
                        rconn_atom_idcs.append(conn_idx)
                        rconn_idcs.append(connector_idx)

                ### Keep track of bonds that will be broken (l site)
                for bond_idx,\
                    atom_idx in zip(lanc_bond_idcs_tmp,
                                    latom_idcs_tmp):

                    anc_idx, conn_idx = atom_idx

                    atm_anc = Mol.GetAtomWithIdx(anc_idx)
                    atm_conn = Mol.GetAtomWithIdx(conn_idx)

                    if conn_obj.get_terminal():
                        lanc_atom_idcs_t.append(anc_idx)
                        lanc_mol_t.append(lanc_mol_match)
                        lconn_atom_idcs_t.append(conn_idx)
                        lconn_idcs_t.append(connector_idx)

                    else:
                        lanc_bond_idcs.append(bond_idx)

                        lanc_atom_idcs.append(anc_idx)
                        lanc_mol.append(lanc_mol_match)
                        lconn_atom_idcs.append(conn_idx)
                        lconn_idcs.append(connector_idx)

            ### End loop over connector_mol_matches

        ### End loop over all connectors

        if len(ranc_bond_idcs) > 0 or len(lanc_bond_idcs) > 0:

            if self.verbose:
                print("Start decomposing molecule....")

            frags = Chem.FragmentOnBonds(
                Mol,
                list(set(ranc_bond_idcs + lanc_bond_idcs)),  #Must be uniq
                addDummies=False)

            frags_mol, mol_frags_atm_idxs = Chem.GetMolFrags(frags, asMols=True, sanitizeFrags=False),\
                                            Chem.GetMolFrags(frags, asMols=False, sanitizeFrags=False)

            if self.verbose:
                self.process_list = frags_mol

            ### Make fragment to fragment cross couplings
            frag_canonical_ranks = list()
            for frag_mol, mol_atm_idxs in zip(frags_mol, mol_frags_atm_idxs):
                frag_canonical_ranks.append(list())
                for atm_idx, mol_atm_idx in enumerate(mol_atm_idxs):
                    frag_canonical_ranks[-1].append(
                        self.canonical_rank[mol_atm_idx])

            N_frags = len(frag_canonical_ranks)
            for i1 in range(N_frags):
                canonical_ranks1 = frag_canonical_ranks[i1]
                for i2 in range(N_frags):
                    if (i2 - 1) < i1:
                        continue
                    canonical_ranks2 = frag_canonical_ranks[i2]
                    for atm_idx2, rank2 in enumerate(canonical_ranks2):
                        if rank2 in canonical_ranks1:
                            atm_idx1 = canonical_ranks1.index(rank2)
                            self.frag2frag_frgs.append([i1, i2])
                            self.frag2frag_atms.append([atm_idx1, atm_idx2])

            ###
            ### Loop over all fragments generated by FragmentOnBonds.
            ### For each fragment that is not a connector find the
            ### corresponding connector fragment.
            ###
            ### frag_mol    : rdkit Mol instance
            ### mol_atm_idxs: atom indices of the original molecule that
            ###               generated the Mol instance of the original molecule
            ###

            for frag_mol, mol_atm_idxs in zip(frags_mol, mol_frags_atm_idxs):

                atm_idx_list = list()
                parity_list = list()
                chirality_list = list()
                bondtype_list = list()

                combo = copy.copy(frag_mol)

                rcap_list_map = list()
                lcap_list_map = list()

                rcap_conn_idx = list()
                lcap_conn_idx = list()

                ranc_list = list()
                lanc_list = list()

                ###
                ### Loop over all atoms in the fragment
                ###
                ### atm_idx    : atom index in the fragment indexing scheme
                ### mol_atm_idx: atom index in the original molecule indexing scheme
                ###
                ### Find fragments and the corresponding connector entry
                ###
                for atm_idx, mol_atm_idx in enumerate(mol_atm_idxs):

                    ###
                    ### A connector atom is stored in connector_mol_idcs
                    ### but will not be found in the anchor lists ranc_atom_idcs
                    ### and lanc_atom_idcs.
                    ###

                    ### Work through the terminal groups.
                    ### They have not been modified, since no
                    ### bonds are broken here.
                    for list_idx, _ranc_idx in enumerate(ranc_atom_idcs_t):
                        if mol_atm_idx != _ranc_idx:
                            continue

                        connector_idx = rconn_idcs_t[list_idx]

                        ranc_list.append(list())
                        for _anc_idx in ranc_mol_t[list_idx]:
                            if _anc_idx in mol_atm_idxs:
                                ranc_list[-1].append(
                                    mol_atm_idxs.index(_anc_idx))

                        rcap_conn_idx.append(connector_idx)
                        rcap_list_map.append(list())

                    for list_idx, _lanc_idx in enumerate(lanc_atom_idcs_t):
                        if mol_atm_idx != _lanc_idx:
                            continue

                        connector_idx = lconn_idcs_t[list_idx]

                        lanc_list.append(list())
                        for _anc_idx in lanc_mol_t[list_idx]:
                            if _anc_idx in mol_atm_idxs:
                                lanc_list[-1].append(
                                    mol_atm_idxs.index(_anc_idx))

                        lcap_conn_idx.append(connector_idx)
                        lcap_list_map.append(list())

                    ### Now, work through the non-terminal groups.
                    ### All these groups have open bond endings, since we have
                    ### broken them during the fragmentation process. Connect
                    ### all these open bond endings to capping groups.
                    for list_idx, _ranc_idx in enumerate(ranc_atom_idcs):
                        if mol_atm_idx != _ranc_idx:
                            continue

                        ranc_list.append(list())
                        for _anc_idx in ranc_mol[list_idx]:
                            if _anc_idx in mol_atm_idxs:
                                ranc_list[-1].append(
                                    mol_atm_idxs.index(_anc_idx))

                        ### combo_length is also the offset of the idx for
                        ### the newly added cap.
                        connector_idx = rconn_idcs[list_idx]
                        conn = self.connectors[connector_idx]
                        cap = conn.get_rcap()
                        combo_length = combo.GetNumAtoms()
                        cap_length = cap.GetNumAtoms()

                        ### Connect fragment and connector cap
                        combo = Chem.CombineMols(combo, cap)
                        atm_idx_list.append([atm_idx, combo_length])
                        parity_list.append(
                            [get_parity(Mol, mol_atm_idx),
                             get_parity(cap, 0)])
                        chirality_list.append([
                            Mol.GetAtomWithIdx(atm_idx).GetChiralTag(),
                            cap.GetAtomWithIdx(0).GetChiralTag()
                        ])
                        bondtype_list.append(
                            Mol.GetBondWithIdx(
                                ranc_bond_idcs[list_idx]).GetBondType())

                        rcap_list_map.append(
                            range(combo_length, combo_length + cap_length))
                        rcap_conn_idx.append(connector_idx)

                    for list_idx, _lanc_idx in enumerate(lanc_atom_idcs):

                        if mol_atm_idx != _lanc_idx:
                            continue

                        lanc_list.append(list())
                        for _anc_idx in lanc_mol[list_idx]:
                            if _anc_idx in mol_atm_idxs:
                                lanc_list[-1].append(
                                    mol_atm_idxs.index(_anc_idx))

                        ### combo_length is also the offset of the idx for
                        ### the newly added cap.
                        connector_idx = lconn_idcs[list_idx]
                        conn = self.connectors[connector_idx]
                        cap = conn.get_lcap()
                        combo_length = combo.GetNumAtoms()
                        cap_length = cap.GetNumAtoms()

                        ### Connect fragment and connector cap
                        combo = Chem.CombineMols(combo, cap)
                        atm_idx_list.append([atm_idx, combo_length])
                        parity_list.append(
                            [get_parity(Mol, mol_atm_idx),
                             get_parity(cap, 0)])
                        chirality_list.append([
                            Mol.GetAtomWithIdx(atm_idx).GetChiralTag(),
                            cap.GetAtomWithIdx(0).GetChiralTag()
                        ])
                        bondtype_list.append(
                            Mol.GetBondWithIdx(
                                lanc_bond_idcs[list_idx]).GetBondType())

                        lcap_list_map.append(
                            range(combo_length, combo_length + cap_length))
                        lcap_conn_idx.append(connector_idx)

                ### Put molecule together
                e_combo = Chem.RWMol(combo)
                for atm_idx, bondtype in zip(atm_idx_list, bondtype_list):
                    e_combo.AddBond(atm_idx[0], atm_idx[1], bondtype)

                combo = e_combo.GetMol()

                for atm_idx, parity, chirality in zip(atm_idx_list,\
                                                      parity_list,\
                                                      chirality_list):

                    for i in range(2):
                        if get_parity(combo, atm_idx[i]) != parity[i]:
                            new_chiral = None
                            if chirality[i] == CHI_TETRAHEDRAL_CCW:
                                new_chiral = CHI_TETRAHEDRAL_CW
                            elif chirality[i] == CHI_TETRAHEDRAL_CW:
                                new_chiral = CHI_TETRAHEDRAL_CCW
                            if new_chiral != None:
                                atm = e_combo.GetAtomWithIdx(atm_idx[i])
                                atm.SetChiralTag(new_chiral)

                self.frag_list.append(combo)
                self.frag_list_map.append(mol_atm_idxs)

                self.rcap_list_map.append(rcap_list_map)
                self.lcap_list_map.append(lcap_list_map)

                self.rcap_conn_idx.append(rcap_conn_idx)
                self.lcap_conn_idx.append(lcap_conn_idx)

                self.ranc_list.append(ranc_list)
                self.lanc_list.append(lanc_list)

                self.__frag_count += 1

            ### end loop over all fragments

        elif self.verbose:
            print("Nothing to decompose.")

        return True
Example #29
0
    def __make_surrogate_cap(self):
        """
        Build the capped version of the surrogate molecule.

        The surrogate is cut at every bond that joins an anchor atom (from
        ``get_s_ranc()`` / ``get_s_lanc()``) to a neighbor that is not an
        anchor of the same side. Each resulting fragment is combined with
        the right cap and the left cap; fragment atoms that used to be bonded
        to a right (left) anchor are bonded to the first atom of the right
        (left) cap instead, using the original bond type.

        The first fragment that received at least one such bond is stored
        (capped molecule, cap index ranges and the fragment-to-surrogate atom
        index mapping) and the search stops.

        :return: True if a capped fragment was built and stored, else False.
        """

        ranc_list = self.get_s_ranc()
        lanc_list = self.get_s_lanc()
        rcap_mol = self.get_rcap()
        lcap_mol = self.get_lcap()
        conn_mol = self.get_surrogate()

        def _inverted(tag):
            ### Mirror image of a tetrahedral tag; None for any other tag.
            if tag == CHI_TETRAHEDRAL_CCW:
                return CHI_TETRAHEDRAL_CW
            if tag == CHI_TETRAHEDRAL_CW:
                return CHI_TETRAHEDRAL_CCW
            return None

        def _open_bonds(anchor_idcs):
            ### Returns (bond_idcs, attach_info) for one side.
            ###
            ### bond_idcs  : indices of all bonds anchor -> non-anchor neighbor
            ### attach_info: neighbor idx -> (bondtype, parity, chirality),
            ###              taken from the first bond found for that neighbor
            bond_idcs = list()
            attach_info = dict()
            for anchor_idx in anchor_idcs:
                anchor = conn_mol.GetAtomWithIdx(anchor_idx)
                for neighbor in anchor.GetNeighbors():
                    neighbor_idx = neighbor.GetIdx()
                    if neighbor_idx in anchor_idcs:
                        continue
                    bond = conn_mol.GetBondBetweenAtoms(anchor_idx,
                                                        neighbor_idx)
                    bond_idcs.append(bond.GetIdx())
                    if neighbor_idx not in attach_info:
                        attach_info[neighbor_idx] = (
                            bond.GetBondType(),
                            get_parity(conn_mol, neighbor_idx),
                            neighbor.GetChiralTag(),
                        )
            return bond_idcs, attach_info

        ranc_bond, ranc_info = _open_bonds(ranc_list)
        lanc_bond, lanc_info = _open_bonds(lanc_list)

        frags = Chem.FragmentOnBonds(conn_mol,
                                     ranc_bond + lanc_bond,
                                     addDummies=False)

        frags_mol = Chem.GetMolFrags(frags, asMols=True)
        mol_frags_atm_idxs = Chem.GetMolFrags(frags, asMols=False)

        rcap_length = rcap_mol.GetNumAtoms()
        lcap_length = lcap_mol.GetNumAtoms()

        ###
        ### Loop over all fragments generated by FragmentOnBonds
        ###
        ### frag_mol    : rdkit Mol instance
        ### mol_atm_idxs: atom indices of the original molecules that
        ###               generated the molecule
        ###

        for frag_mol, mol_atm_idxs in zip(frags_mol, mol_frags_atm_idxs):

            ### Atom layout of the combined molecule:
            ### [fragment atoms][right cap atoms][left cap atoms]
            frag_length = frag_mol.GetNumAtoms()
            rcap_start = frag_length
            lcap_start = frag_length + rcap_length

            e_combo_mol = Chem.RWMol(
                Chem.CombineMols(Chem.CombineMols(frag_mol, rcap_mol),
                                 lcap_mol))

            ### Entries are (fragment atom idx, original parity,
            ### original chiral tag) for every atom bonded to a cap.
            ranc_check = list()
            lanc_check = list()

            ###
            ### Loop over all atoms in the fragment
            ###
            ### atm_idx    : atom index in the fragment indexing scheme
            ### mol_atm_idx: atom index in the original molecule indexing scheme
            ###

            for atm_idx, mol_atm_idx in enumerate(mol_atm_idxs):

                if mol_atm_idx in ranc_info:
                    bondtype, parity, chirality = ranc_info[mol_atm_idx]
                    e_combo_mol.AddBond(atm_idx, rcap_start, bondtype)
                    ranc_check.append((atm_idx, parity, chirality))

                if mol_atm_idx in lanc_info:
                    bondtype, parity, chirality = lanc_info[mol_atm_idx]
                    e_combo_mol.AddBond(atm_idx, lcap_start, bondtype)
                    lanc_check.append((atm_idx, parity, chirality))

            ### Fragments without any cap attachment (e.g. the anchor
            ### pieces themselves) are not the one we are looking for.
            if not ranc_check and not lanc_check:
                continue

            combo_mol = e_combo_mol.GetMol()

            ### If the parity reported by get_parity changed for an attachment
            ### atom, invert the chiral tag it had in the surrogate. The tag
            ### must be looked up per atom: comparing the whole chirality list
            ### against a single tag never matches and silently skips the flip.
            for atm_idx, parity, chirality in ranc_check + lanc_check:
                if get_parity(combo_mol, atm_idx) != parity:
                    new_chiral = _inverted(chirality)
                    if new_chiral is not None:
                        atm = combo_mol.GetAtomWithIdx(atm_idx)
                        atm.SetChiralTag(new_chiral)

            ### Same treatment for the first atom of each cap, compared
            ### against its parity in the stand-alone cap molecule.
            for cap_start, cap_mol in ((rcap_start, rcap_mol),
                                       (lcap_start, lcap_mol)):
                if get_parity(combo_mol, cap_start) != get_parity(cap_mol, 0):
                    atm = combo_mol.GetAtomWithIdx(cap_start)
                    new_chiral = _inverted(atm.GetChiralTag())
                    if new_chiral is not None:
                        atm.SetChiralTag(new_chiral)

            self.__surr_cap = copy.copy(combo_mol)

            self.__rcap_list_map = range(rcap_start, lcap_start)
            self.__lcap_list_map = range(lcap_start,
                                         lcap_start + lcap_length)

            self.__surr2sur_cap_map = mol_atm_idxs

            return True

        return False
Example #30
0
    def _merge_part(self, scaffold: Chem.Mol, fragmentanda: Chem.Mol,
                    anchor_index: int, attachment_details: List[Dict],
                    other_attachments: List[int],
                    other_attachment_details: List[List[Dict]]) -> Chem.Mol:
        """
        Cut one piece out of ``fragmentanda`` and graft it onto ``scaffold``.

        This does the messy work for merge_pair.

        :param scaffold: molecule that receives the new piece (not modified;
            a combined copy is returned).
        :param fragmentanda: molecule the piece is cut from.
        :param anchor_index: index (in ``fragmentanda``) of the atom that
            identifies the piece to keep.
        :param attachment_details: one dict per bond to re-create from the
            anchor; the keys read here are ``'idx_F'`` (fragmentanda atom the
            anchor is cut from), ``'idx_S'`` (scaffold atom to bond to) and
            ``'type'`` (bond type).
        :param other_attachments: further atom indices in ``fragmentanda``
            whose bonds are cut and re-created.
        :param other_attachment_details: parallel to ``other_attachments``;
            only the first dict of each entry is used.
        :return: the merged molecule.
        """
        # Collect every bond to sever: anchor bonds first, then the
        # additional attachment bonds.
        anchor_bonds = [
            fragmentanda.GetBondBetweenAtoms(anchor_index,
                                             detail['idx_F']).GetIdx()
            for detail in attachment_details
        ]
        extra_bonds = [
            fragmentanda.GetBondBetweenAtoms(oi, oad[0]['idx_F']).GetIdx()
            for oi, oad in zip(other_attachments, other_attachment_details)
        ]
        bonds_to_frag = anchor_bonds + extra_bonds
        if self._debug_draw and other_attachments:
            print('ring!', other_attachments)
            print('ring!', other_attachment_details)
        severed = Chem.FragmentOnBonds(fragmentanda,
                                       bonds_to_frag,
                                       addDummies=False)
        # GetMolFrags fills frag_split with the atom indices of each fragment.
        frag_split = []
        fragmols = Chem.GetMolFrags(severed,
                                    asMols=True,
                                    fragsMolAtomMapping=frag_split,
                                    sanitizeFrags=False)
        if self._debug_draw:
            print('Fragment splits')
            print(frag_split)
        # Pick the fragment that contains the anchor atom.
        located = next(((number, members)
                        for number, members in enumerate(frag_split)
                        if anchor_index in members), None)
        if located is None:
            raise Exception
        mol_N, indices = located
        frag = fragmols[mol_N]
        frag_anchor_index = indices.index(anchor_index)
        # Pre-emptively adjust the piece via the two offset helpers before it
        # is combined with the scaffold.
        self._offset_collapsed_ring(frag)
        self._offset_origins(frag)
        # TODO: unfinished experimental idea -- renumber the '_ori_i' property
        # of the piece's atoms to their future indices in the merged molecule.
        if self._debug_draw:
            print('Fragment to add')
            self.draw_nicely(frag)
        # Atoms of the piece are appended after the scaffold's atoms, so
        # their indices are shifted by the scaffold size.
        offset = scaffold.GetNumAtoms()
        combo = Chem.RWMol(rdmolops.CombineMols(scaffold, frag))
        scaffold_anchor_index = frag_anchor_index + offset
        if self._debug_draw:
            print('Pre-merger')
            print(scaffold_anchor_index, attachment_details, anchor_index,
                  offset)
            self.draw_nicely(combo)
        # Re-create the anchor bonds against the scaffold.
        for detail in attachment_details:
            combo.AddBond(scaffold_anchor_index, detail['idx_S'],
                          detail['type'])
        # Re-create the additional attachment bonds.
        for oi, oad in zip(other_attachments, other_attachment_details):
            first_detail = oad[0]
            bond_type = first_detail['type']
            scaffold_attachment_index = first_detail['idx_S']
            scaffold_anchor_index = indices.index(oi) + offset
            combo.AddBond(scaffold_anchor_index, scaffold_attachment_index,
                          bond_type)
            if self._debug_draw:
                print(
                    f"Added additional {bond_type.name} bond between {scaffold_attachment_index} and {scaffold_anchor_index} "
                    f"(formerly {indices.index(oi)})")
        # Partial sanitization only; failures are caught, not raised.
        sanitize_flags = Chem.rdmolops.SanitizeFlags
        Chem.SanitizeMol(combo,
                         sanitizeOps=sanitize_flags.SANITIZE_ADJUSTHS +
                         sanitize_flags.SANITIZE_SETAROMATICITY,
                         catchErrors=True)
        if self._debug_draw:
            print('Merged')
            self.draw_nicely(combo)
        self._prevent_two_bonds_on_dummy(combo)
        return combo.GetMol()