def get_smallest_root_match(self, mol: Mol) -> Mol:
        """Return the smallest stored molecule matching a fragment of ``mol`` grown from atom 0.

        The fragment starts as atom index 0 and is extended along the bonds of
        ``mol``.  After each extension the candidate set (initially every value
        of ``self.molecules``) is narrowed with ``self.find_superstructures``.
        As soon as an extension leaves no candidate, the previous candidate set
        is handed to ``self.get_smallest_mol`` and its result is returned.

        Args:
            mol: Molecule whose atoms are consumed outwards from index 0.

        Returns:
            ``self.get_smallest_mol(list(candidates))`` for the last non-empty
            candidate set.  The same value is returned once every atom of
            ``mol`` has been consumed (the function used to fall off the end
            and return ``None`` in that case).
        """
        search_space: Set[Mol] = set(self.molecules.values())

        num_atoms = mol.GetNumAtoms()
        all_idxs = set(range(num_atoms))
        included_idxs = {0}

        # Undirected adjacency list: atom index -> indices of bonded atoms.
        bonds = {}
        for bond in mol.GetBonds():
            begin = bond.GetBeginAtomIdx()
            end = bond.GetEndAtomIdx()
            bonds.setdefault(begin, []).append(end)
            bonds.setdefault(end, []).append(begin)

        def submol_keeping(kept_idxs):
            # Copy of ``mol`` reduced to ``kept_idxs``.  Atoms are removed from
            # the highest index down so the indices still to be removed stay valid.
            e_mol = Chem.EditableMol(mol)
            for idx in sorted(all_idxs - kept_idxs, reverse=True):
                e_mol.RemoveAtom(idx)
            return e_mol.GetMol()

        def show(candidates):
            # Debug output, only emitted for reasonably small candidate sets.
            if len(candidates) < 100:
                display("from mul")
                for s in candidates:
                    display_numbered(s)

        while len(included_idxs) < num_atoms:
            # Every way of extending the current fragment by exactly one bonded
            # atom.  ``bonds.get`` tolerates atoms without any bond, which used
            # to raise KeyError.
            frontier_permutations = {
                frozenset([*included_idxs, to_idx])
                for from_idx in included_idxs
                for to_idx in bonds.get(from_idx, ())
                if to_idx not in included_idxs
            }

            # Union of the candidates matching any single-atom extension.
            new_search_space = set()
            for perm in frontier_permutations:
                sub = submol_keeping(perm)
                display('mul')
                display_numbered(sub)
                new_search_space |= set(self.find_superstructures(sub, search_space))
                included_idxs |= perm

            if not new_search_space:
                return self.get_smallest_mol(list(search_space))
            search_space = new_search_space
            show(search_space)

            # With several extensions at once, additionally require a match of
            # the fragment that contains all of them together.
            if len(frontier_permutations) > 1:
                sub = submol_keeping(included_idxs)
                display('single')
                display_numbered(sub)
                new_search_space = set(self.find_superstructures(sub, search_space))

            if not new_search_space:
                return self.get_smallest_mol(list(search_space))
            search_space = new_search_space
            show(search_space)

        # All atoms consumed and candidates remain: pick the smallest of them.
        return self.get_smallest_mol(list(search_space))
Example #2
0
def new_process_covalent(directory):
    """Add the covalently linked residue atom to the ligand mol file of each hit.

    Every directory yielded by ``os.walk(directory)`` that contains a
    ``*_bound.pdb`` file is processed:

    1. ``LINK`` records of the bound PDB are scanned; when one side of a
       record contains ``LIG`` the other side is remembered as the partner
       residue atom.
    2. The coordinates of that partner atom are read from its ``ATOM`` record.
    3. The ligand atom (``HETATM`` line of the mol file converted to PDB)
       closest to the partner atom is located with ``get_3d_distance``.
    4. The partner atom is appended to the ligand, single-bonded to that
       closest atom, placed at the partner coordinates, and the mol file is
       overwritten in place.

    Directories without a ``LINK`` record involving ``LIG`` are left alone.
    Directories where the partner atom or a ligand atom within the distance
    cut-off cannot be found are skipped with a printed message instead of
    failing or reusing values from a previously processed directory.

    Args:
        directory: Root directory to walk.

    Raises:
        IndexError: if a directory has a bound PDB but no ``*.mol`` file.
    """
    for f, _dirs, _files in os.walk(directory):
        covalent = False
        # Reset per directory so nothing leaks in from the previous one.
        res = None
        res_coords = None
        atm_trans = None
        ind_to_add = None

        bound_pattern = str(f) + '/*_bound.pdb'
        bound_matches = glob.glob(bound_pattern)
        print(bound_pattern)
        print(bound_matches)
        if not bound_matches:
            continue

        bound_pdb = bound_matches[0]
        mol_file = glob.glob(str(f) + '/*.mol')[0]
        with open(bound_pdb, 'r') as handle:
            pdb = handle.readlines()

        # Columns 13-27 / 43-57 are the two atom descriptors compared below.
        for line in pdb:
            if 'LINK' in line:
                zero = line[13:27]
                one = line[43:57]

                if 'LIG' in zero:
                    res = one
                    covalent = True

                if 'LIG' in one:
                    res = zero
                    covalent = True

        if not covalent:
            continue

        for line in pdb:
            if 'ATOM' in line and line[13:27] == res:
                res_x = float(line[31:39])
                res_y = float(line[39:47])
                res_z = float(line[47:55])
                res_atom_sym = line.rsplit()[-1].rstrip()
                # ``DataFrame.from_csv`` no longer exists in current pandas;
                # this is its documented ``read_csv`` equivalent.
                atom_sym_no = pd.read_csv('loader/atom_numbers.csv',
                                          index_col=0,
                                          parse_dates=True)
                # NOTE(review): the looked-up number is never used afterwards;
                # the lookup is kept because it raises for unknown symbols.
                res_atom_no = atom_sym_no.loc[res_atom_sym].number
                res_coords = [res_x, res_y, res_z]
                print(res_coords)
                atm = Chem.MolFromPDBBlock(line)
                atm_trans = atm.GetAtomWithIdx(0)

        if atm_trans is None:
            print('No ATOM record found for linked atom %r in %s; skipping' %
                  (res, bound_pdb))
            continue

        mol = Chem.MolFromMolFile(mol_file)
        orig_pdb_block = Chem.MolToPDBBlock(mol)
        lig_lines = [
            l for l in orig_pdb_block.split('\n')
            if 'HETATM' in l and 'COMPND' not in l
        ]

        # 1-based position of the HETATM line closest to the residue atom.
        # Atoms at a distance of 100 or more are never selected.
        old_dist = 100
        for j, line in enumerate(lig_lines, start=1):
            coords = [
                line[31:39].strip(), line[39:47].strip(), line[47:55].strip()
            ]
            dist = get_3d_distance(coords, res_coords)

            if dist < old_dist:
                ind_to_add = j
                print(dist)
                old_dist = dist

        if ind_to_add is None:
            print('No ligand atom within range of the linked atom in %s; skipping'
                  % mol_file)
            continue

        # The new atom receives the next free index, i.e. the old atom count.
        # NOTE(review): ``ind_to_add - 1`` assumes the HETATM lines are in atom
        # index order - confirm.
        i = mol.GetNumAtoms()
        edmol = Chem.EditableMol(mol)
        edmol.AddAtom(atm_trans)
        edmol.AddBond(ind_to_add - 1, i, Chem.BondType.SINGLE)
        new_mol = edmol.GetMol()
        conf = new_mol.GetConformer()
        conf.SetAtomPosition(
            i, Point3D(res_coords[0], res_coords[1], res_coords[2]))
        try:
            Chem.MolToMolFile(new_mol, mol_file)
        except ValueError:
            # Retry without kekulization when the first write is rejected.
            Chem.MolToMolFile(new_mol, mol_file, kekulize=False)
    def from_mols(cls, name: str, followup: Chem.Mol, thio: Chem.Mol,
                  best_hit):
        """Build an instance from a follow-up molecule and its thiolate form.

        The instance is created with ``cls.__new__`` (``__init__`` is not run),
        the thiolate sulfur is removed from ``thio`` to give ``dethio_mol``,
        the result is saved and parameterised, and a PDB block holding the apo
        structure plus the positioned ligand is assembled in PyMOL.

        NOTE(review): this body looks like several methods fused together.
        It returns a LINK record followed by the PDB block, and every
        statement after that ``return`` is unreachable.  It also reads
        ``self.best_hit``, ``self.fragmenstein`` and ``self.work_path``, none
        of which are assigned here - confirm where they come from.

        :param name: name of the compound, used in messages and file names.
        :param followup: the follow-up molecule, stored as ``ori_mol``.
        :param thio: the thiolate form; must contain a ``C[S-]`` match.
        :param best_hit: stored as ``_best_hit``.
        :return: LINK record line followed by the PDB block from PyMOL.
        """
        self = cls.__new__(cls)
        self.smiles = Chem.MolToSmiles(followup, kekuleSmiles=False)
        self.name = name
        # cached property
        self._index_map = None
        self._name_map = None
        self._best_hit = best_hit
        self.notebook = {}
        # operations
        self.ori_mol = followup
        # NOTE(review): the return value of AddHs is discarded here and below;
        # RDKit's AddHs presumably returns a new molecule, so these calls may
        # have no effect - confirm.
        Chem.AddHs(self.ori_mol)
        self.thio_mol = thio
        Chem.AddHs(self.thio_mol)
        # Dethiolate manually. All this for a proton that will be out of place anyway.
        # The last 'C[S-]' match supplies the carbon (CX) and sulfur (SX) indices.
        self.CX_idx, self.SX_idx = self.thio_mol.GetSubstructMatches(
            Chem.MolFromSmiles('C[S-]'))[-1]
        Chem.GetSSSR(thio)
        AllChem.EmbedMultipleConfs(thio, numConfs=100)
        AllChem.UFFOptimizeMoleculeConfs(thio, maxIters=2000)
        AllChem.ComputeGasteigerCharges(thio)
        AllChem.MMFFOptimizeMolecule(thio)
        # Remove the sulfur atom to obtain the dethiolated ligand.
        dethio = Chem.EditableMol(thio)
        dethio.RemoveAtom(self.SX_idx)
        self.dethio_mol = dethio.GetMol()
        ## align ligand
        aligned_file = 'temp.mol'
        self.save_confs(self.dethio_mol, aligned_file)
        print(f'SMILES converted: {name}')
        self.parameterise(aligned_file)  # self.pdb_mol gets assigned.
        print(f'Parameterised: {name}')
        # pdbblock = self.make_placed_pdb()
        # Copy the PDB residue info onto dethio_mol atom by atom; the assert
        # checks that both molecules list the same elements in the same order.
        for ma, pa in zip(self.dethio_mol.GetAtoms(), self.pdb_mol.GetAtoms()):
            assert ma.GetSymbol() == pa.GetSymbol(
            ), f'The indices do not align! {ma.GetIdx()}:{ma.GetSymbol()} vs. {pa.GetIdx()}:{pa.GetSymbol()}'
            ma.SetMonomerInfo(pa.GetPDBResidueInfo())
        with GlobalPyMOL() as pymol:
            pymol.cmd.delete('*')
            # pymol.cmd.load(self.best_hit.relaxbound_file, 'apo')
            # # fix drift
            # pymol.cmd.load(self.best_hit.bound_file, 'ref')
            # pymol.cmd.align('apo', 'ref')
            # pymol.cmd.delete('ref')
            # NOTE(review): reads self.best_hit although only self._best_hit is
            # set above - presumably a property defined on the class; confirm.
            pymol.cmd.load(self.best_hit.apo_file, 'apo')
            pymol.cmd.remove('resn LIG')
            # distort positions
            # The positioned molecule is loaded as 'scaffold', the session is
            # saved, 'scaffold' is deleted and the molecule is loaded again as
            # 'ligand' before the combined PDB string is extracted.
            pymol.cmd.read_pdbstr(
                Chem.MolToPDBBlock(self.fragmenstein.positioned_mol),
                'scaffold')
            pymol.cmd.save(
                f'{self.work_path}/{self.name}/{self.name}.scaffold.pdb')
            pymol.cmd.delete('scaffold')
            pymol.cmd.read_pdbstr(
                Chem.MolToPDBBlock(self.fragmenstein.positioned_mol), 'ligand')
            pdbblock = pymol.cmd.get_pdbstr('*')
            pymol.cmd.delete('*')
        # The LINK record is prepended to the PDB block.
        return 'LINK         SG  CYS A 145                 CX  LIG B   1     1555   1555  1.8\n' + pdbblock

        # NOTE(review): everything below is unreachable because of the return
        # above; it looks like the tail of a different method.
        open(f'{self.work_path}/{self.name}/pre_{self.name}.pdb',
             'w').write(pdbblock)
        self.make_overlap_image()
        self.pose = self.make_pose(pdbblock)
        print(f'PyRosetta loaded: {name}')
        self.egor = self.call_egor()
        print(f'EM: {name}')
        self.dock_pose()
        print(f'Docked: {name}')
        # if refine:
        #     self.refine_pose()
        self.snap_shot()
        print(f'Snapped: {name}')
        self.score = self.calculate_score()
        json.dump(self.notebook,
                  open(f'{self.work_path}/{self.name}/{self.name}.json', 'w'))
        print(f'Done: {name}')

        # NOTE(review): as indented, this classmethod is a local definition
        # inside from_mols (and also unreachable); its body appears truncated.
        @classmethod
        def reanimate(cls,
                      mol: Chem.Mol,
                      hits: List[Hit],
                      constraint_file: str,
                      ligand_residue: Union[str, int, Tuple[int, str],
                                            pyrosetta.Vector1],
                      key_residues: Union[None, Sequence[Union[int, str,
                                                               Tuple[int,
                                                                     str]]],
                                          pyrosetta.Vector1] = None):
            # Builds a Fragmenstein from the molecule and hits, then accesses
            # its positioned_mol attribute without using the value.
            fragmenstein = Fragmenstein(mol, hits)
            fragmenstein.positioned_mol
Example #4
0
    # Wall-clock start, used for the throughput report at the end.
    start_time = time.time()
    if len(sys.argv) != 2:
        print("usage: %s input.smi" % sys.argv[0])
        sys.exit(1)
    smiles_path = sys.argv[1]
    n_processed = 0
    # Atom with atomic number 0, substituted at each position in turn.
    star_atom = Chem.Atom(0)
    for name, orig_smile in RobustSmilesMolSupplier(smiles_path):
        mol = Chem.MolFromSmiles(orig_smile)
        # The unmodified molecule is emitted before its variants.
        print("%s\t%s" % (orig_smile, name))
        # One variant per atom index, each with that single atom replaced.
        for atom_idx in range(mol.GetNumAtoms()):
            rw = Chem.EditableMol(mol)
            rw.ReplaceAtom(atom_idx, star_atom, preserveProps=True)
            variant_smiles = Chem.MolToSmiles(rw.GetMol())
            print("%s\t%s_%d" % (variant_smiles, name, atom_idx))
        n_processed += 1
    elapsed = time.time() - start_time
    print("%d molecules at %.2f mol/s" % (n_processed, n_processed / elapsed),
          file=sys.stderr)

# # original code by @Iwatobipen
# # replace any aromatic carbon to aromatic nitrogen.
# # TODO: does not compile
# def nitrogen_scan(mol_in):
#     out_mol_list = []
#     used = set()
Example #5
0
        def trim_mcs_chiral_atoms():
            """
                Remove atoms from the MCS around centres where there might be a
                chirality inversion, i.e. where
                (a) the corresponding atoms in the input molecules are both chiral, and
                (b) their chiral tags differ between the two input molecules.

                Such centres are first flagged on self.mcs_mol; then, for each
                flagged centre, the smallest attached fragment is deleted
                repeatedly until the centre has at most two connections.

                Calls map_mcs_mol (via flag_inverted_atoms_in_mcs) as it uses
                the mappings generated there. Works on self.moli, self.molj and
                self.mcs_mol from the enclosing scope; self.mcs_mol is replaced.
            """
            def reorder_mol_to_mcs(mol):
                """
                    Reorder a molecule so that its atoms are in the same order as the MCS,
                    using the 'to_mcs' property. Return the reordered molecule (as a copy).
                """
                newindexes = list(range(mol.GetNumAtoms()))

                # Find the atom mapping to atom 0 in the MCS, and swap
                # it with position 0. Rinse and repeat
                # NOTE(review): the 'to_mcs' test reads the atom at original
                # index j, while the swap acts on the already permuted
                # newindexes list - confirm this is intended.
                for i in range(mol.GetNumAtoms()):
                    for j in range(i + 1, mol.GetNumAtoms()):
                        if (mol.GetAtomWithIdx(j).HasProp('to_mcs')):
                            if int(mol.GetAtomWithIdx(j).GetProp(
                                    'to_mcs')) == i:
                                newindexes[i], newindexes[j] = newindexes[
                                    j], newindexes[i]

                reordered_mol_copy = Chem.RenumberAtoms(mol, newindexes)
                return reordered_mol_copy

            def flag_inverted_atoms_in_mcs():
                """
                    Flag with CHI_TETRAHEDRAL_CW every atom in the MCS whose
                    chiral tag differs between moli and molj.
                """
                # Generate atommappings as they are useful below
                map_mcs_mol()

                # moli chiral atoms
                rmoli = reorder_mol_to_mcs(self.moli)
                chiral_at_moli = [
                    seq[0] for seq in Chem.FindMolChiralCenters(rmoli)
                ]

                # molj chiral atoms
                rmolj = reorder_mol_to_mcs(self.molj)
                chiral_at_molj = [
                    seq[0] for seq in Chem.FindMolChiralCenters(rmolj)
                ]

                # MCS atom indices whose chiral tags disagree between the inputs
                invertedatoms = []

                for i in chiral_at_moli:
                    # Is atom i in the MCS?
                    ai = rmoli.GetAtomWithIdx(i)
                    if (ai.HasProp('to_mcs')):
                        #print("Checking mol i chiral atom",i,ai.GetProp('to_mcs'))
                        for j in chiral_at_molj:
                            # Is atom j in the MCS?
                            aj = rmolj.GetAtomWithIdx(j)
                            if (aj.HasProp('to_mcs')):
                                #print("Matching mol j chiral atom",j,aj.GetProp('to_mcs'))
                                # Are they the same atom?
                                if (ai.GetProp('to_mcs') == aj.GetProp(
                                        'to_mcs')):
                                    #print("Matched mcs atom ",aj.GetProp('to_mcs'),"inverted?",ai.GetChiralTag()!=aj.GetChiralTag())

                                    # OK, atoms are both chiral, and match the same MCS atom
                                    # Check if the parities are the same. If not, flag with the
                                    # CHI_TETRAHEDRAL_CW property
                                    if (ai.GetChiralTag() !=
                                            aj.GetChiralTag()):
                                        invertedatoms.append(
                                            int(aj.GetProp('to_mcs')))

                # Apply the flag on the MCS molecule itself
                for i in invertedatoms:
                    mcsat = self.mcs_mol.GetAtomWithIdx(i)
                    mcsat.SetChiralTag(
                        Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CW)
                    if options.verbose == 'pedantic':
                        logging.info('Chiral atom detected: %d' % (i))

            #print("MCS before chiral trimming: ",Chem.MolToSmiles(self.mcs_mol))

            # Flag inverted atoms
            flag_inverted_atoms_in_mcs()

            # Trim inverted chiral Atoms. The algorithm is to delete the chiral centre,
            # fragment the molecule, and delete the smallest fragment, repeating
            # until the centre has at most two connections. Rinse and repeat
            # until no more flagged chiral centres remain

            while True:
                # NOTE(review): mcs_chiral_set is assigned but never used below.
                mcs_chiral_set = set()
                atom_idx = -1

                for atom in self.mcs_mol.GetAtoms():
                    # Atoms flagged by flag_inverted_atoms_in_mcs carry
                    # CHI_TETRAHEDRAL_CW; take the first one and clear its flag
                    if (atom.GetChiralTag() ==
                            Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CW):
                        atom_idx = atom.GetIdx()
                        atom.SetChiralTag(
                            Chem.rdchem.ChiralType.CHI_UNSPECIFIED)
                        break

                if atom_idx == -1:  # Not found any more chiral atoms, so done
                    break

                # Move the chiral atom to the end (avoids indexing problems)
                newindexes = list(range(self.mcs_mol.GetNumAtoms()))
                newindexes.remove(atom_idx)
                newindexes.append(atom_idx)
                self.mcs_mol = Chem.RenumberAtoms(self.mcs_mol, newindexes)

                # Now we loop, deleting groups attached to the chiral atom, until the
                # chiral atom has at most two connections (GetDegree() <= 2)
                # Note that getAtoms()[-1] returns the first atom not the last if you
                # don't convert it to a list. Grr.
                while list(self.mcs_mol.GetAtoms())[-1].GetDegree() > 2:

                    #print("MCS mol is",Chem.MolToSmiles(self.mcs_mol),self.mcs_mol.GetNumHeavyAtoms())

                    # Delete the chiral atom in a temporary molecule, and fragment. Since the
                    # chiral atom was the last one, the indexes in the temporary molecule are the
                    # same as in self.mcs_mol
                    edit_mol = Chem.EditableMol(self.mcs_mol)
                    edit_mol.RemoveAtom(self.mcs_mol.GetNumAtoms() - 1)
                    tmp_mol = edit_mol.GetMol()
                    fragments = Chem.rdmolops.GetMolFrags(tmp_mol)
                    #print("Fragments are" ,fragments)

                    # Get index of the smallest fragment (first one on ties);
                    # fragments of 10000 atoms or more never replace index 0
                    min_idx = 0
                    lgt_min = 10000

                    for idx in range(0, len(fragments)):
                        lgt = len(fragments[idx])
                        if lgt < lgt_min:
                            lgt_min = lgt
                            min_idx = idx

                    # Get the atoms in this fragment and sort them so we delete the
                    # largest index first
                    min_frag = list(fragments[min_idx])
                    min_frag.sort(reverse=True)

                    # Delete that fragment from the real MCS (which still
                    # contains the chiral atom as its last atom)
                    edit_mol = Chem.EditableMol(self.mcs_mol)
                    for idx in min_frag:
                        edit_mol.RemoveAtom(idx)
                    self.mcs_mol = edit_mol.GetMol()
def ring_close_retro(lp, pm=None):
    '''
    Reverse of Ring-closing which will occur after addition of heat
    Source: Reynolds Class Notes, 8-25-20, p.2
    Test SMILES: '*c5ccc(Oc4ccc(n3c(=O)c2cc1c(=O)n(*)c(=O)c1cc2c3=O)cc4)cc5'

    For every non-empty combination of non-overlapping ring C(=O)-N matches in
    the periodic molecule, the C-N bond is broken and an oxygen is attached to
    the carbon; the periodic connector bond is then replaced by two '*' atoms
    and the sanitized result is collected.

    Args:
        lp: a LinearPol object; a SMILES string or an RDKit Mol is wrapped in
            ``ru.LinearPol`` for convenience.
        pm: optional periodic molecule of ``lp``; built with
            ``lp.PeriodicMol()`` when omitted.

    Returns:
        List of molecules produced by ``ru.mol_without_atom_index``.  The list
        is empty when there is no periodic molecule, when the start pattern is
        absent or the end pattern already present, or when sanitizing any one
        candidate fails.
    '''
    # Only for convenience. Pass in LinearPol object when possible.
    if isinstance(lp, (str, Chem.rdchem.Mol)):
        lp = ru.LinearPol(lp)

    start_match = Chem.MolFromSmarts('[c,C;!R0;!R1](=O)[n,N;!R0;!R1]')
    end_match = Chem.MolFromSmarts('[#6R0](=O)([OH])[C,c][C,c][CR1](=O)[NR1]')
    if pm is None:
        pm = lp.PeriodicMol()
        if pm is not None:
            pm.GetSSSR()

    if pm is None:
        return []

    # Nothing to open, or the opened form is already present.
    if not pm.HasSubstructMatch(start_match) or pm.HasSubstructMatch(
            end_match):
        return []

    mols = []
    # Indices of lp.mol atoms that are not star atoms; the position in this
    # array is used as the corresponding index in pm.
    lp_no_connect_inds = np.array(
        [x for x in range(lp.mol.GetNumAtoms()) if x not in lp.star_inds])

    def lp_to_pm_ind(lp_ind):
        # Index [0, 0] explicitly: converting a 2-D array to int is deprecated
        # in NumPy.  Raises IndexError when lp_ind is not present.
        return int(np.argwhere(lp_no_connect_inds == lp_ind)[0, 0])

    ar_atom_idx = [a.GetIdx() for a in lp.mol.GetAromaticAtoms()]
    # Only execute below for aromatic polymers. There for speed.
    if len(ar_atom_idx) != 0:
        ri = lp.mol.GetRingInfo()
        ar = ri.AtomRings()
        # For each aromatic atom, the number of rings it belongs to that
        # consist solely of aromatic atoms.
        atom_aromaticity = {a: 0 for a in ar_atom_idx}

        for ring in ar:
            if all(a in atom_aromaticity for a in ring):
                for a in ring:
                    atom_aromaticity[a] += 1

    # Keep only matches that share no atom with an earlier kept match.
    all_matches = pm.mol.GetSubstructMatches(start_match)
    seen = set()
    matches = []  # unique matches
    for match in all_matches:
        ms = set(match)  # match set
        if seen.isdisjoint(ms):
            seen |= ms
            matches.append(match)

    for combo_size in range(1, len(matches) + 1):
        for match_combo in itertools.combinations(matches, combo_size):
            em = Chem.EditableMol(pm.mol)
            for i_c, i_o, i_n in match_combo:  # indices of atoms in pm

                fix_aromaticity = False
                if pm.mol.GetBondBetweenAtoms(
                        i_c, i_n).GetBondType() == Chem.BondType.AROMATIC:
                    fix_aromaticity = True
                    ring_atoms = None
                    ring_size = 100
                    for ring in ar:
                        # Assume the correct ring is the smallest one
                        # containing both atoms.
                        if (lp_no_connect_inds[i_c] in ring
                                and lp_no_connect_inds[i_n] in ring
                                and len(ring) < ring_size):
                            ring_atoms = set(ring)
                            ring_size = len(ring)

                # Attach the new oxygen to the carbon and open the C-N bond.
                o = em.AddAtom(Chem.AtomFromSmiles('O'))
                em.AddBond(i_c, o, Chem.BondType.SINGLE)
                em.RemoveBond(i_c, i_n)

                med_mol = em.GetMol()
                if fix_aromaticity:
                    try:
                        i_n_aromaticity = atom_aromaticity[lp_to_pm_ind(i_n)]
                    except Exception:
                        # Best effort: treat an atom that cannot be looked up
                        # as belonging to no aromatic ring.
                        i_n_aromaticity = 0
                    for i in ring_atoms:
                        # If an atom was part of the same number of aromatic
                        # rings as the N atom, it shouldn't be aromatic.
                        if atom_aromaticity[i] == i_n_aromaticity:
                            pm_i = lp_to_pm_ind(i)
                            med_mol.GetAtomWithIdx(pm_i).SetIsAromatic(False)
                            # Turn all of its aromatic bonds into single bonds.
                            neighs = [
                                x.GetIdx() for x in med_mol.GetAtoms()
                                [pm_i].GetNeighbors()
                            ]
                            aromatic_neighs = [
                                x for x in neighs
                                if med_mol.GetBondBetweenAtoms(pm_i, x).
                                GetBondType() == Chem.BondType.AROMATIC
                            ]
                            em = Chem.EditableMol(med_mol)
                            for x in aromatic_neighs:
                                em.RemoveBond(x, pm_i)
                                em.AddBond(x, pm_i, Chem.BondType.SINGLE)
                            med_mol = em.GetMol()

            # Replace the periodic connector bond by two star atoms.
            em = Chem.EditableMol(med_mol)
            star1 = em.AddAtom(Chem.AtomFromSmiles('*'))
            star2 = em.AddAtom(Chem.AtomFromSmiles('*'))
            em.RemoveBond(pm.connector_inds[0], pm.connector_inds[1])
            em.AddBond(pm.connector_inds[0], star1, Chem.BondType.SINGLE)
            em.AddBond(pm.connector_inds[1], star2, Chem.BondType.SINGLE)

            new_mol = em.GetMol()
            try:
                Chem.SanitizeMol(new_mol)
                mols.append(ru.mol_without_atom_index(new_mol))
            except Exception:
                # One candidate that cannot be sanitized discards all results.
                return []
    return mols
Example #7
0
def delete_bonds(smi, id, mol, bonds, out):
    """Cut the given bonds of ``mol`` and record the labelled fragmentation.

    Each bond in ``bonds`` is removed and both of its atoms receive a dummy
    atom (atomic number 0) as attachment point.  The resulting fragments are
    written as one comma-separated record into ``out``:

    * one cut:      ``smi,id,,fragment1.fragment2``
    * two or more:  ``smi,id,core,side_chains``

    Attachment points are labelled ``[*:1]``, ``[*:2]``, ``[*:3]`` and are
    renumbered so that the n-th side chain always carries label n.  A triple
    cut is only recorded when one fragment holds three attachment points.

    Args:
        smi: SMILES of the parent molecule, copied into the record.
        id: identifier of the parent molecule, copied into the record.
        mol: parent molecule; it is edited through an EditableMol copy.
        bonds: iterable of bonds, each indexable as (atom_idx_a, atom_idx_b).
        out: collection supporting ``in`` and ``add``; receives the record.

    Returns:
        None.
    """
    # use the same parent mol object and create editable mol
    em = Chem.EditableMol(mol)

    # loop through the bonds to delete
    isotope = 0
    isotope_track = {}
    for i in bonds:
        isotope += 1
        # remove the bond
        em.RemoveBond(i[0], i[1])

        # now add attachment points
        newAtomA = em.AddAtom(Chem.Atom(0))
        em.AddBond(i[0], newAtomA, Chem.BondType.SINGLE)

        newAtomB = em.AddAtom(Chem.Atom(0))
        em.AddBond(i[1], newAtomB, Chem.BondType.SINGLE)

        # keep track of where to put isotopes
        isotope_track[newAtomA] = isotope
        isotope_track[newAtomB] = isotope

    # should be able to get away without sanitising mol
    # as the existing valencies/atoms not changed
    modifiedMol = em.GetMol()

    # canonical smiles can be different with and without the isotopes
    # hence to keep track of duplicates use fragmented_smi_noIsotopes
    fragmented_smi_noIsotopes = Chem.MolToSmiles(modifiedMol,
                                                 isomericSmiles=True)

    fragments = fragmented_smi_noIsotopes.split(".")

    # a triple cut is only valid if one fragment has three attachment points
    valid = True
    if isotope == 3:
        valid = any(re.search(r'\*.*\*.*\*', f) for f in fragments)

    if not valid:
        return

    if isotope == 1:
        fragmented_smi_noIsotopes = re.sub(r'\[\*\]', '[*:1]',
                                           fragmented_smi_noIsotopes)

        fragments = fragmented_smi_noIsotopes.split(".")

        s1 = Chem.MolFromSmiles(fragments[0])
        s2 = Chem.MolFromSmiles(fragments[1])

        # need to cansmi again as smiles can be different
        output = '%s,%s,,%s.%s' % (
            smi, id, Chem.MolToSmiles(s1, isomericSmiles=True),
            Chem.MolToSmiles(s2, isomericSmiles=True))
        if output not in out:
            out.add(output)

    elif isotope >= 2:
        # add the isotope labels
        for key in isotope_track:
            modifiedMol.GetAtomWithIdx(key).SetIsotope(isotope_track[key])
        fragmented_smi = Chem.MolToSmiles(modifiedMol, isomericSmiles=True)

        # change the isotopes into labels - currently can't add SMARTS or labels to mol
        for label in ('1', '2', '3'):
            fragmented_smi = re.sub(r'\[%s\*\]' % label, '[*:%s]' % label,
                                    fragmented_smi)

        fragments = fragmented_smi.split(".")

        # identify core/side chains and cansmi them
        core, side_chains = find_correct(fragments)

        # now change the labels on sidechains and core
        # to get the new labels, cansmi the dot-disconnected side chains
        # the first fragment in the side chains has attachment label 1, 2nd: 2, 3rd: 3
        # then change the labels accordingly in the core

        # this is required by the indexing script, as the side-chains are "keys" in the index
        # this ensures the side-chains always have the same numbering

        # maps old label -> new label (the 1-based position of the side chain)
        isotope_track = {}
        side_chain_fragments = side_chains.split(".")

        for position, fragment in enumerate(side_chain_fragments, start=1):
            matchObj = re.search(r'\[\*\:([123])\]', fragment)
            if matchObj:
                isotope_track[matchObj.group(1)] = str(position)

        # change the labels if required; the new label is wrapped in XX so
        # that a later substitution cannot rewrite it a second time
        labels = ('1', '2', '3') if isotope == 3 else ('1', '2')
        for label in labels:
            new_label = isotope_track[label]
            if new_label != label:
                pattern = r'\[\*\:%s\]' % label
                replacement = '[*:XX' + new_label + 'XX]'
                core = re.sub(pattern, replacement, core)
                side_chains = re.sub(pattern, replacement, side_chains)

        # now remove the XX
        core = re.sub('XX', '', core)
        side_chains = re.sub('XX', '', side_chains)

        output = '%s,%s,%s,%s' % (smi, id, core, side_chains)
        if output not in out:
            out.add(output)
def join_frag_linker(linker, st_pt, random_join=True):
    """Join a linker to the fragments in ``st_pt`` at their dummy atoms.

    Both inputs are SMILES strings in which attachment points are dummy
    atoms (``*``).  The neighbour of each linker dummy is bonded to the
    neighbour of a ``st_pt`` dummy, the dummy bonds are removed and the
    longest dot-separated component of the resulting SMILES is kept.

    Args:
        linker: linker SMILES.  An empty string means "no linker": the dummy
            atoms of ``st_pt`` are replaced by hydrogens and the first
            component of the result is returned.
        st_pt: SMILES of the starting fragments.
        random_join: when true, the ``st_pt`` exit points are shuffled with
            ``np.random.shuffle`` and a single SMILES is returned; otherwise
            both orders of the exit points are tried.

    Returns:
        A SMILES string (``linker == ""`` or ``random_join``), else a list of
        two SMILES strings, one per exit-point order.
    """
    if linker == "":
        du = Chem.MolFromSmiles('*')
        return Chem.MolToSmiles(
            Chem.RemoveHs(
                AllChem.ReplaceSubstructs(Chem.MolFromSmiles(st_pt), du,
                                          Chem.MolFromSmiles('[H]'),
                                          True)[0])).split('.')[0]

    combo = Chem.CombineMols(Chem.MolFromSmiles(linker),
                             Chem.MolFromSmiles(st_pt))

    # Include dummy in query
    qp = Chem.AdjustQueryParameters()
    qp.makeDummiesQueries = True

    # Locate the linker inside the combined molecule.
    qlink = Chem.AdjustQueryProperties(Chem.MolFromSmiles(linker), qp)
    linker_atoms = combo.GetSubstructMatches(qlink)
    if len(linker_atoms) > 1:
        # Prefer the first match containing exactly two dummy atoms; if there
        # is none, the last match is used.
        for l_atoms in linker_atoms:
            count_dummy = sum(
                1 for a in l_atoms
                if combo.GetAtomWithIdx(a).GetAtomicNum() == 0)
            if count_dummy == 2:
                break
        linker_atoms = l_atoms
    else:
        linker_atoms = linker_atoms[0]

    def dummy_attachments(atom_indices):
        # For every dummy atom: its (dummy, neighbour) bond and the neighbour
        # index, which serves as the exit point.
        bonds_at = []
        exit_points = []
        for idx in atom_indices:
            atom = combo.GetAtomWithIdx(idx)
            if atom.GetAtomicNum() == 0:
                neighbour = atom.GetNeighbors()[0].GetIdx()
                bonds_at.append((idx, neighbour))
                exit_points.append(neighbour)
        return bonds_at, exit_points

    linker_dummy_bonds_at, linker_exit_points = dummy_attachments(linker_atoms)

    # Everything that is not part of the linker match belongs to st_pt.
    st_pt_atoms = list(
        set(range(combo.GetNumAtoms())).difference(linker_atoms))
    st_pt_dummy_bonds_at, st_pt_exit_points = dummy_attachments(st_pt_atoms)

    bonds_to_break = sorted(linker_dummy_bonds_at + st_pt_dummy_bonds_at,
                            reverse=True)

    def join(st_exits):
        # Bond the exit points pairwise, drop the dummy bonds and return the
        # longest component of the resulting SMILES.
        combo_rw = Chem.EditableMol(combo)
        for atom_1, atom_2 in zip(linker_exit_points, st_exits):
            if atom_1 == atom_2:
                print(linker, st_pt)
                break
            combo_rw.AddBond(atom_1, atom_2, order=Chem.rdchem.BondType.SINGLE)

        for bond in bonds_to_break:
            combo_rw.RemoveBond(bond[0], bond[1])

        # max() returns the first of the longest components, as the stable
        # descending sort used previously did.
        return max(Chem.MolToSmiles(combo_rw.GetMol()).split('.'), key=len)

    if random_join:
        np.random.shuffle(st_pt_exit_points)
        return join(st_pt_exit_points)

    return [join(st_pt_exit_points), join(st_pt_exit_points[::-1])]
Example #9
0
def fragment_rdkit_mol(m, bond):
    """
    Remove one bond from an rdkit molecule and rebuild the resulting piece(s).

    Args:
        m (Chem.Mol): rdkit molecule to fragment
        bond (tuple): the two atom indices (2-tuple) of the bond to break

    Returns:
        list: rdkit molecules rebuilt from the fragment(s) left after breaking
            the bond. Of size 1 or 2, depending on the number of fragments.

    Raises:
        RuntimeError: if a fragment contains neither atom of the broken bond.
    """

    def rebuild_fragment(frag, name, bond_atom):
        """
        Build a fresh rdkit mol from the atoms, coordinates and bonds of `frag`.

        The formal charge of `bond_atom` (an atom of the broken bond) is passed
        as `None` so that it is assigned when the new mol is sanitized; all other
        atoms keep the formal charge they carry in `frag`.
        """
        species = []
        formal_charge = []
        for atom in frag.GetAtoms():
            species.append(atom.GetSymbol())
            formal_charge.append(atom.GetFormalCharge())
        # adjust formal charge of the atom that lost a bond
        formal_charge[bond_atom] = None

        # NOTE: frag.GetConformer().GetPositions() results in a segfault on
        # linux, so the coordinates are read atom by atom instead.
        conformer = frag.GetConformer()
        coords = []
        for idx in range(frag.GetNumAtoms()):
            position = conformer.GetAtomPosition(idx)
            coords.append([component for component in position])

        # Keys must keep the (begin, end) order: a dative bond needs to have the
        # metal as the end atom, so the pair must not be sorted.
        bond_types = {}
        for b in frag.GetBonds():
            bond_types[(b.GetBeginAtomIdx(), b.GetEndAtomIdx())] = b.GetBondType()

        return create_rdkit_mol(species,
                                coords,
                                bond_types,
                                formal_charge,
                                name,
                                force_sanitize=True)

    editable = Chem.EditableMol(m)
    editable.RemoveBond(*bond)
    broken = editable.GetMol()

    # filled by GetMolFrags: atom_mapping[i][k] is the parent index of atom k of
    # fragment i
    atom_mapping = []
    frags = Chem.GetMolFrags(broken,
                             asMols=True,
                             sanitizeFrags=True,
                             fragsMolAtomMapping=atom_mapping)

    # Even with sanitizeFrags=True some properties of the returned frags are
    # still incorrect:
    # 1. features such as TotalDegree() of an atom of the broken bond still have
    #    the value from before the bond was broken.
    # 2. rdkit converts N(=O)=O to [N+](=O)O-] when sanitizing, see
    #    http://www.rdkit.org/docs/RDKit_Book.html#molecular-sanitization
    #    so the formal charges have to be copied over, otherwise sanitizing the
    #    fragments results in an error.
    # Hence every fragment is recreated from scratch using info from the frags.
    new_frags = []
    for i, fg in enumerate(frags):
        mapping = atom_mapping[i]
        candidates = (frag_idx for frag_idx, parent_idx in enumerate(mapping)
                      if parent_idx in bond)
        bond_atom = next(candidates, None)
        if bond_atom is None:
            # this should never happen
            raise RuntimeError("Cannot find bond atom in fragments")

        name = f"{m.GetProp('_Name')}_frag{i}"
        new_frags.append(rebuild_fragment(fg, name, bond_atom))

    return new_frags
Example #10
0
# -*- coding: utf-8 -*-
"""
Created on Fri Apr 24 17:45:39 2020

@author: dmattox
"""

from rdkit import Chem
from rdkit.Chem import Descriptors

# SMILES strings of the two molecules that are joined below.
lst = ['OC[C@H]1OC[C@@H]([C@H]([C@@H]1O)O)O', 'OC[C@H]1OC(O[C@@H]2[C@@H](CO)O[C@@H](O)[C@H](O)[C@H]2O)[C@H](O)[C@@H](O)[C@@H]1O']

# Parse both SMILES with sanitization switched off.
mols = [Chem.MolFromSmiles(smile, sanitize = False) for smile in lst]

# Put both molecules into a single Mol, then connect atoms 20 and 4 of the
# combined molecule with a single bond. The indices are hard-coded for exactly
# these two SMILES strings.
mergeMol = Chem.CombineMols(mols[0], mols[1])
edMerge = Chem.EditableMol(mergeMol)
edMerge.AddBond(20, 4, order = Chem.rdchem.BondType.SINGLE)
mergeMol = edMerge.GetMol()

# Report SMILES, InChIKey and Crippen logP of the merged molecule.
# NOTE(review): mergeMol is never sanitized in this script; confirm that the
# descriptor calls below are meant to run on an unsanitized molecule.
print(Chem.MolToSmiles(mergeMol))
print(Chem.inchi.MolToInchiKey(mergeMol))
print(Descriptors.MolLogP(mergeMol)) 


{'ACE:A:0': <pliptool.plip.modules.plipxml.BSite at 0x128700710>,
 'ACE:B:0': <pliptool.plip.modules.plipxml.BSite at 0x1281c0a10>,
 'BGC:A:122': <pliptool.plip.modules.plipxml.BSite at 0x1287cadd0>,
 'BGC:A:124': <pliptool.plip.modules.plipxml.BSite at 0x1287caed0>,
 'BGC:A:126': <pliptool.plip.modules.plipxml.BSite at 0x1287a8350>,
 'BGC:B:122': <pliptool.plip.modules.plipxml.BSite at 0x1287a8e10>,
 'BGC:B:124': <pliptool.plip.modules.plipxml.BSite at 0x1287a8410>,
Example #11
0
    def generate_fragemnts(self):
        """
        Fragment the SMILES typed into the GUI and list the resulting structures.

        Reads the SMILES string from ``self.NTPS_smiles_entry``, breaks every
        bond matched by the SMARTS pattern below (one bond at a time, always
        starting again from the intact parent molecule), caps both open ends
        with dummy atoms and collects the SMILES of the resulting fragments.
        Every collected SMILES is then drawn to a PNG file, registered in
        ``self.molecule`` and ``self.structures`` and added to the
        ``self.NTPS_frag_list`` widget. Finally the image stored at
        ``self.structures[0]`` is shown in ``self.NTPS_structure_view``.

        Returns nothing; results are stored on ``self`` and written to disk.
        """

        # Clear class level list
        del self.molecule[:]

        # Get text input from LineEdit widget
        self.smile = str(self.NTPS_smiles_entry.text())

        # Append cStructure mol to class level list
        # NOTE(review): the precursor is appended to self.molecule a second time
        # by the drawing loop below (i == 0) -- confirm the duplicate is wanted.
        self.molecule.append(
            cStructure('precursor', self.smile, 'precursor.png'))

        # SMARTS string for bond disconnection: a single, non-ring bond
        # ("-&!@") between two atoms that are each not an [NH] attached to
        # C(=O), not of degree one (!D1) and not part of a triple bond.
        self.patt = Chem.MolFromSmarts(
            '[!$([NH]!@C(=O))&!D1&!$(*#*)]-&!@[!$([NH]!@C(=O))&!D1&!$(*#*)]')

        # Init parent mol object
        self.mol = Chem.MolFromSmiles(self.smile)

        # find the rotatable bonds (pairs of atom indices matching the pattern)
        self.bonds = self.mol.GetSubstructMatches(self.patt)

        # SMILES of everything to display; the precursor itself comes first
        self.all_smis = [self.smile]

        # disconnect rotatable bonds: for each matched bond create an editable
        # copy of the parent, break the bond, and add a dummy atom on each side
        for a, b in self.bonds:
            self.em = Chem.EditableMol(self.mol)
            self.nAts = self.mol.GetNumAtoms()
            self.em.RemoveBond(a, b)
            # atomic number 0 is a dummy atom; the new atoms get the indices
            # nAts and nAts + 1
            self.em.AddAtom(Chem.Atom(0))
            self.em.AddBond(a, self.nAts, Chem.BondType.SINGLE)
            self.em.AddAtom(Chem.Atom(0))
            self.em.AddBond(b, self.nAts + 1, Chem.BondType.SINGLE)
            # has no effect on the next iteration: nAts is re-read from
            # self.mol at the top of the loop
            self.nAts += 2
            self.p = self.em.GetMol()
            Chem.SanitizeMol(self.p)
            self.smis = [
                Chem.MolToSmiles(x, True)
                for x in Chem.GetMolFrags(self.p, asMols=True)
            ]
            for self.smi in self.smis:
                self.all_smis.append(self.smi)

        # draw molecules and save png images for display in structure viewer widget
        # --> there's probably a better way to do this...
        # self.draw is incremented for the precursor as well, so the fragment
        # images are numbered starting at fragment_1.png
        self.draw = 0
        for i, self.smi in enumerate(self.all_smis):
            if i == 0:
                struct_type = 'precursor'
                img_path = 'precursor.png'

            else:
                struct_type, img_path = 'fragment', 'fragment_%s.png' % self.draw

            self.molecule.append(cStructure(struct_type, self.smi, img_path))
            self.template = Chem.MolFromSmiles(self.smi)
            drawOptions = DrawingOptions()
            drawOptions.bgColor = (0, 0, 0)
            Draw.MolToFile(self.template, img_path, options=drawOptions)

            # append to class level list
            # NOTE(review): self.structures is not cleared in this method, so
            # it keeps growing across calls -- confirm it is reset elsewhere.
            self.structures.append(img_path)

            # append to GUI fragments listbox - file name only
            self.NTPS_frag_list.addItem(
                QtGui.QListWidgetItem(
                    img_path.split('/')[-1].strip().split('.')[0]))

            self.draw += 1

        self.autocomplete_mod_properties()

        # add image of first structure to graphicsview
        scene = QtGui.QGraphicsScene()
        scene.addPixmap(QtGui.QPixmap(self.structures[0]))
        self.NTPS_structure_view.setScene(scene)
        self.NTPS_structure_view.show()

        return
Example #12
0
def add_bond_between(mol, a1, a2, bond_type):
    """Add a `bond_type` bond between atoms `a1` and `a2` of `mol`.

    The bond is added on an editable copy of `mol`; the value returned is
    whatever `dm.sanitize_mol` gives back for the edited molecule.
    """
    editable = Chem.EditableMol(mol)
    begin_idx, end_idx = a1.GetIdx(), a2.GetIdx()
    editable.AddBond(begin_idx, end_idx, bond_type)
    bonded_mol = editable.GetMol()
    return dm.sanitize_mol(bonded_mol)
#!/usr/bin/env python
from rdkit import Chem
from rdkit.Chem import AllChem
import os
 
# Template that supplies the bond orders; Mg is given as a separate [Mg+2] ion,
# which is why the Mg bonds are removed from each PDB structure below.
# Raw string: the SMILES contains a backslash (cis/trans marker) that must not
# be read as a string escape.
template = Chem.MolFromSmiles(r'CCC1=C(C2=NC1=CC3=C(C4=C([N-]3)C(=C5[C@H]([C@@H](C(=N5)C=C6C(=C(C(=C2)[N-]6)C=C)C)C)CCC(=O)OC/C=C(\C)/CCCC(C)CCCC(C)CCCC(C)C)[C@H](C4=O)C(=O)OC)C)C.[Mg+2]')

# For every *.pdb file in the current directory: cut the bonds to Mg, assign
# bond orders from the template, add hydrogens and write <name>-addedHs.mol.
for filename in os.listdir("."):
    if not filename.endswith(".pdb"):
        continue
    mol = Chem.MolFromPDBFile(filename, sanitize=False)
    if mol is None:
        # MolFromPDBFile gives None for a file it cannot parse; skip that file
        # instead of aborting the whole run with an AttributeError.
        continue
    bs = []
    for atom in mol.GetAtoms():
        if atom.GetSymbol() == "Mg":
            atom.SetNoImplicit(True)
            for bond in atom.GetBonds():
                bs.append([bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()])
    emol = Chem.EditableMol(mol)
    for b in bs:
        emol.RemoveBond(b[0], b[1])
    newmol = emol.GetMol()
    # single-argument print(...) behaves the same on Python 2 and Python 3
    print(Chem.MolToSmiles(newmol))
    newmol = AllChem.AssignBondOrdersFromTemplate(template, newmol)
    molHs = Chem.AddHs(newmol, addCoords=True)
    # context manager so the output file is flushed and closed deterministically
    with open(filename[:-4] + '-addedHs.mol', 'w') as out_file:
        out_file.write(Chem.MolToMolBlock(molHs))
Example #14
0
def adj_to_nlist(atoms, A, nlist_model, embeddings):
    """Embed a molecule in 3D and yield one padded neighbor list per conformer.

    The molecule is built from `atoms` and the adjacency matrix `A`, embedded
    with `AllChem.EmbedMolecule`, and for each conformer the positions are
    passed to `nlist_model` to obtain the neighbor list.

    Args:
        atoms: sequence of atom specifications, each passed to `Chem.Atom`.
        A: 2D array indexed as `A[i, j]`; an entry > 0 is the bond order
            (1, 2 or 3) between atoms i and j. Only `j >= i` is read.
        nlist_model: callable taking an (N, 3) position array and returning an
            array indexed `[atom, slot, 0]` (distance) and `[atom, slot, 1]`
            (neighbor atom index). A distance >= 100 marks an unused slot.
        embeddings: mapping whose `'nlist'` entry provides the codes
            `'nonbonded'`, `1` (bonded) and `'none'` (padding).

    Yields:
        numpy array of shape (MAX_ATOM_NUMBER, NEIGHBOR_NUMBER, 3) holding, per
        atom and neighbor slot: distance / 10, neighbor atom index, and the
        bonded / non-bonded / padding code.

    Raises:
        ValueError, RuntimeError: re-raised from the embedding step after the
            SMILES of the offending molecule has been printed.
    """
    bond_types = {
        1: Chem.rdchem.BondType.SINGLE,
        2: Chem.rdchem.BondType.DOUBLE,
        3: Chem.rdchem.BondType.TRIPLE
    }
    m = Chem.EditableMol(Chem.Mol())
    for a in atoms:
        m.AddAtom(Chem.Atom(a))
    n_atoms = len(atoms)
    for i in range(n_atoms):
        for j in range(i, n_atoms):
            if A[i, j] > 0:
                m.AddBond(i, j, bond_types[A[i, j]])
    mol = m.GetMol()
    try:
        # no force-field optimization is run after the embedding
        AllChem.EmbedMolecule(mol)
    except (ValueError, RuntimeError):
        print('Unable to process')
        print(Chem.MolToSmiles(mol))
        raise

    # Adjacency matrix rebuilt from the RDKit molecule so that it follows the
    # molecule's atom ordering. It does not depend on the conformer, so it is
    # computed once.
    bonded = np.zeros((MAX_ATOM_NUMBER, MAX_ATOM_NUMBER), dtype=np.int64)
    for b in mol.GetBonds():
        bonded[b.GetBeginAtomIdx(), b.GetEndAtomIdx()] = 1
        bonded[b.GetEndAtomIdx(), b.GetBeginAtomIdx()] = 1

    for c in mol.GetConformers():
        pos = c.GetPositions()
        N = len(pos)
        np_pos = np.zeros((N, 3))
        np_pos[:N, :] = pos
        pos_nlist = nlist_model(np_pos)
        nlist = np.zeros((MAX_ATOM_NUMBER, NEIGHBOR_NUMBER, 3))

        for index in range(N):
            for ni in range(len(pos_nlist[index])):
                # a large distance is the sentinel for "not part of nlist"
                if pos_nlist[index, ni, 0] >= 100:
                    continue
                j = int(pos_nlist[index, ni, 1])
                # / 10 to get to nm
                nlist[index, ni, 0] = pos_nlist[index, ni, 0] / 10
                nlist[index, ni, 1] = j
                # Look the pair up by the neighbor's ATOM index j; the slot
                # index ni only says where in the neighbor list the entry sits.
                if bonded[index, j] == 0:
                    nlist[index, ni, 2] = embeddings['nlist']['nonbonded']
                else:
                    # currently only single is used!
                    nlist[index, ni, 2] = embeddings['nlist'][1]
        # pad out the nlist: rows past the last real atom keep distance 0 and
        # neighbor 0 and get the 'none' code
        nlist[N:, :, 2] = embeddings['nlist']['none']
        yield nlist
Example #15
0
def construct_ligand(fragment_ids, bond_ids, fragment_library):
    """
    Construct a ligand by connecting multiple fragments based on a Combination object

    Parameters
    ----------
    fragment_ids: list of str
        Fragment IDs of recombined ligand, e.g. `["SE_2", "AP_0", "FP_2"]` (`<subpocket>_<fragment index in subpocket pool>`).
    bond_ids : list of list of str
        Bond IDs of recombined ligand, e.g. `[["FP_6", "AP_10"], ["AP_11", "SE_13"]]`: Atom (`<subpocket>_<atom ID>`) pairs per fragment bond.
    fragment_library : dict of pandas.DataFrame
        SMILES and RDKit molecules for fragments (values) per subpocket (key).

    Returns
    -------
    ligand: rdkit.Chem.rdchem.Mol or None
        Recombined ligand with 2D coordinates, or None if the two sides of a
        fragment bond have different bond types or hydrogen removal fails on
        the assembled molecule.
    """

    fragments = []
    for fragment_id in fragment_ids:

        # Get subpocket and fragment index in subpocket
        # (IDs are "<2-letter subpocket>_<index>")
        subpocket = fragment_id[:2]
        fragment_index = int(fragment_id[3:])
        fragment = fragment_library[subpocket].ROMol_original[fragment_index]

        # Store unique atom identifiers in original molecule (important for recombined ligand construction based on atom IDs)
        fragment = Chem.RemoveHs(fragment)
        for i, atom in enumerate(fragment.GetAtoms()):
            fragment_atom_id = f"{subpocket}_{i}"
            atom.SetProp("fragment_atom_id", fragment_atom_id)
            atom.SetProp("fragment_id", fragment.GetProp("complex_pdb"))
        # PropertyMol wrapper around the tagged fragment
        fragment = PropertyMol(fragment)

        # Append fragment to list of fragments
        fragments.append(fragment)

    # Combine fragments using map-reduce model
    combo = reduce(Chem.CombineMols, fragments)

    # Edits go into ed_combo; combo itself stays unchanged, so atom indices
    # looked up in combo remain valid until the dummies are removed below.
    ed_combo = Chem.EditableMol(combo)
    replaced_dummies = []

    for bond in bond_ids:

        # Each bond ID names a dummy atom; the real attachment point is the
        # dummy's first neighbor.
        dummy_1 = next(atom for atom in combo.GetAtoms()
                       if atom.GetProp("fragment_atom_id") == bond[0])
        dummy_2 = next(atom for atom in combo.GetAtoms()
                       if atom.GetProp("fragment_atom_id") == bond[1])
        atom_1 = dummy_1.GetNeighbors()[0]
        atom_2 = dummy_2.GetNeighbors()[0]

        # check bond types
        bond_type_1 = combo.GetBondBetweenAtoms(dummy_1.GetIdx(),
                                                atom_1.GetIdx()).GetBondType()
        bond_type_2 = combo.GetBondBetweenAtoms(dummy_2.GetIdx(),
                                                atom_2.GetIdx()).GetBondType()
        if bond_type_1 != bond_type_2:
            # Do not construct this ligand if bond types are not matching
            return None

        ed_combo.AddBond(atom_1.GetIdx(), atom_2.GetIdx(), order=bond_type_1)

        replaced_dummies.extend([dummy_1.GetIdx(), dummy_2.GetIdx()])

    # Remove replaced dummy atoms, highest index first so that the indices of
    # the atoms still to be removed do not shift
    replaced_dummies.sort(reverse=True)
    for dummy in replaced_dummies:
        ed_combo.RemoveAtom(dummy)

    ligand = ed_combo.GetMol()

    # Replace remaining dummy atoms with hydrogens
    du = Chem.MolFromSmiles("*")
    h = Chem.MolFromSmiles("[H]", sanitize=False)
    ligand = AllChem.ReplaceSubstructs(ligand, du, h, replaceAll=True)[0]
    try:
        ligand = Chem.RemoveHs(ligand)
    except ValueError:
        # report the molecule that could not be processed and give up on it
        print(Chem.MolToSmiles(ligand))
        return None

    # Clear properties
    for prop in ligand.GetPropNames():
        ligand.ClearProp(prop)
    for atom in ligand.GetAtoms():
        atom.ClearProp("fragment_atom_id")

    # Generate 2D coordinates
    AllChem.Compute2DCoords(ligand)

    return ligand
                             Chem.rdchem.BondType.DOUBLE, 
                             Chem.rdchem.BondType.TRIPLE,
                             Chem.rdchem.BondType.AROMATIC]
smi_arr = []
mol_arr = []
ct = 0
len_iter = 0
while True:
    len_iter+=1
    if ct == 100:
        break
    try:
        sample = model.sample(1, 8)[0]
        sample = [sample[0][0], sample[1][0],sample[2][0]]
        new_mol = Chem.Mol()
        new_mol = Chem.EditableMol(new_mol)
        for i in range(8):
            new_atom_type = atom_types[np.argmax(sample[0][i])]
            new_atom = Chem.Atom(new_atom_type)
            ind = new_mol.AddAtom(new_atom)
            assert ind == i
        for i in range(8):
            for j in range(i+1, 8):
                if sample[2][i, j, 1] > 0.5:
                    new_mol.AddBond(i, j, Chem.rdchem.BondType.SINGLE)
        mol = new_mol.GetMol()
        Chem.SanitizeMol(mol)
        smi = Chem.MolToSmiles(mol)
        if '.' in smi:
            continue
        print(smi)
def func_chain_retro(lp, rxn):
    '''
    Functions to retrosynthetically functionalize chains. The majority of reactions only transform at most one match.

    Args:
        lp: a LinearPol object. A SMILES string or an RDKit Mol is accepted for
            convenience and wrapped in `ru.LinearPol`.
        rxn (str): key into `func_chain_rxns` selecting the reaction.

    Returns:
        list: candidate precursor molecules; empty when the reaction does not
            apply to `lp`.
    '''
    # only for convenience. Pass in LinearPol object when possible
    if isinstance(lp, (str, Chem.rdchem.Mol)):
        lp = ru.LinearPol(lp)

    if rxn in ['aldean', 'thiol-ene', 'azal']:
        # reaction-SMARTS based branch: return the first product that still
        # contains a dummy atom
        dummy_patt = Chem.MolFromSmarts('[#0]')
        for smart in func_chain_rxns[rxn]:
            RD_rxn = Chem.AllChem.ReactionFromSmarts(
                smart)  #RDKit reaction object
            rxn_out = RD_rxn.RunReactants((lp.mol, ))
            if len(rxn_out) != 0:
                for mol in rxn_out[0]:  #only look at first set of reactants
                    if mol.HasSubstructMatch(dummy_patt):
                        return [mol]
        return []

    replace_group = func_chain_rxns[rxn]
    if rxn == 'nitro_base':
        sc = lp.SideChainMol()
        if sc is None:
            return []
        # give up when the side chain itself matches the group
        if len(sc.GetSubstructMatches(replace_group)) != 0:
            return []
    matches = lp.mol.GetSubstructMatches(replace_group)
    if len(matches) == 0:
        return []
    flat_matches = ru.flatten_ll(matches)
    #make sure the connector inds aren't being matched
    if lp.star_inds[0] in flat_matches or lp.star_inds[1] in flat_matches:
        return []

    mols = []
    if rxn == 'nitro_base':  #this branch is for reactions where arbitrary groups must be removed
        frag_mol = Chem.FragmentOnBonds(lp.mol, [
            lp.mol.GetBondBetweenAtoms(i, j).GetIdx() for i, j in matches
        ])
        # the first fragment is skipped in both listings below
        frag_mols = Chem.GetMolFrags(frag_mol)[1:]
        frag_ls = Chem.MolToSmiles(
            ru.mol_without_atom_index(frag_mol)).split('.')[1:]
        # strip the isotope labels from the dummy atoms: "[12*]" -> "*"
        frag_ls = [re.sub(r'\[[0-9]+\*\]', '*', s) for s in frag_ls]
        unique_frags = {}  #SMILES - list of atom matches
        for i, s in enumerate(frag_ls):
            if s not in unique_frags:
                unique_frags[s] = list(frag_mols[i])
            else:
                unique_frags[s].extend(frag_mols[i])
        all_atoms = set(range(lp.mol.GetNumAtoms()))
        for combo_matches in unique_frags.values():
            keep_atoms = all_atoms.difference(combo_matches)
            mols.append(
                lp.SubChainMol(
                    lp.mol,
                    [lp.mol.GetAtomWithIdx(x) for x in keep_atoms]))
    else:
        # try every non-empty combination of matches, removing the 2nd and
        # 3rd atom of each match
        n_matches = len(matches)
        for L in range(1, n_matches + 1):
            for match_combo in itertools.combinations(matches, L):
                o_inds = []
                for match in match_combo:
                    o_inds.append(match[1])
                    o_inds.append(match[2])
                em = Chem.EditableMol(lp.mol)
                # highest index first so the remaining indices stay valid
                for x in sorted(o_inds, reverse=True):
                    em.RemoveAtom(x)
                new_mol = em.GetMol()
                try:
                    Chem.SanitizeMol(new_mol)
                except Exception:
                    # best effort: candidates that do not sanitize are dropped
                    # (KeyboardInterrupt/SystemExit are no longer swallowed)
                    continue
                mols.append(new_mol)
    return mols
Example #18
0
def _vec_to_mol(dv, de, atom_list, bpatt_dim, train=False):
    """Decode node/edge feature arrays into a SMILES string.

    Args:
        dv: 2D node array. Columns `[:4]` encode the formal charge (categories
            -1, 1, 2, 3), columns `[4:7]` the number of explicit hydrogens
            (1, 2, 3) and the last `len(atom_list)` columns the atom type. In
            each group, a row whose entries sum to less than 1 decodes to
            "none" (charge 0 / no explicit H / no atom).
        de: 3D edge array. The first three channels encode the bond type
            (single, double, triple); they are followed by one block of
            `bpatt_dim[i]` channels per bridging pattern, giving how often
            pattern i is inserted between the two atoms.
        atom_list: atom symbols, indexed by the decoded atom type.
        bpatt_dim: channel count of each bridging-pattern block. Six patterns
            are decoded, so at least six entries are required.
        train: if True, a disconnected result ('.' in the SMILES) is an error.

    Returns:
        str: SMILES obtained from a SMILES round trip of the rebuilt molecule.

    Raises:
        RuntimeError: if `train` is set and the SMILES contains '.'.
        Errors raised by `Chem.SanitizeMol` are propagated unchanged.
    """

    def to_dummy(vec, ax=1, thr=1):
        # append a "none of the above" channel: thr minus the sum of the others
        return np.concatenate([vec, thr - np.sum(vec, ax, keepdims=True)], ax)

    def cat_to_val(vec, cat):
        # map category indices to values; the extra trailing 0 is the value of
        # the dummy channel. Works on a copy so the caller's list is untouched.
        return np.array(list(cat) + [0])[vec]

    bond_ref = [Chem.BondType.SINGLE, Chem.BondType.DOUBLE, Chem.BondType.TRIPLE]
    single, double = bond_ref[0], bond_ref[1]

    # Bridging patterns inserted between two atoms j and k:
    # (symbols of the inserted atoms, bond types along j - inserted... - k)
    bridges = [
        (('C', 'C'), (single, single, single)),
        (('C',), (single, single)),
        (('C', 'C'), (single, double, single)),
        (('C', 'C'), (double, single, double)),
        (('N',), (single, single)),
        (('O',), (single, single)),
    ]

    node_atom = np.argmax(to_dummy(dv[:, -len(atom_list):], 1), 1)
    node_charge = cat_to_val(np.argmax(to_dummy(dv[:, :4], 1), 1), [-1, 1, 2, 3])
    node_exp = cat_to_val(np.argmax(to_dummy(dv[:, 4:7], 1), 1), [1, 2, 3])

    edge_bond = np.argmax(to_dummy(de[:, :, :len(bond_ref)], 2), 2)
    edge_patt = []
    offset = len(bond_ref)
    for dim in bpatt_dim:
        block = de[:, :, offset:offset + dim]
        edge_patt.append(
            cat_to_val(np.argmax(to_dummy(block, 2), 2), list(range(1, dim + 1))))
        offset += dim

    # keep only the rows that decoded to a real atom type
    selid = np.where(node_atom < len(atom_list))[0]

    node_atom = node_atom[selid]
    node_charge = node_charge[selid]
    node_exp = node_exp[selid]

    edge_bond = edge_bond[selid][:, selid]
    edge_patt = [ep[selid][:, selid] for ep in edge_patt]

    edmol = Chem.EditableMol(Chem.MolFromSmiles(''))

    n_nodes = len(node_atom)
    for j in range(n_nodes):
        atom_add = Chem.Atom(atom_list[node_atom[j]])
        if node_charge[j] != 0:
            atom_add.SetFormalCharge(int(node_charge[j]))
        if node_exp[j] > 0:
            atom_add.SetNumExplicitHs(int(node_exp[j]))
        edmol.AddAtom(atom_add)

    # direct bonds between decoded atoms
    for j in range(n_nodes - 1):
        for k in range(j + 1, n_nodes):
            if edge_bond[j, k] < len(bond_ref):
                edmol.AddBond(j, k, bond_ref[edge_bond[j, k]])

    # bridging patterns; m is the index the next added atom will receive
    m = n_nodes
    for j in range(n_nodes):
        for k in range(j + 1, n_nodes):
            for patt_idx, (symbols, chain_bonds) in enumerate(bridges):
                for _ in range(edge_patt[patt_idx][j, k]):
                    for symbol in symbols:
                        edmol.AddAtom(Chem.Atom(symbol))
                    chain = [j] + list(range(m, m + len(symbols))) + [k]
                    for begin, end, bond_type in zip(chain, chain[1:], chain_bonds):
                        edmol.AddBond(begin, end, bond_type)
                    m += len(symbols)

    mol_rec = edmol.GetMol()
    # sanity check
    Chem.SanitizeMol(mol_rec)

    mol_n = Chem.MolFromSmiles(Chem.MolToSmiles(mol_rec))
    output = Chem.MolToSmiles(mol_n)

    if train and '.' in output:
        # a bare `raise` with no active exception also ends up as a
        # RuntimeError, so the exception type is unchanged
        raise RuntimeError('reconstructed molecule is disconnected: ' + output)

    return output
Example #19
0
File: rdkit.py Project: BvB93/PLAMS
def to_rdmol(plams_mol, sanitize=True, properties=True, assignChirality=False):
    """
    Build an RDKit molecule from a PLAMS molecule.

    An object that already is an RDKit molecule is returned as is.
    PLAMS |Molecule|, |Atom| or |Bond| properties are pickled if they are neither booleans,
    floats, integers nor strings; the resulting property names are appended with '_pickled'.

    :parameter plams_mol: A PLAMS molecule
    :parameter bool sanitize: Kekulize, check valencies, set aromaticity, conjugation and hybridization
    :parameter bool properties: If all |Molecule|, |Atom| and |Bond| properties should be converted from PLAMS to RDKit format.
    :parameter bool assignChirality: Assign R/S and cis/trans information, insofar as this was not yet present in the PLAMS molecule.
    :type plams_mol: |Molecule|
    :return: an RDKit molecule
    :rtype: rdkit.Chem.Mol
    """
    if isinstance(plams_mol, Chem.Mol):
        return plams_mol

    # PLAMS stereo label -> RDKit chiral tag
    chiral_tags = {
        'counter-clockwise': Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CCW,
        'clockwise': Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CW,
    }
    # atom properties that are handled explicitly instead of being copied
    handled_atom_props = ('charge', 'pdb_info', 'stereo')

    def bond_type_code(order):
        # orders strictly between 1.4 and 1.6 map to 12, the aromatic bond
        # type; everything else is truncated to an integer
        return 12 if 1.4 < order < 1.6 else int(order)

    # Create rdkit molecule and add the atoms one by one
    editable = Chem.EditableMol(Chem.Mol())
    for pl_atom in plams_mol.atoms:
        rd_atom = Chem.Atom(pl_atom.atnum)
        if 'charge' in pl_atom.properties:
            rd_atom.SetFormalCharge(pl_atom.properties.charge)
        if properties:
            if 'pdb_info' in pl_atom.properties:
                set_PDBresidueInfo(rd_atom, pl_atom.properties.pdb_info)
            for prop in pl_atom.properties:
                if prop in handled_atom_props:
                    continue
                prop_to_rdmol(pl_atom, rd_atom, prop)

        # R/S information
        if pl_atom.properties.stereo:
            tag = chiral_tags.get(pl_atom.properties.stereo.lower())
            if tag is not None:
                rd_atom.SetChiralTag(tag)
        editable.AddAtom(rd_atom)

    # Bonds: atoms are addressed by their position in plams_mol.atoms
    for bond in plams_mol.bonds:
        begin = plams_mol.atoms.index(bond.atom1)
        end = plams_mol.atoms.index(bond.atom2)
        rd_type = Chem.BondType(bond_type_code(bond.order))
        editable.AddBond(begin, end, rd_type)
    rdmol = editable.GetMol()

    # cis/trans information
    for pl_bond, rd_bond in zip(plams_mol.bonds, rdmol.GetBonds()):
        if not pl_bond.properties.stereo:
            continue
        label = pl_bond.properties.stereo.lower()
        if label in ('e', 'trans'):
            rd_bond.SetStereo(Chem.rdchem.BondStereo.STEREOE)
        elif label in ('z', 'cis'):
            rd_bond.SetStereo(Chem.rdchem.BondStereo.STEREOZ)
        elif label == 'up':
            rd_bond.SetBondDir(Chem.rdchem.BondDir.ENDUPRIGHT)
        elif label == 'down':
            rd_bond.SetBondDir(Chem.rdchem.BondDir.ENDDOWNRIGHT)

    # Molecule and bond properties, only if *properties* = True
    if properties:
        for prop in plams_mol.properties:
            prop_to_rdmol(plams_mol, rdmol, prop)
        for pl_bond, rd_bond in zip(plams_mol.bonds, rdmol.GetBonds()):
            for prop in pl_bond.properties:
                if prop == 'stereo':
                    continue
                prop_to_rdmol(pl_bond, rd_bond, prop)

    if sanitize:
        Chem.SanitizeMol(rdmol)

    # Attach the PLAMS coordinates as a conformer
    conformer = Chem.Conformer()
    for idx, pl_atom in enumerate(plams_mol.atoms):
        position = Geometry.Point3D(pl_atom._getx(), pl_atom._gety(),
                                    pl_atom._getz())
        conformer.SetAtomPosition(idx, position)
    rdmol.AddConformer(conformer)

    # REB: Assign all stereochemistry, if it wasn't already there
    if assignChirality:
        Chem.rdmolops.AssignAtomChiralTagsFromStructure(
            rdmol, confId=conformer.GetId(), replaceExistingTags=False)
        try:
            Chem.AssignStereochemistryFrom3D(rdmol,
                                             confId=conformer.GetId(),
                                             replaceExistingTags=False)
        except AttributeError:
            # tolerated: the call is skipped when it raises AttributeError
            pass
    return rdmol
def ring_fixer(cross_mol, parent_mol):
    '''Bring a crossover product back to exactly five rings.

    Two point crossover can lead to too short and too long molecules; this
    function either adds to or subtracts from the molecule:

    * exactly 5 rings: cross_mol is returned unchanged.
    * fewer than 5 rings: two ring-end atoms of cross_mol are replaced by
      dummy atoms, a ring fragment cut from parent_mol is combined with it and
      the result is passed through join_mols.
    * more than 5 rings: cross_mol is cut at the last two atoms of its sixth
      ring (index 5), the first fragment is kept, its two dummy atoms are
      removed and two aromatic carbons are added in their place.

    Python 2 code (print statements). Progress information is printed to
    stdout. Returns the resulting molecule.

    NOTE(review): with 0 rings the lookup rings.AtomRings()[num_rings - 1]
    indexes an empty tuple -- confirm callers never pass a ring-free molecule.
    '''
    rings = cross_mol.GetRingInfo()
    num_rings = rings.NumRings()
    print "number of rings = " + str(num_rings)
    if num_rings == 5:
        new_mol = cross_mol
    if num_rings < 5:
        # atoms of the last ring reported by RDKit
        ring_ats = rings.AtomRings()[num_rings - 1]
        # ring atoms with exactly two bonds are treated as the open end of the
        # molecule; end_ats[1] and end_ats[2] are used below
        end_ats = []
        for atom in cross_mol.GetAtoms():
            if atom.GetIdx() in ring_ats:
                if len(atom.GetBonds()) == 2:
                    end_ats.append(atom.GetIdx())
        cp = return_cut_points(parent_mol)
        # ring_cp is only printed, it is not used for the cut itself
        ring_cp = [cp[num_rings - 1] + cp[-num_rings]]
        print ring_cp
        #actual_cp =  (ring_cp[0]) + (ring_cp[1])
        actual_cp = get_actual_cut_pts(parent_mol, num_rings, cp)
        print actual_cp
        #add_ring_cp = get_actual_cut_pts(parent_mol,num_rings-1,cp)
        # second fragment of the parent is the ring that gets attached
        additional_ring = frag_mol(parent_mol, actual_cp, bf='n')[1]
        # turn two end atoms into dummy atoms (atomic number 0)
        te2_m = Chem.EditableMol(cross_mol)
        te2_m.ReplaceAtom(end_ats[1], Chem.Atom(0))
        te2_m.ReplaceAtom(end_ats[2], Chem.Atom(0))
        tm2 = te2_m.GetMol()
        Chem.SanitizeMol(tm2)
        temp_mol = Chem.CombineMols(tm2, additional_ring)
        #new_mol = cross_mol
        new_mol = join_mols(temp_mol)
    if num_rings > 5:
        cut_pts = return_cut_points(cross_mol)
        # sixth ring: everything from here on is cut away
        ring_ats = rings.AtomRings()[5]
        #if n_diff >=2:
        #    ring_ats = rings.AtomRings()[4]#num_rings-1]
        cut_atoms = []
        cut_atoms.append(ring_ats[-1])
        cut_atoms.append(ring_ats[-2])
        print cut_atoms
        # keep only the cut points whose first element is one of the cut atoms
        cp_tup = [(cut_at, cut_pt) for cut_at, cut_pt in cut_pts
                  if cut_at in cut_atoms]
        #actual_cp = get_actual_cut_pts(cross_mol,4,cut_pts)
        frags = frag_mol(cross_mol, cp_tup, bf='n')
        temp_mol = frags[0]
        dum_ats = find_dummy_atoms(temp_mol)
        te_m = Chem.EditableMol(temp_mol)
        # NOTE(review): dum_ats[1] is removed before dum_ats[0], presumably so
        # that the higher index goes first and the lower one stays valid --
        # confirm the ordering returned by find_dummy_atoms. The return values
        # rem_at / rem_at2 are not used.
        rem_at = te_m.RemoveAtom(dum_ats[1][0])
        rem_at2 = te_m.RemoveAtom(dum_ats[0][0])
        new_at = te_m.AddAtom(Chem.Atom(
            6))  ## these new atoms are added onto the end of the atom idx
        new_at2 = te_m.AddAtom(
            Chem.Atom(6))  ##all orig atoms are moved down by 1
        # close the ring: neighbor - new_at - new_at2 - neighbor, all aromatic
        te_m.AddBond(new_at, dum_ats[0][1][0], Chem.BondType.AROMATIC)
        te_m.AddBond(new_at, new_at2, Chem.BondType.AROMATIC)
        te_m.AddBond(new_at2, dum_ats[-1][-1][1], Chem.BondType.AROMATIC)
        tm = te_m.GetMol()
        Chem.SanitizeMol(tm)
        new_mol = tm
    return new_mol
Example #21
0
 def dethiolate(self) -> Chem.Mol:
     """Return a new molecule built from ``self.thio_mol`` without the atom at ``self.SX_idx``."""
     editable = Chem.EditableMol(self.thio_mol)
     editable.RemoveAtom(self.SX_idx)
     stripped = editable.GetMol()
     return stripped
Example #22
0
    def getFragmentB(self):
        """Return the SMILES of ``self.frag2`` with the atom-map numbers removed.

        The property is cleared on a molecule obtained from an ``EditableMol``
        of ``self.frag2``, not on ``self.frag2`` directly.
        """
        editable = Chem.EditableMol(self.frag2)
        unmapped = editable.GetMol()
        for atom in unmapped.GetAtoms():
            atom.ClearProp('molAtomMapNumber')
        smiles = Chem.MolToSmiles(unmapped)
        return smiles
Example #23
0
def BreakBRICSBonds(mol, bonds=None, sanitize=True, silent=True):
    """ breaks the BRICS bonds in a molecule and returns the results

    Arguments:
      - mol: the molecule to fragment
      - bonds: optional iterable of ``((ia, ib), (labelA, labelB))`` entries;
        when it is falsy the work is delegated to
        ``Chem.FragmentOnBRICSBonds``
      - sanitize: when true, ``Chem.SanitizeMol`` is run on the result
      - silent: accepted for interface compatibility; not used in the body

    For every entry in ``bonds`` the bond between ``ia`` and ``ib`` is removed
    and each end is capped with a dummy atom (atomic number 0) whose isotope
    is the integer value of the matching label; the cap is bonded with the
    type of the removed bond. If ``mol`` has conformers, each dummy atom is
    placed, in every conformer, at the position of the atom that used to sit
    at the other end of the broken bond.

    >>> from rdkit import Chem
    >>> m = Chem.MolFromSmiles('CCCOCC')
    >>> m2=BreakBRICSBonds(m)
    >>> Chem.MolToSmiles(m2,True)
    '[3*]O[3*].[4*]CC.[4*]CCC'

    a more complicated case:

    >>> m = Chem.MolFromSmiles('CCCOCCC(=O)c1ccccc1')
    >>> m2=BreakBRICSBonds(m)
    >>> Chem.MolToSmiles(m2,True)
    '[16*]c1ccccc1.[3*]O[3*].[4*]CCC.[4*]CCC([6*])=O'


    can also specify a limited set of bonds to work with:

    >>> m = Chem.MolFromSmiles('CCCOCC')
    >>> m2 = BreakBRICSBonds(m,[((3, 2), ('3', '4'))])
    >>> Chem.MolToSmiles(m2,True)
    '[3*]OCC.[4*]CCC'

    this can be used as an alternate approach for doing a BRICS decomposition by
    following BreakBRICSBonds with a call to Chem.GetMolFrags:

    >>> m = Chem.MolFromSmiles('CCCOCC')
    >>> m2=BreakBRICSBonds(m)
    >>> frags = Chem.GetMolFrags(m2,asMols=True)
    >>> [Chem.MolToSmiles(x,True) for x in frags]
    ['[4*]CCC', '[3*]O[3*]', '[4*]CC']

    """
    # No explicit bond list: let the toolkit find and break the BRICS bonds.
    if not bonds:
        fragmented = Chem.FragmentOnBRICSBonds(mol)
        if sanitize:
            Chem.SanitizeMol(fragmented)
        return fragmented

    editable = Chem.EditableMol(mol)
    has_conformers = mol.GetNumConformers()
    # Index the next appended dummy atom will occupy.
    next_idx = mol.GetNumAtoms()

    # (dummy atom index, index of the atom whose coordinates it inherits)
    placements = []
    for atom_pair, labels in bonds:
        first, second = atom_pair
        broken_type = mol.GetBondBetweenAtoms(first, second).GetBondType()
        editable.RemoveBond(first, second)

        first_label, second_label = labels
        # Cap both ends of the broken bond; each cap remembers the atom at
        # the opposite end so it can later take over that atom's position.
        for anchor, label, partner in ((first, first_label, second),
                                       (second, second_label, first)):
            dummy = Chem.Atom(0)
            dummy.SetIsotope(int(label))
            dummy.SetNoImplicit(True)
            dummy_idx = next_idx
            next_idx += 1
            editable.AddAtom(dummy)
            editable.AddBond(anchor, dummy_idx, broken_type)
            if has_conformers:
                placements.append((dummy_idx, partner))

    result = editable.GetMol()
    if sanitize:
        Chem.SanitizeMol(result)

    if has_conformers:
        for source_conf in mol.GetConformers():
            target_conf = result.GetConformer(source_conf.GetId())
            for dummy_idx, partner in placements:
                target_conf.SetAtomPosition(
                    dummy_idx, source_conf.GetAtomPosition(partner))
    return result
Example #24
0
    def handle_covalent_mol(self, lig_res_name, non_cov_mol):
        '''
        Attach the covalently linked protein atom to a ligand molecule.

        Scans ``self.pdbfile`` for lines containing 'LINK' that mention
        ``lig_res_name``; if one is found, the partner atom is looked up in
        the same lines, appended to ``non_cov_mol`` and single-bonded to the
        ligand atom that lies closest to it.

        :param lig_res_name: Name of the covalent ligand
        :param non_cov_mol: Previous mol that does not have covalent attachment in it.
        :return: A new mol with the extra atom and bond IF a matching LINK
            line was found. No return statement is visible in this section
            for the non-covalent path.
        '''
        # original pdb = self.pdbfile (already aligned)
        # lig res name = name of ligand to find link for

        covalent = False

        # Pass 1: find a LINK line naming the ligand and remember the other
        # partner of that link in `res`. If several lines match, the last
        # one wins because `res` is simply overwritten.
        for line in self.pdbfile:
            if 'LINK' in line:
                # presumably the two atom/residue descriptor fields of a PDB
                # LINK record -- TODO confirm column ranges against the spec
                zero = line[13:27]
                one = line[43:57]

                if lig_res_name in zero:
                    res = one
                    covalent = True

                if lig_res_name in one:
                    res = zero
                    covalent = True

        if covalent:
            # Pass 2: locate the partner atom's coordinate line.
            # NOTE(review): this iterates self.pdbfile a second time, so it
            # must be re-iterable (e.g. a list of lines, not an open handle).
            # NOTE(review): 'ATOM' in line is a substring test and therefore
            # also matches 'HETATM' lines.
            # NOTE(review): res_coords and atm_trans are only bound when a
            # line matches; otherwise their use below raises a NameError.
            for line in self.pdbfile:
                if 'ATOM' in line and line[13:27] == res:
                    res_x = float(line[31:39])
                    res_y = float(line[39:47])
                    res_z = float(line[47:55])
                    res_coords = [res_x, res_y, res_z]
                    print(res_coords)
                    # Parse the single line as a PDB block to obtain an atom
                    # object for the partner atom.
                    atm = Chem.MolFromPDBBlock(line)
                    atm_trans = atm.GetAtomWithIdx(0)

            orig_pdb_block = Chem.MolToPDBBlock(non_cov_mol)

            # Drop COMPND lines, then keep only the HETATM lines.
            lig_block = '\n'.join(
                [l for l in orig_pdb_block.split('\n') if 'COMPND' not in l])
            lig_lines = [l for l in lig_block.split('\n') if 'HETATM' in l]
            # j is a 1-based counter over lig_lines; old_dist is the best
            # distance so far. With the initial value of 100, ind_to_add
            # stays unbound if no ligand atom is closer than that.
            j = 0
            old_dist = 100
            for line in lig_lines:
                j += 1
                #                 print(line)
                # (lig_lines is already filtered on 'HETATM', so this test
                # is always true here.)
                if 'HETATM' in line:
                    # NOTE(review): these are stripped strings, whereas
                    # res_coords holds floats -- confirm get_3d_distance
                    # converts its arguments.
                    coords = [
                        line[31:39].strip(), line[39:47].strip(),
                        line[47:55].strip()
                    ]
                    dist = self.get_3d_distance(coords, res_coords)

                    if dist < old_dist:
                        ind_to_add = j
                        print(dist)
                        old_dist = dist

            # i is used below as the index of the atom appended by AddAtom.
            # ind_to_add - 1 turns the 1-based line counter into an atom
            # index; assumes HETATM line order matches the atom order of
            # non_cov_mol -- TODO confirm.
            i = non_cov_mol.GetNumAtoms()
            edmol = Chem.EditableMol(non_cov_mol)
            edmol.AddAtom(atm_trans)
            edmol.AddBond(ind_to_add - 1, i, Chem.BondType.SINGLE)
            new_mol = edmol.GetMol()
            # Place the appended atom at the partner atom's coordinates.
            conf = new_mol.GetConformer()
            conf.SetAtomPosition(
                i, Point3D(res_coords[0], res_coords[1], res_coords[2]))

            return new_mol