Beispiel #1
0
def test(a_mol, b_smiles, transform):
    """Check whether applying ``transform`` to ``a_mol`` can yield ``b_smiles``.

    Args:
        a_mol: Reactant handed to ``transform.RunReactants`` as a
            one-element list.
        b_smiles (str): SMILES of the expected product.
        transform: Object exposing ``RunReactants``.

    Returns:
        bool: True when one of the standardized products equals ``b_smiles``
        or, if any product contains a ``[*]`` wildcard, when one of the
        products (turned into a query) matches the molecule parsed from
        ``b_smiles``. False otherwise, including when the transform yields
        no products or ``b_smiles`` cannot be parsed.
    """
    a_prods = transform.RunReactants([a_mol])
    if not a_prods:
        return False

    # Flatten the per-outcome product tuples and standardize each product.
    a_prods = [standardize(i) for i in chain(*a_prods)]

    # Without wildcard atoms an exact comparison is sufficient.
    if not any("[*]" in i for i in a_prods):
        return b_smiles in a_prods

    # NOTE(review): the checks above treat the standardized products as
    # SMILES strings, while Chem.AddHs below expects Mol objects -- confirm
    # what `standardize` returns.
    qp = Chem.AdjustQueryParameters()
    qp.makeDummiesQueries = True
    qp.adjustDegree = True
    qp.adjustDegreeFlags = Chem.ADJUST_IGNOREDUMMIES

    a_prods = [Chem.AddHs(i) for i in a_prods]
    a_prods = [Chem.AdjustQueryProperties(i, qp) for i in a_prods]

    b_mol = Chem.MolFromSmiles(b_smiles)
    if b_mol is None:
        # Unparsable target SMILES: nothing can match it.
        return False
    b_mol = Chem.AddHs(b_mol)

    # Generator lets any() stop at the first matching product.
    return any(b_mol.HasSubstructMatch(i) for i in a_prods)
Beispiel #2
0
    def _load_template(self, path):
        """
        Loads a template molecule with 2D coordinates

        Args:
            path (str): path to the model molecule in *.sdf,
                or *.pdb format

        Raises:
            ValueError: if unsupported format is used: sdf|pdb

        Returns:
            rdkit.Chem.rdchem.Mol: RDKit representation of the template
        """
        mol = Chem.RWMol()
        extension = os.path.basename(path).split('.')[1]

        if extension == 'sdf':
            mol = Chem.MolFromMolFile(path, sanitize=True, removeHs=True)
        elif extension == 'pdb':
            mol = Chem.MolFromPDBFile(path, sanitize=True, removeHs=True)
        else:
            raise ValueError(
                'Unsupported molecule type \'{}\''.format(extension))

        p = Chem.AdjustQueryParameters()
        p.makeAtomsGeneric = True
        p.makeBondsGeneric = True

        mol = Chem.AdjustQueryProperties(mol, p)

        return mol
Beispiel #3
0
    def __init__(self):
        """Create an empty container: no fragments, no molecules, zero counters."""
        # Stored fragments / molecules and how many of each there are.
        self.frag_list, self.mol_list = [], []
        self.N_frag = self.N_mol = 0

        # Cross-reference tables between fragments and molecules
        # (presumably index lists filled in later -- confirm against the
        # methods that populate them).
        self.frag2mol, self.mol2frag = [], []

        # Largest cross-reference sizes seen so far; both start at zero.
        self.max_mol2frag = self.max_frag2mol = 0

        # Default RDKit query-adjustment parameters.
        self.qp = Chem.AdjustQueryParameters()

        self.frag2mol_mapping = []
Beispiel #4
0
    def _get_ligands(self):
        """Fetch all ligands with a stored molecule and attach generic patterns.

        Reads the ligand table over ``self.conn``, turns the serialized
        ``molecule`` column into RDKit molecules and adds a ``pattern``
        column holding the same molecule with generic atoms, generic bonds
        and adjusted ring counts.

        Returns:
            pandas.DataFrame: the ligand rows with the converted
            ``molecule`` column and the additional ``pattern`` column.
        """
        query = """
                select pdbid, mol_send(molecule) as molecule, atoms, rings, aromatic_rings, weight
                from {ligands}
                where molecule is not null
        """.format(ligands=PopulateLigandsScript.LIGANDS_DB)
        frame = pd.read_sql_query(query, self.conn)

        generic = Chem.AdjustQueryParameters()
        generic.makeAtomsGeneric = True
        generic.makeBondsGeneric = True
        generic.adjustRingCount = True

        def _deserialize(blob):
            # The database hands back a buffer; RDKit wants raw bytes.
            return Chem.Mol(blob.tobytes())

        def _as_pattern(molecule):
            return Chem.AdjustQueryProperties(molecule, generic)

        molecules = frame.loc[:, "molecule"].apply(_deserialize)
        frame.loc[:, "molecule"] = molecules
        patterns = frame.loc[:, "molecule"].apply(_as_pattern)
        frame.loc[:, "pattern"] = patterns

        return frame
Beispiel #5
0
def align_mol_to_frags(smi_molecule, smi_linker, smi_frags):
    """Renumber a molecule so that its atoms follow the order of its fragments.

    The fragments and the linker (both with dummy atoms treated as
    wildcards) are matched against the molecule. The first fragment/linker
    match pair that together covers every heavy atom is used; if none does,
    the last pair tried is used. Dummy atoms are then moved to the end of
    the atom ordering.

    Args:
        smi_molecule (str): SMILES of the full molecule.
        smi_linker (str): SMILES of the linker, with dummy atoms at the
            attachment points.
        smi_frags (str): SMILES of the starting fragments, with dummy atoms
            at the attachment points.

    Returns:
        tuple: ``((aligned_mol, aligned_frags), nodes_to_keep, exit_vectors)``
        where ``nodes_to_keep`` are the fragment match positions that are
        not dummy atoms in the renumbered fragments and ``exit_vectors``
        are the indices of the atoms neighbouring those dummy atoms.
        On any failure ``"Could not align"`` is printed and
        ``(([], []), [], [])`` is returned.
    """
    try:
        # Load SMILES as molecules
        mol = Chem.MolFromSmiles(smi_molecule)
        frags = Chem.MolFromSmiles(smi_frags)
        linker = Chem.MolFromSmiles(smi_linker)

        # Include dummy atoms in query
        qp = Chem.AdjustQueryParameters()
        qp.makeDummiesQueries = True

        # Get matches to fragments and linker
        qfrag = Chem.AdjustQueryProperties(frags, qp)
        frags_matches = list(mol.GetSubstructMatches(qfrag, uniquify=False))
        qlinker = Chem.AdjustQueryProperties(linker, qp)
        linker_matches = list(mol.GetSubstructMatches(qlinker, uniquify=False))

        if not frags_matches or not linker_matches:
            # Handled by the except clause below, like every other failure.
            raise ValueError("fragments or linker do not match the molecule")

        # Which query positions are real atoms (True) rather than dummies.
        frag_is_real = [a.GetAtomicNum() != 0 for a in frags.GetAtoms()]
        linker_is_real = [a.GetAtomicNum() != 0 for a in linker.GetAtoms()]

        # Loop over matches; keep the first pair covering all heavy atoms
        for frag_match, linker_match in product(frags_matches, linker_matches):
            f_match = [
                idx for idx, real in zip(frag_match, frag_is_real) if real
            ]
            l_match = [
                idx for idx, real in zip(linker_match, linker_is_real)
                if real and idx not in f_match
            ]
            # If perfect match, break
            if len(set(f_match + l_match)) == mol.GetNumHeavyAtoms():
                break

        # Fragment indices first, then the linker atoms not yet listed
        sub_idx = list(frag_match)
        sub_idx += [
            idx for idx, real in zip(linker_match, linker_is_real)
            if real and idx not in sub_idx
        ]

        aligned_mols = [Chem.rdmolops.RenumberAtoms(mol, sub_idx), frags]

        nodes_to_keep = list(range(len(frag_match)))

        # Renumber dummy atoms to end
        dummy_idx = [
            atom.GetIdx() for atom in aligned_mols[1].GetAtoms()
            if atom.GetAtomicNum() == 0
        ]
        for i, current in enumerate(aligned_mols):
            # "+ 2" accounts for the two dummy atoms the fragments are
            # expected to carry.
            sub_idx = list(range(aligned_mols[1].GetNumHeavyAtoms() + 2))
            for idx in dummy_idx:
                sub_idx.remove(idx)
                sub_idx.append(idx)
            if i == 0:
                mol_range = list(range(current.GetNumHeavyAtoms()))
            else:
                mol_range = list(range(current.GetNumHeavyAtoms() + 2))
            idx_to_add = list(set(mol_range).difference(set(sub_idx)))
            sub_idx.extend(idx_to_add)
            aligned_mols[i] = Chem.rdmolops.RenumberAtoms(current, sub_idx)

        # Get exit vectors
        exit_vectors = []
        for atom in aligned_mols[1].GetAtoms():
            if atom.GetAtomicNum() == 0:
                if atom.GetIdx() in nodes_to_keep:
                    nodes_to_keep.remove(atom.GetIdx())
                for nei in atom.GetNeighbors():
                    exit_vectors.append(nei.GetIdx())

        if len(exit_vectors) != 2:
            print("Incorrect number of exit vectors")

        return (aligned_mols[0], aligned_mols[1]), nodes_to_keep, exit_vectors

    except Exception:
        # Best-effort API: report and hand back empty results, but let
        # KeyboardInterrupt / SystemExit propagate.
        print("Could not align")
        return ([], []), [], []
def compute_distance_and_angle(mol, smi_linker, smi_frags):
    """Measure the distance and angle between the two exit vectors of a molecule.

    The molecule is renumbered to follow the fragment ordering (dummy atoms
    last), then coordinates are read from its conformer.

    Args:
        mol: RDKit molecule; its conformer supplies the atom positions.
        smi_linker (str): SMILES of the linker, with dummy atoms at the
            attachment points.
        smi_frags (str): SMILES of the starting fragments, with dummy atoms
            at the attachment points.

    Returns:
        tuple: ``(distance, angle)`` where ``distance`` is the Euclidean
        distance between the two exit atoms and ``angle`` is the angle in
        radians between the two exit-atom -> dummy-position vectors.
        On any failure the inputs are printed and ``(None, None)`` is
        returned.
    """
    try:
        frags = Chem.MolFromSmiles(smi_frags)
        linker = Chem.MolFromSmiles(smi_linker)

        # Include dummy in query
        qp = Chem.AdjustQueryParameters()
        qp.makeDummiesQueries = True

        # Align to frags and linker
        qfrag = Chem.AdjustQueryProperties(frags, qp)
        frags_matches = list(mol.GetSubstructMatches(qfrag, uniquify=False))
        qlinker = Chem.AdjustQueryProperties(linker, qp)
        linker_matches = list(mol.GetSubstructMatches(qlinker, uniquify=False))

        if not frags_matches or not linker_matches:
            # Handled by the except clause below, like every other failure.
            raise ValueError("fragments or linker do not match the molecule")

        # Which query positions are real atoms (True) rather than dummies.
        frag_is_real = [a.GetAtomicNum() != 0 for a in frags.GetAtoms()]
        linker_is_real = [a.GetAtomicNum() != 0 for a in linker.GetAtoms()]

        # Loop over matches; keep the first pair covering all heavy atoms
        for frag_match, linker_match in product(frags_matches, linker_matches):
            f_match = [
                idx for idx, real in zip(frag_match, frag_is_real) if real
            ]
            l_match = [
                idx for idx, real in zip(linker_match, linker_is_real)
                if real and idx not in f_match
            ]
            if len(set(f_match + l_match)) == mol.GetNumHeavyAtoms():
                break

        # Fragment indices first, then the linker atoms not yet listed
        sub_idx = list(frag_match)
        sub_idx += [
            idx for idx, real in zip(linker_match, linker_is_real)
            if real and idx not in sub_idx
        ]

        aligned_mols = [Chem.rdmolops.RenumberAtoms(mol, sub_idx), frags]

        # Renumber dummy atoms to end
        dummy_idx = [
            atom.GetIdx() for atom in aligned_mols[1].GetAtoms()
            if atom.GetAtomicNum() == 0
        ]
        # The loop variable must not be called `mol`: the except handler
        # below reports the caller's molecule.
        for i, current in enumerate(aligned_mols):
            # "+ 2" accounts for the two dummy atoms the fragments are
            # expected to carry.
            sub_idx = list(range(aligned_mols[1].GetNumHeavyAtoms() + 2))
            for idx in dummy_idx:
                sub_idx.remove(idx)
                sub_idx.append(idx)
            if i == 0:
                mol_range = list(range(current.GetNumHeavyAtoms()))
            else:
                mol_range = list(range(current.GetNumHeavyAtoms() + 2))
            idx_to_add = list(set(mol_range).difference(set(sub_idx)))
            sub_idx.extend(idx_to_add)
            aligned_mols[i] = Chem.rdmolops.RenumberAtoms(current, sub_idx)

        # Get exit vectors (neighbours of dummies) and the dummy positions
        exit_vectors = []
        linker_atom_idx = []
        for atom in aligned_mols[1].GetAtoms():
            if atom.GetAtomicNum() == 0:
                for nei in atom.GetNeighbors():
                    exit_vectors.append(nei.GetIdx())
                linker_atom_idx.append(atom.GetIdx())

        # Get coords
        conf = aligned_mols[0].GetConformer()
        exit_coords = [
            np.array(conf.GetAtomPosition(idx)) for idx in exit_vectors
        ]
        linker_coords = [
            np.array(conf.GetAtomPosition(idx)) for idx in linker_atom_idx
        ]

        # Get angle; clip guards arccos against rounding just outside [-1, 1]
        v1_u = unit_vector(linker_coords[0] - exit_coords[0])
        v2_u = unit_vector(linker_coords[1] - exit_coords[1])
        angle = np.arccos(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0))

        # Get distance
        distance = np.linalg.norm(exit_coords[0] - exit_coords[1])

        return distance, angle

    except Exception:
        # Best-effort API: report the offending inputs and signal failure,
        # but let KeyboardInterrupt / SystemExit propagate.
        print(Chem.MolToSmiles(mol), smi_linker, smi_frags)
        return None, None
def get_linker(full_mol, clean_frag, starting_point):
    """Extract the linker that joins two starting fragments inside a molecule.

    Args:
        full_mol: RDKit molecule holding fragments plus linker. It is
            kekulized in place (aromatic flags cleared) by this function.
        clean_frag: RDKit molecule of the starting fragments, used as the
            substructure query against ``full_mol``.
        starting_point (str): SMILES of the starting fragments
            ('.'-separated).

    Returns:
        str: SMILES of the linker. An empty string is returned when the
        fragments do not match, when fragments and molecule have the same
        number of heavy atoms, or when no candidate linker is found. If
        several fallback candidates are found the first one is returned.
    """
    frag_matches = list(full_mol.GetSubstructMatches(clean_frag))
    if not frag_matches:
        print("No matches")
        return ""

    # Number of heavy atoms the linker must contain
    n_heavy = full_mol.GetNumHeavyAtoms()
    linker_len = n_heavy - clean_frag.GetNumHeavyAtoms()
    if linker_len == 0:
        return ""

    # Keep an untouched copy for fragmentation; work on a kekulized molecule
    mol_to_break = Chem.Mol(full_mol)
    Chem.Kekulize(full_mol, clearAromaticFlags=True)

    candidates = []

    for match in frag_matches:
        editable = Chem.RWMol(full_mol)
        # Everything that is not part of the fragment match is linker
        linker_atoms = list(set(list(range(n_heavy))).difference(match))
        linker_atom_set = set(linker_atoms)
        frag_atoms_desc = sorted(match, reverse=True)

        # Bonds crossing from fragments into the linker, and the fragment
        # atoms carrying them
        linker_bonds = []
        atoms_joined_to_linker = []
        for frag_idx in frag_atoms_desc:
            nei = [
                x.GetIdx()
                for x in editable.GetAtomWithIdx(frag_idx).GetNeighbors()
            ]
            crossing = set(nei).intersection(linker_atom_set)
            for linker_idx in list(crossing):
                bond = editable.GetBondBetweenAtoms(frag_idx, linker_idx)
                linker_bonds.append(bond.GetIdx())
                atoms_joined_to_linker.append(frag_idx)

        # Exactly two fragment atoms must touch the linker
        if len(set(atoms_joined_to_linker)) != 2:
            continue

        # Strip the fragment atoms; what remains should be the linker
        for frag_idx in frag_atoms_desc:
            editable.RemoveAtom(frag_idx)
        linker = Chem.Mol(editable)
        if linker.GetNumHeavyAtoms() != linker_len:
            continue

        # Strip the linker atoms; two disconnected fragments must remain
        editable = Chem.RWMol(full_mol)
        for linker_idx in sorted(linker_atoms, reverse=True):
            editable.RemoveAtom(linker_idx)
        frags = Chem.Mol(editable)
        if len(Chem.rdmolops.GetMolFrags(frags)) != 2:
            continue

        # Cut the original molecule at the crossing bonds, then delete the
        # starting fragments from the result
        fragmented_mol = Chem.FragmentOnBonds(mol_to_break, linker_bonds)
        linker_to_return = Chem.Mol(fragmented_mol)
        qp = Chem.AdjustQueryParameters()
        qp.makeDummiesQueries = True
        for frag_smiles in starting_point.split('.'):
            qfrag = Chem.AdjustQueryProperties(
                Chem.MolFromSmiles(frag_smiles), qp)
            linker_to_return = AllChem.DeleteSubstructs(linker_to_return,
                                                        qfrag,
                                                        onlyFrags=True)

        # Linker not connected, or not exactly two bonds to the outside:
        # record a fallback candidate and try the next match
        if (len(Chem.rdmolops.GetMolFrags(linker)) != 1
                or len(linker_bonds) != 2):
            reparsed = Chem.MolFromSmiles(Chem.MolToSmiles(fragmented_mol),
                                          sanitize=False)
            leftover = AllChem.DeleteSubstructs(
                reparsed, Chem.MolFromSmiles(starting_point))
            candidates.append(Chem.MolToSmiles(leftover))
            continue

        Chem.Kekulize(linker_to_return, clearAromaticFlags=True)
        # Occasionally a starting fragment survives the deletion; pick the
        # piece with the expected linker size in that case
        if len(Chem.rdmolops.GetMolFrags(linker_to_return)) > 1:
            for piece in Chem.MolToSmiles(linker_to_return).split('.'):
                if Chem.MolFromSmiles(piece).GetNumHeavyAtoms() == linker_len:
                    return piece
        # Round-trip through SMILES before returning
        round_trip = Chem.MolFromSmiles(Chem.MolToSmiles(linker_to_return))
        return Chem.MolToSmiles(round_trip)

    # No clean linker found: fall back on the recorded candidates
    if not candidates:
        print("FAIL:", Chem.MolToSmiles(full_mol),
              Chem.MolToSmiles(clean_frag), starting_point)
        return ""
    if len(candidates) > 1:
        # Several possibilities usually mean the process went wrong
        print("More than one poss linker. ", candidates)
    return candidates[0]
def _dummy_attachments(combo, atom_indices):
    """Return ([(dummy, neighbour), ...], [neighbour, ...]) for the dummy atoms in atom_indices."""
    dummy_bonds = []
    exit_points = []
    for idx in atom_indices:
        atom = combo.GetAtomWithIdx(idx)
        if atom.GetAtomicNum() == 0:
            neighbour = atom.GetNeighbors()[0].GetIdx()
            dummy_bonds.append((idx, neighbour))
            exit_points.append(neighbour)
    return dummy_bonds, exit_points


def _bond_exits_and_strip(combo, linker_exits, st_pt_exits, bonds_to_break,
                          linker, st_pt):
    """Bond paired exit points, cut the dummy bonds and return the longest SMILES piece."""
    editable = Chem.EditableMol(combo)
    for atom_1, atom_2 in zip(linker_exits, st_pt_exits):
        if atom_1 == atom_2:
            # An atom cannot be bonded to itself; report the inputs and
            # stop adding bonds.
            print(linker, st_pt)
            break
        editable.AddBond(atom_1, atom_2, order=Chem.rdchem.BondType.SINGLE)

    for begin, end in sorted(bonds_to_break, reverse=True):
        editable.RemoveBond(begin, end)

    # The detached dummy atoms end up as separate pieces; keep the longest
    # SMILES piece (first one on ties).
    return max(Chem.MolToSmiles(editable.GetMol()).split('.'), key=len)


def join_frag_linker(linker, st_pt, random_join=True):
    """Join a linker to its starting fragments at their dummy atoms.

    Args:
        linker (str): SMILES of the linker with dummy atoms at the
            attachment points. An empty string means "no linker".
        st_pt (str): SMILES of the starting fragments with dummy atoms at
            the attachment points.
        random_join (bool): When True the fragment exit points are shuffled
            with ``np.random.shuffle`` before pairing and a single SMILES
            is returned. When False both pairings (given order and
            reversed) are built.

    Returns:
        str or list[str]: For an empty linker, the SMILES of the first
        component of ``st_pt`` with its dummy atoms replaced by hydrogens.
        Otherwise the joined molecule's SMILES (``random_join=True``) or a
        list with the two SMILES of both pairings (``random_join=False``).
    """
    if linker == "":
        du = Chem.MolFromSmiles('*')
        return Chem.MolToSmiles(
            Chem.RemoveHs(
                AllChem.ReplaceSubstructs(Chem.MolFromSmiles(st_pt), du,
                                          Chem.MolFromSmiles('[H]'),
                                          True)[0])).split('.')[0]

    linker_mol = Chem.MolFromSmiles(linker)
    combo = Chem.CombineMols(linker_mol, Chem.MolFromSmiles(st_pt))

    # Include dummy in query
    qp = Chem.AdjustQueryParameters()
    qp.makeDummiesQueries = True
    qlink = Chem.AdjustQueryProperties(linker_mol, qp)

    # Locate the linker inside the combined molecule. With several matches
    # prefer the first one containing exactly two dummy atoms; otherwise
    # fall back on the last match.
    linker_matches = combo.GetSubstructMatches(qlink)
    if len(linker_matches) > 1:
        linker_atoms = linker_matches[-1]
        for candidate in linker_matches:
            n_dummies = sum(
                1 for a in candidate
                if combo.GetAtomWithIdx(a).GetAtomicNum() == 0)
            if n_dummies == 2:
                linker_atoms = candidate
                break
    else:
        linker_atoms = linker_matches[0]

    # Every atom that is not part of the linker belongs to the fragments
    st_pt_atoms = list(
        set(range(combo.GetNumAtoms())).difference(linker_atoms))

    linker_dummy_bonds_at, linker_exit_points = _dummy_attachments(
        combo, linker_atoms)
    st_pt_dummy_bonds_at, st_pt_exit_points = _dummy_attachments(
        combo, st_pt_atoms)
    bonds_to_break = linker_dummy_bonds_at + st_pt_dummy_bonds_at

    if random_join:
        np.random.shuffle(st_pt_exit_points)
        return _bond_exits_and_strip(combo, linker_exit_points,
                                     st_pt_exit_points, bonds_to_break,
                                     linker, st_pt)

    return [
        _bond_exits_and_strip(combo, linker_exit_points, exits,
                              bonds_to_break, linker, st_pt)
        for exits in (st_pt_exit_points, st_pt_exit_points[::-1])
    ]