def test(a_mol, b_smiles, transform):
    """Check whether applying ``transform`` to ``a_mol`` can produce ``b_smiles``.

    Args:
        a_mol: reactant passed (as a one-element list) to
            ``transform.RunReactants``.
        b_smiles: value the standardized products are compared against.
        transform: object exposing ``RunReactants``.

    Returns:
        bool: ``False`` when the transform yields nothing. When no
        standardized product contains ``"[*]"``, whether ``b_smiles`` is one
        of the products. Otherwise, whether the H-added ``b_smiles`` molecule
        has a substructure match for any product turned into a query.
    """
    product_sets = transform.RunReactants([a_mol])
    if not product_sets:
        return False

    products = [standardize(prod) for prod in chain.from_iterable(product_sets)]

    # Without wildcard atoms an exact membership test is sufficient.
    if not any("[*]" in prod for prod in products):
        return b_smiles in products

    # Wildcards present: fall back to substructure matching in which dummy
    # atoms act as queries and are ignored when adjusting degree.
    params = Chem.AdjustQueryParameters()
    params.makeDummiesQueries = True
    params.adjustDegree = True
    params.adjustDegreeFlags = Chem.ADJUST_IGNOREDUMMIES

    # NOTE(review): the products are treated as strings above ("[*]" in prod)
    # but are handed to Chem.AddHs here -- confirm what `standardize` returns.
    with_hs = [Chem.AddHs(prod) for prod in products]
    queries = [Chem.AdjustQueryProperties(prod, params) for prod in with_hs]

    target = Chem.AddHs(Chem.MolFromSmiles(b_smiles))
    return any(target.HasSubstructMatch(query) for query in queries)
def _load_template(self, path): """ Loads a template molecule with 2D coordinates Args: path (str): path to the model molecule in *.sdf, or *.pdb format Raises: ValueError: if unsupported format is used: sdf|pdb Returns: rdkit.Chem.rdchem.Mol: RDKit representation of the template """ mol = Chem.RWMol() extension = os.path.basename(path).split('.')[1] if extension == 'sdf': mol = Chem.MolFromMolFile(path, sanitize=True, removeHs=True) elif extension == 'pdb': mol = Chem.MolFromPDBFile(path, sanitize=True, removeHs=True) else: raise ValueError( 'Unsupported molecule type \'{}\''.format(extension)) p = Chem.AdjustQueryParameters() p.makeAtomsGeneric = True p.makeBondsGeneric = True mol = Chem.AdjustQueryProperties(mol, p) return mol
def __init__(self):
    """Initialise every container empty and every counter at zero."""
    # Fragment / molecule collections and their sizes.
    self.frag_list = []
    self.mol_list = []
    self.N_frag = 0
    self.N_mol = 0

    # Cross-references between fragments and molecules, with the running
    # maxima kept alongside (presumably list lengths -- confirm at the
    # call sites that fill them).
    self.frag2mol = []
    self.mol2frag = []
    self.max_mol2frag = 0
    self.max_frag2mol = 0

    # Default RDKit query-adjustment parameters.
    self.qp = Chem.AdjustQueryParameters()

    self.frag2mol_mapping = []
def _get_ligands(self):
    """Read all ligands with a molecule and attach a generic query pattern.

    Runs a select against the table named by
    ``PopulateLigandsScript.LIGANDS_DB`` over ``self.conn``, converts the
    ``molecule`` column to RDKit molecules and adds a ``pattern`` column
    holding each molecule adjusted with generic atoms, generic bonds and
    ring-count adjustment.

    Returns:
        pandas.DataFrame: the query result with the converted ``molecule``
        column and the additional ``pattern`` column.
    """
    query = """ select pdbid, mol_send(molecule) as molecule, atoms, rings, aromatic_rings, weight from {ligands} where molecule is not null """.format(
        ligands=PopulateLigandsScript.LIGANDS_DB)
    ligands_df = pd.read_sql_query(query, self.conn)

    params = Chem.AdjustQueryParameters()
    params.makeAtomsGeneric = True
    params.makeBondsGeneric = True
    params.adjustRingCount = True

    def _to_mol(raw):
        # Each cell exposes ``tobytes()``; the bytes are fed to Chem.Mol.
        return Chem.Mol(raw.tobytes())

    def _to_pattern(mol):
        return Chem.AdjustQueryProperties(mol, params)

    ligands_df.loc[:, "molecule"] = ligands_df.loc[:, "molecule"].apply(_to_mol)
    ligands_df.loc[:, "pattern"] = ligands_df.loc[:, "molecule"].apply(
        _to_pattern)

    return ligands_df
def align_mol_to_frags(smi_molecule, smi_linker, smi_frags):
    """Renumber a molecule so that its fragment atoms come first.

    The molecule is matched against the fragments and the linker (dummy
    atoms act as queries). Fragment atoms are placed first, then the
    remaining linker atoms, and finally dummy positions are moved to the end
    of both the molecule and the fragments.

    Args:
        smi_molecule: SMILES of the full molecule.
        smi_linker: SMILES of the linker (with dummy atoms).
        smi_frags: SMILES of the starting fragments (with dummy atoms).

    Returns:
        tuple: ``((aligned_mol, aligned_frags), nodes_to_keep, exit_vectors)``.
        On any failure ``"Could not align"`` is printed and
        ``(([], []), [], [])`` is returned.
    """
    try:
        # Load SMILES as molecules
        mol = Chem.MolFromSmiles(smi_molecule)
        frags = Chem.MolFromSmiles(smi_frags)
        linker = Chem.MolFromSmiles(smi_linker)

        # Include dummy atoms in query
        qp = Chem.AdjustQueryParameters()
        qp.makeDummiesQueries = True

        # Get matches to fragments and linker
        qfrag = Chem.AdjustQueryProperties(frags, qp)
        frags_matches = list(mol.GetSubstructMatches(qfrag, uniquify=False))
        qlinker = Chem.AdjustQueryProperties(linker, qp)
        linker_matches = list(mol.GetSubstructMatches(qlinker, uniquify=False))

        # Look for a fragment/linker match pair whose non-dummy atoms cover
        # every heavy atom of the molecule. If none does, the last pair
        # examined is used; if there are no pairs at all, the names below are
        # unbound and the resulting NameError is reported as a failure.
        n_heavy = mol.GetNumHeavyAtoms()
        for frag_match, linker_match in product(frags_matches, linker_matches):
            f_match = [
                idx for num, idx in enumerate(frag_match)
                if frags.GetAtomWithIdx(num).GetAtomicNum() != 0
            ]
            l_match = [
                idx for num, idx in enumerate(linker_match)
                if linker.GetAtomWithIdx(num).GetAtomicNum() != 0
                and idx not in f_match
            ]
            # If perfect match, break
            if len(set(f_match + l_match)) == n_heavy:
                break

        # Fragment indices first, then linker atoms not already listed
        sub_idx = list(frag_match)
        sub_idx += [
            idx for num, idx in enumerate(linker_match)
            if linker.GetAtomWithIdx(num).GetAtomicNum() != 0
            and idx not in sub_idx
        ]

        aligned_mols = [Chem.rdmolops.RenumberAtoms(mol, sub_idx), frags]
        nodes_to_keep = list(range(len(frag_match)))

        # Renumber dummy atoms to end
        dummy_idx = [
            atom.GetIdx() for atom in aligned_mols[1].GetAtoms()
            if atom.GetAtomicNum() == 0
        ]
        # NOTE(review): the "+ 2" presumably accounts for exactly two dummy
        # atoms in the fragments -- confirm before using other inputs.
        n_frag_atoms = aligned_mols[1].GetNumHeavyAtoms() + 2
        for i, current in enumerate(aligned_mols):
            sub_idx = list(range(n_frag_atoms))
            for idx in dummy_idx:
                sub_idx.remove(idx)
                sub_idx.append(idx)
            if i == 0:
                mol_range = list(range(current.GetNumHeavyAtoms()))
            else:
                mol_range = list(range(current.GetNumHeavyAtoms() + 2))
            idx_to_add = list(set(mol_range).difference(set(sub_idx)))
            sub_idx.extend(idx_to_add)
            aligned_mols[i] = Chem.rdmolops.RenumberAtoms(current, sub_idx)

        # Get exit vectors: neighbours of the dummy atoms in the fragments
        exit_vectors = []
        for atom in aligned_mols[1].GetAtoms():
            if atom.GetAtomicNum() == 0:
                if atom.GetIdx() in nodes_to_keep:
                    nodes_to_keep.remove(atom.GetIdx())
                for nei in atom.GetNeighbors():
                    exit_vectors.append(nei.GetIdx())

        if len(exit_vectors) != 2:
            print("Incorrect number of exit vectors")

        return (aligned_mols[0], aligned_mols[1]), nodes_to_keep, exit_vectors

    except Exception:
        # Best-effort API: report and return empty results, but let
        # KeyboardInterrupt / SystemExit propagate.
        print("Could not align")
        return ([], []), [], []
def compute_distance_and_angle(mol, smi_linker, smi_frags):
    """Compute the exit-vector distance and angle of a fragment pair.

    ``mol`` is renumbered so fragment atoms come first (dummy positions
    last); the coordinates of the two exit atoms and of the atoms occupying
    the dummy positions are then read from the molecule's conformer.

    Args:
        mol: RDKit molecule with a conformer.
        smi_linker: SMILES of the linker (with dummy atoms).
        smi_frags: SMILES of the starting fragments (with dummy atoms).

    Returns:
        tuple: ``(distance, angle)`` where ``distance`` is the norm of the
        difference between the two exit-atom positions and ``angle`` is the
        arccos of the dot product of the two exit-to-linker unit vectors.
        On any failure the inputs are printed and ``(None, None)`` is
        returned.
    """
    try:
        frags = Chem.MolFromSmiles(smi_frags)
        linker = Chem.MolFromSmiles(smi_linker)

        # Include dummy in query
        qp = Chem.AdjustQueryParameters()
        qp.makeDummiesQueries = True

        # Align to frags and linker
        qfrag = Chem.AdjustQueryProperties(frags, qp)
        frags_matches = list(mol.GetSubstructMatches(qfrag, uniquify=False))
        qlinker = Chem.AdjustQueryProperties(linker, qp)
        linker_matches = list(mol.GetSubstructMatches(qlinker, uniquify=False))

        # Look for a fragment/linker match pair whose non-dummy atoms cover
        # every heavy atom. If none does, the last pair examined is used; if
        # there are no pairs the unbound names below trigger the failure path.
        n_heavy = mol.GetNumHeavyAtoms()
        for frag_match, linker_match in product(frags_matches, linker_matches):
            f_match = [
                idx for num, idx in enumerate(frag_match)
                if frags.GetAtomWithIdx(num).GetAtomicNum() != 0
            ]
            l_match = [
                idx for num, idx in enumerate(linker_match)
                if linker.GetAtomWithIdx(num).GetAtomicNum() != 0
                and idx not in f_match
            ]
            if len(set(f_match + l_match)) == n_heavy:
                break

        # Fragment indices first, then linker atoms not already listed
        sub_idx = list(frag_match)
        sub_idx += [
            idx for num, idx in enumerate(linker_match)
            if linker.GetAtomWithIdx(num).GetAtomicNum() != 0
            and idx not in sub_idx
        ]

        aligned_mols = [Chem.rdmolops.RenumberAtoms(mol, sub_idx), frags]

        # Renumber dummy atoms to end
        dummy_idx = [
            atom.GetIdx() for atom in aligned_mols[1].GetAtoms()
            if atom.GetAtomicNum() == 0
        ]
        # NOTE(review): the "+ 2" presumably accounts for exactly two dummy
        # atoms in the fragments -- confirm before using other inputs.
        n_frag_atoms = aligned_mols[1].GetNumHeavyAtoms() + 2
        # The loop variable is deliberately not called `mol`, so the input
        # molecule is still what the failure handler prints.
        for i, current in enumerate(aligned_mols):
            sub_idx = list(range(n_frag_atoms))
            for idx in dummy_idx:
                sub_idx.remove(idx)
                sub_idx.append(idx)
            if i == 0:
                mol_range = list(range(current.GetNumHeavyAtoms()))
            else:
                mol_range = list(range(current.GetNumHeavyAtoms() + 2))
            idx_to_add = list(set(mol_range).difference(set(sub_idx)))
            sub_idx.extend(idx_to_add)
            aligned_mols[i] = Chem.rdmolops.RenumberAtoms(current, sub_idx)

        # Get exit vectors (neighbours of dummies) and the dummy positions
        exit_vectors = []
        linker_atom_idx = []
        for atom in aligned_mols[1].GetAtoms():
            if atom.GetAtomicNum() == 0:
                for nei in atom.GetNeighbors():
                    exit_vectors.append(nei.GetIdx())
                linker_atom_idx.append(atom.GetIdx())

        # Get coords
        conf = aligned_mols[0].GetConformer()
        exit_coords = [
            np.array(conf.GetAtomPosition(idx)) for idx in exit_vectors
        ]
        linker_coords = [
            np.array(conf.GetAtomPosition(idx)) for idx in linker_atom_idx
        ]

        # Get angle
        v1_u = unit_vector(linker_coords[0] - exit_coords[0])
        v2_u = unit_vector(linker_coords[1] - exit_coords[1])
        angle = np.arccos(np.clip(np.dot(v1_u, v2_u), -1.0, 1.0))

        # Get distance
        distance = np.linalg.norm(exit_coords[0] - exit_coords[1])

        return distance, angle

    except Exception:
        # Rendering the molecule can itself fail (e.g. `mol` is None); the
        # diagnostic must never turn a handled failure into a crash.
        try:
            mol_text = Chem.MolToSmiles(mol)
        except Exception:
            mol_text = repr(mol)
        print(mol_text, smi_linker, smi_frags)
        return None, None
def get_linker(full_mol, clean_frag, starting_point):
    """Extract the linker that joins the starting fragments inside a molecule.

    Args:
        full_mol: RDKit molecule of the complete compound. It is kekulized
            in place (aromatic flags cleared) once a match is found and the
            linker is non-empty.
        clean_frag: RDKit molecule of the fragments, used as the
            substructure query against ``full_mol``.
        starting_point: SMILES of the starting fragments ('.'-separated).

    Returns:
        str: SMILES of the linker, or ``""`` when the fragments do not
        match, the linker would have no heavy atoms, or no candidate could
        be derived. When several fallback candidates exist the first one is
        returned and a message is printed.
    """
    # Get matches of fragments; if there are none, terminate
    matches = list(full_mol.GetSubstructMatches(clean_frag))
    if not matches:
        print("No matches")
        return ""

    # Get number of atoms in linker
    linker_len = full_mol.GetNumHeavyAtoms() - clean_frag.GetNumHeavyAtoms()
    if linker_len == 0:
        return ""

    # Keep an untouched copy for fragmentation, then kekulize the input
    mol_to_break = Chem.Mol(full_mol)
    Chem.Kekulize(full_mol, clearAromaticFlags=True)

    poss_linker = []

    for match in matches:
        mol_rw = Chem.RWMol(full_mol)

        # Linker atoms are those not covered by the match
        linker_atoms = list(
            set(list(range(full_mol.GetNumHeavyAtoms()))).difference(match))
        linker_atom_set = set(linker_atoms)

        # Collect (i) bonds between starting fragments and linker and
        # (ii) the fragment atoms those bonds start from
        linker_bonds = []
        atoms_joined_to_linker = set()
        for frag_idx in sorted(match, reverse=True):
            neighbours = [
                nbr.GetIdx()
                for nbr in mol_rw.GetAtomWithIdx(frag_idx).GetNeighbors()
            ]
            for nbr_idx in list(set(neighbours).intersection(linker_atom_set)):
                bond = mol_rw.GetBondBetweenAtoms(frag_idx, nbr_idx)
                linker_bonds.append(bond.GetIdx())
                atoms_joined_to_linker.add(frag_idx)

        # Exactly two fragment atoms must touch the linker; else next match
        if len(atoms_joined_to_linker) != 2:
            continue

        # Delete starting fragments atoms to leave the candidate linker
        for frag_idx in sorted(match, reverse=True):
            mol_rw.RemoveAtom(frag_idx)
        linker = Chem.Mol(mol_rw)

        # Check linker required num atoms
        if linker.GetNumHeavyAtoms() != linker_len:
            continue

        # Delete linker atoms and check two disconnected fragments remain
        mol_rw = Chem.RWMol(full_mol)
        for linker_idx in sorted(linker_atoms, reverse=True):
            mol_rw.RemoveAtom(linker_idx)
        frags = Chem.Mol(mol_rw)
        if len(Chem.rdmolops.GetMolFrags(frags)) != 2:
            continue

        # Fragment molecule into starting fragments and linker
        fragmented_mol = Chem.FragmentOnBonds(mol_to_break, linker_bonds)

        # Remove starting fragments from fragmentation
        linker_to_return = Chem.Mol(fragmented_mol)
        qp = Chem.AdjustQueryParameters()
        qp.makeDummiesQueries = True
        for frag_smiles in starting_point.split('.'):
            qfrag = Chem.AdjustQueryProperties(
                Chem.MolFromSmiles(frag_smiles), qp)
            linker_to_return = AllChem.DeleteSubstructs(
                linker_to_return, qfrag, onlyFrags=True)

        # Check linker is connected and two bonds to outside molecule
        linker_is_connected = len(Chem.rdmolops.GetMolFrags(linker)) == 1
        if linker_is_connected and len(linker_bonds) == 2:
            Chem.Kekulize(linker_to_return, clearAromaticFlags=True)
            # If a starting fragment was not removed, pick the piece whose
            # heavy-atom count equals the expected linker size
            if len(Chem.rdmolops.GetMolFrags(linker_to_return)) > 1:
                for piece in Chem.MolToSmiles(linker_to_return).split('.'):
                    piece_mol = Chem.MolFromSmiles(piece)
                    if piece_mol.GetNumHeavyAtoms() == linker_len:
                        return piece
            round_tripped = Chem.MolFromSmiles(
                Chem.MolToSmiles(linker_to_return))
            return Chem.MolToSmiles(round_tripped)

        # If not, add to possible linkers (above doesn't capture some
        # complex cases)
        fragmented_mol = Chem.MolFromSmiles(
            Chem.MolToSmiles(fragmented_mol), sanitize=False)
        linker_to_return = AllChem.DeleteSubstructs(
            fragmented_mol, Chem.MolFromSmiles(starting_point))
        poss_linker.append(Chem.MolToSmiles(linker_to_return))

    # If no possibilities, process failed
    if not poss_linker:
        print("FAIL:", Chem.MolToSmiles(full_mol),
              Chem.MolToSmiles(clean_frag), starting_point)
        return ""

    # If multiple possibilities, process probably failed
    if len(poss_linker) > 1:
        print("More than one poss linker. ", poss_linker)

    return poss_linker[0]
def _dummy_exits(combo, atom_indices):
    """Return ``(dummy, neighbour)`` pairs and neighbour indices for dummy atoms."""
    dummy_bonds_at = []
    exit_points = []
    for idx in atom_indices:
        atom = combo.GetAtomWithIdx(idx)
        if atom.GetAtomicNum() == 0:
            neighbour = atom.GetNeighbors()[0].GetIdx()
            dummy_bonds_at.append((idx, neighbour))
            exit_points.append(neighbour)
    return dummy_bonds_at, exit_points


def _join_at_exit_points(combo, linker_exit_points, st_pt_exit_points,
                         bonds_to_break, linker, st_pt):
    """Bond paired exit atoms, drop the dummy bonds, return the longest SMILES piece."""
    combo_rw = Chem.EditableMol(combo)
    for atom_1, atom_2 in zip(linker_exit_points, st_pt_exit_points):
        if atom_1 == atom_2:
            # Cannot bond an atom to itself; report the inputs and stop.
            print(linker, st_pt)
            break
        combo_rw.AddBond(atom_1, atom_2, order=Chem.rdchem.BondType.SINGLE)

    for begin_idx, end_idx in sorted(bonds_to_break, reverse=True):
        combo_rw.RemoveBond(begin_idx, end_idx)

    # The detached dummy atoms become separate pieces; keep the longest
    # SMILES string (first one on ties).
    pieces = Chem.MolToSmiles(combo_rw.GetMol()).split('.')
    return max(pieces, key=len)


def join_frag_linker(linker, st_pt, random_join=True):
    """Join a linker to the starting fragments at their dummy atoms.

    Args:
        linker: SMILES of the linker with dummy atoms, or ``""`` for no
            linker.
        st_pt: SMILES of the starting fragments with dummy atoms.
        random_join: when true, the fragment exit points are shuffled with
            ``np.random.shuffle`` and a single joined SMILES is returned;
            otherwise both pairings (given order and reversed) are built.

    Returns:
        str or list[str]: with an empty ``linker``, the first '.'-separated
        piece of ``st_pt`` after replacing dummy atoms by hydrogens and
        removing them; with ``random_join`` true, one joined SMILES;
        otherwise a list of two joined SMILES.
    """
    if linker == "":
        du = Chem.MolFromSmiles('*')
        capped = AllChem.ReplaceSubstructs(
            Chem.MolFromSmiles(st_pt), du, Chem.MolFromSmiles('[H]'), True)[0]
        return Chem.MolToSmiles(Chem.RemoveHs(capped)).split('.')[0]

    combo = Chem.CombineMols(Chem.MolFromSmiles(linker),
                             Chem.MolFromSmiles(st_pt))

    # Include dummy in query
    qp = Chem.AdjustQueryParameters()
    qp.makeDummiesQueries = True

    # Locate the linker inside the combined molecule. With several matches,
    # take the first containing exactly two dummy atoms (or the last match
    # examined when none qualifies).
    qlink = Chem.AdjustQueryProperties(Chem.MolFromSmiles(linker), qp)
    linker_matches = combo.GetSubstructMatches(qlink)
    if len(linker_matches) > 1:
        for candidate in linker_matches:
            count_dummy = sum(
                1 for a in candidate
                if combo.GetAtomWithIdx(a).GetAtomicNum() == 0)
            if count_dummy == 2:
                break
        linker_atoms = candidate
    else:
        linker_atoms = linker_matches[0]

    linker_dummy_bonds_at, linker_exit_points = _dummy_exits(
        combo, linker_atoms)

    # Every atom that is not part of the linker match belongs to the
    # starting fragments.
    st_pt_atoms = list(
        set(range(combo.GetNumAtoms())).difference(linker_atoms))
    st_pt_dummy_bonds_at, st_pt_exit_points = _dummy_exits(combo, st_pt_atoms)

    bonds_to_break = linker_dummy_bonds_at + st_pt_dummy_bonds_at

    if random_join:
        np.random.shuffle(st_pt_exit_points)
        return _join_at_exit_points(combo, linker_exit_points,
                                    st_pt_exit_points, bonds_to_break,
                                    linker, st_pt)

    return [
        _join_at_exit_points(combo, linker_exit_points, exit_pts,
                             bonds_to_break, linker, st_pt)
        for exit_pts in (st_pt_exit_points, st_pt_exit_points[::-1])
    ]