def get_smallest_root_match(self, mol: Mol) -> Mol:
    """Narrow ``self.molecules`` down to the smallest superstructure of ``mol``.

    A connected fragment of ``mol`` is grown outwards from atom 0.  On each
    round every one-atom extension of the current fragment is tried, and the
    known molecules that are superstructures of at least one extension
    (according to ``self.find_superstructures``) become the new search space.
    When several extensions were tried in a round, the search space is
    filtered once more against the fragment holding all of them together.

    Args:
        mol: Query molecule; atom 0 is the root of the growing fragment.

    Returns:
        ``self.get_smallest_mol`` applied to the last non-empty search space.
        This is also what is returned once every atom has been included
        (previously the function fell off the end of the loop and returned
        ``None``).
    """
    search_space: Set[Mol] = set(self.molecules.values())
    num_atoms = mol.GetNumAtoms()
    all_idxs = set(range(num_atoms))
    included_idxs = {0}

    # Undirected adjacency list: atom index -> indices of bonded atoms.
    neighbours = {}
    for bond in mol.GetBonds():
        begin = bond.GetBeginAtomIdx()
        end = bond.GetEndAtomIdx()
        neighbours.setdefault(begin, []).append(end)
        neighbours.setdefault(end, []).append(begin)

    def submol(keep_idxs):
        """Return a copy of ``mol`` reduced to the atoms in ``keep_idxs``."""
        e_mol = Chem.EditableMol(mol)
        # Delete from the highest index down so pending indices stay valid.
        for idx in sorted(all_idxs - keep_idxs, reverse=True):
            e_mol.RemoveAtom(idx)
        return e_mol.GetMol()

    def show(candidates):
        """Display the candidates, but only when the set is small."""
        if len(candidates) < 100:
            display("from mul")
            for s in candidates:
                display_numbered(s)

    while len(included_idxs) < num_atoms:
        # Every way of extending the current fragment by one bonded atom.
        # ``.get`` keeps an atom without bonds from raising KeyError.
        frontier_permutations = {
            frozenset([*included_idxs, to_idx])
            for from_idx in included_idxs
            for to_idx in neighbours.get(from_idx, ())
            if to_idx not in included_idxs
        }

        new_search_space = set()
        for perm in frontier_permutations:
            candidate = submol(perm)
            display('mul')
            display_numbered(candidate)
            new_search_space |= set(
                self.find_superstructures(candidate, search_space))
            included_idxs |= perm

        if not new_search_space:
            return self.get_smallest_mol(list(search_space))
        search_space = new_search_space
        show(search_space)

        if len(frontier_permutations) > 1:
            # Several atoms were added this round: require a match against
            # the fragment that contains all of them at once.
            candidate = submol(included_idxs)
            display('single')
            display_numbered(candidate)
            new_search_space = set(
                self.find_superstructures(candidate, search_space))
            if not new_search_space:
                return self.get_smallest_mol(list(search_space))
            search_space = new_search_space
            show(search_space)

    # The whole molecule has been matched; report the smallest survivor.
    return self.get_smallest_mol(list(search_space))
def new_process_covalent(directory):
    """Add the covalently linked protein atom to each ligand ``.mol`` file.

    Walks ``directory`` recursively.  In every folder that contains a
    ``*_bound.pdb`` file, the PDB ``LINK`` records are scanned for one whose
    partner is the ligand (``LIG``).  The linked residue atom is looked up in
    the ``ATOM`` records, appended to the molecule read from the folder's
    first ``*.mol`` file, bonded (single bond) to the closest ligand atom and
    placed at its PDB coordinates.  The ``.mol`` file is overwritten.

    Folders without a LIG ``LINK`` record are left untouched.  A folder is
    skipped with a message when the linked atom cannot be found or when no
    ligand atom lies within the 100-unit distance cut-off.

    Args:
        directory: Root folder to walk.
    """
    for f in [x[0] for x in os.walk(directory)]:
        pattern = str(f) + '/*_bound.pdb'
        print(pattern)
        bound_pdbs = glob.glob(pattern)
        print(bound_pdbs)
        if not bound_pdbs:
            continue

        bound_pdb = bound_pdbs[0]
        mol_file = glob.glob(str(f) + '/*.mol')[0]
        with open(bound_pdb, 'r') as handle:
            pdb = handle.readlines()

        # Find the atom on the other side of a LINK record involving LIG.
        # When several LINK records qualify, the last one wins.
        covalent = False
        res = None
        for line in pdb:
            if 'LINK' in line:
                zero = line[13:27]
                one = line[43:57]
                if 'LIG' in zero:
                    res = one
                    covalent = True
                if 'LIG' in one:
                    res = zero
                    covalent = True
        if not covalent:
            continue

        # Locate the linked atom's ATOM record, its coordinates and an RDKit
        # atom built from that single record.
        # The removed pandas lookup of the atomic number
        # (pd.DataFrame.from_csv, gone since pandas 1.0) fed a value that was
        # never used.
        res_coords = None
        atm_trans = None
        for line in pdb:
            if 'ATOM' in line and line[13:27] == res:
                res_coords = [float(line[31:39]),
                              float(line[39:47]),
                              float(line[47:55])]
                print(res_coords)
                atm = Chem.MolFromPDBBlock(line)
                atm_trans = atm.GetAtomWithIdx(0)
        if atm_trans is None:
            print('No ATOM record found for linked atom %r in %s'
                  % (res, bound_pdb))
            continue

        mol = Chem.MolFromMolFile(mol_file)
        orig_pdb_block = Chem.MolToPDBBlock(mol)
        lig_lines = [l for l in orig_pdb_block.split('\n')
                     if 'COMPND' not in l and 'HETATM' in l]

        # 1-based position of the ligand HETATM line closest to the residue
        # atom; only distances below the initial cut-off of 100 qualify.
        ind_to_add = None
        old_dist = 100
        for j, line in enumerate(lig_lines, start=1):
            coords = [line[31:39].strip(),
                      line[39:47].strip(),
                      line[47:55].strip()]
            dist = get_3d_distance(coords, res_coords)
            if dist < old_dist:
                ind_to_add = j
                print(dist)
                old_dist = dist
        if ind_to_add is None:
            print('No ligand atom within range of linked atom in %s'
                  % bound_pdb)
            continue

        # The new atom receives the next free index.
        i = mol.GetNumAtoms()
        edmol = Chem.EditableMol(mol)
        edmol.AddAtom(atm_trans)
        edmol.AddBond(ind_to_add - 1, i, Chem.BondType.SINGLE)
        new_mol = edmol.GetMol()
        conf = new_mol.GetConformer()
        conf.SetAtomPosition(
            i, Point3D(res_coords[0], res_coords[1], res_coords[2]))
        try:
            Chem.MolToMolFile(new_mol, mol_file)
        except ValueError:
            # Retry without kekulization when the first write fails.
            Chem.MolToMolFile(new_mol, mol_file, kekulize=False)
# from_mols(cls, name, followup, thio, best_hit)
#
# NOTE(review): the code below is kept exactly as found. Its line structure
# has been lost, and it appears to contain statements from more than one
# original body: a `return 'LINK ...' + pdbblock` is followed by further
# statements, and a `@classmethod def reanimate(...)` begins at the very end
# and continues beyond the visible text. Recover the structure from the
# upstream source before reformatting or changing anything here.
#
# What the visible statements do, in order:
#   * create the instance with cls.__new__(cls), i.e. without running
#     __init__, then store the SMILES of `followup`, `name`, the cache slots
#     (_index_map, _name_map), `best_hit` (as _best_hit) and an empty
#     `notebook` dict;
#   * store `followup` as ori_mol and `thio` as thio_mol. The return values
#     of the two Chem.AddHs(...) calls are discarded -- NOTE(review): confirm
#     whether the hydrogen-added molecules were meant to be kept;
#   * take the LAST match of 'C[S-]' in thio_mol as (CX_idx, SX_idx), embed
#     100 conformers of `thio`, run UFF optimisation, Gasteiger charges and
#     MMFF optimisation, then remove the sulfur atom (SX_idx) to obtain
#     dethio_mol;
#   * save dethio_mol to 'temp.mol' via self.save_confs and call
#     self.parameterise on that file (the inline comment says this assigns
#     self.pdb_mol);
#   * assert that dethio_mol and pdb_mol list the same element symbols in the
#     same order, copying each PDB residue info onto the dethio_mol atom;
#   * inside a GlobalPyMOL() context: clear the session, load
#     self.best_hit.apo_file as 'apo', remove residues named LIG, write the
#     positioned molecule to '<work_path>/<name>/<name>.scaffold.pdb', reload
#     it as 'ligand' and fetch the whole session as a PDB string;
#   * return a LINK record (SG of CYS A 145 to CX of LIG B 1) prepended to
#     that PDB string;
#   * the statements after the return write 'pre_<name>.pdb', build the pose,
#     call egor, dock, take a snapshot, score, and dump `notebook` as JSON.
def from_mols(cls, name: str, followup: Chem.Mol, thio: Chem.Mol, best_hit): self = cls.__new__(cls) self.smiles = Chem.MolToSmiles(followup, kekuleSmiles=False) self.name = name # cached property self._index_map = None self._name_map = None self._best_hit = best_hit self.notebook = {} # operations self.ori_mol = followup Chem.AddHs(self.ori_mol) self.thio_mol = thio Chem.AddHs(self.thio_mol) # Dethiolate manually. All this for a proton that will be out of place anyway. self.CX_idx, self.SX_idx = self.thio_mol.GetSubstructMatches( Chem.MolFromSmiles('C[S-]'))[-1] Chem.GetSSSR(thio) AllChem.EmbedMultipleConfs(thio, numConfs=100) AllChem.UFFOptimizeMoleculeConfs(thio, maxIters=2000) AllChem.ComputeGasteigerCharges(thio) AllChem.MMFFOptimizeMolecule(thio) dethio = Chem.EditableMol(thio) dethio.RemoveAtom(self.SX_idx) self.dethio_mol = dethio.GetMol() ## align ligand aligned_file = 'temp.mol' self.save_confs(self.dethio_mol, aligned_file) print(f'SMILES converted: {name}') self.parameterise(aligned_file) # self.pdb_mol gets assigned. print(f'Parameterised: {name}') # pdbblock = self.make_placed_pdb() for ma, pa in zip(self.dethio_mol.GetAtoms(), self.pdb_mol.GetAtoms()): assert ma.GetSymbol() == pa.GetSymbol( ), f'The indices do not align! {ma.GetIdx()}:{ma.GetSymbol()} vs. 
{pa.GetIdx()}:{pa.GetSymbol()}' ma.SetMonomerInfo(pa.GetPDBResidueInfo()) with GlobalPyMOL() as pymol: pymol.cmd.delete('*') # pymol.cmd.load(self.best_hit.relaxbound_file, 'apo') # # fix drift # pymol.cmd.load(self.best_hit.bound_file, 'ref') # pymol.cmd.align('apo', 'ref') # pymol.cmd.delete('ref') pymol.cmd.load(self.best_hit.apo_file, 'apo') pymol.cmd.remove('resn LIG') # distort positions pymol.cmd.read_pdbstr( Chem.MolToPDBBlock(self.fragmenstein.positioned_mol), 'scaffold') pymol.cmd.save( f'{self.work_path}/{self.name}/{self.name}.scaffold.pdb') pymol.cmd.delete('scaffold') pymol.cmd.read_pdbstr( Chem.MolToPDBBlock(self.fragmenstein.positioned_mol), 'ligand') pdbblock = pymol.cmd.get_pdbstr('*') pymol.cmd.delete('*') return 'LINK SG CYS A 145 CX LIG B 1 1555 1555 1.8\n' + pdbblock open(f'{self.work_path}/{self.name}/pre_{self.name}.pdb', 'w').write(pdbblock) self.make_overlap_image() self.pose = self.make_pose(pdbblock) print(f'PyRosetta loaded: {name}') self.egor = self.call_egor() print(f'EM: {name}') self.dock_pose() print(f'Docked: {name}') # if refine: # self.refine_pose() self.snap_shot() print(f'Snapped: {name}') self.score = self.calculate_score() json.dump(self.notebook, open(f'{self.work_path}/{self.name}/{self.name}.json', 'w')) print(f'Done: {name}') @classmethod def reanimate(cls, mol: Chem.Mol, hits: List[Hit], constraint_file: str, ligand_residue: Union[str, int, Tuple[int, str], pyrosetta.Vector1], key_residues: Union[None, Sequence[Union[int, str, Tuple[int, str]]], pyrosetta.Vector1] = None): fragmenstein = Fragmenstein(mol, hits) fragmenstein.positioned_mol
# Wildcard scan: for every molecule in the input .smi file, print the original
# SMILES followed by one variant per atom in which that atom is replaced by a
# wildcard (atomic number 0).  Output is tab-separated "<smiles>\t<name>" on
# stdout; a throughput summary goes to stderr.
before = time.time()

argc = len(sys.argv)
if argc != 2:
    print("usage: %s input.smi" % sys.argv[0])
    sys.exit(1)

input = sys.argv[1]  # NOTE: shadows the builtin; name kept for compatibility
count = 0
wildcard = Chem.Atom(0)

for name, orig_smile in RobustSmilesMolSupplier(input):
    mol = Chem.MolFromSmiles(orig_smile)
    if mol is None:
        # MolFromSmiles returns None for unparsable SMILES; report the
        # record and carry on with the rest of the file.
        print("skipping unparsable SMILES: %s\t%s" % (orig_smile, name),
              file=sys.stderr)
        continue
    # output original molecule first
    print("%s\t%s" % (orig_smile, name))
    num_atoms = mol.GetNumAtoms()
    # then output its variants
    for i in range(num_atoms):
        editable = Chem.EditableMol(mol)
        editable.ReplaceAtom(i, wildcard, preserveProps=True)
        edited = editable.GetMol()
        smi = Chem.MolToSmiles(edited)
        print("%s\t%s_%d" % (smi, name, i))
    count += 1

after = time.time()
dt = after - before
# Guard against a zero elapsed time on very small inputs / coarse clocks.
rate = count / dt if dt > 0 else float("inf")
print("%d molecules at %.2f mol/s" % (count, rate), file=sys.stderr)

# # original code by @Iwatobipen
# # replace any aromatic carbon to aromatic nitrogen.
# # TODO: does not compile
# def nitrogen_scan(mol_in):
#     out_mol_list = []
#     used = set()
def trim_mcs_chiral_atoms():
    """
    Remove all atoms in the MCS where there might be a chirality inversion,
    i.e. (a) the corresponding atoms in the input molecules are both chiral,
    and (b) the parity of the atom mapping in the input molecules is reversed.

    Calls map_mcs_mol as it uses the mappings generated there.

    This function takes no parameters: ``self``, ``options`` and
    ``map_mcs_mol`` are resolved from the surrounding scope.  It reads
    ``self.moli`` and ``self.molj`` and rebinds ``self.mcs_mol``.
    """

    def reorder_mol_to_mcs(mol):
        """
        Reorder a molecule so that its atoms are in the same order as the
        MCS, using the 'to_mcs' property.  Return the reordered molecule
        (as a copy).
        """
        num_atoms = mol.GetNumAtoms()
        newindexes = list(range(num_atoms))

        # Find the atom mapping to atom 0 in the MCS, and swap
        # it with position 0. Rinse and repeat
        for i in range(num_atoms):
            for j in range(i + 1, num_atoms):
                atom_j = mol.GetAtomWithIdx(j)
                if (atom_j.HasProp('to_mcs')
                        and int(atom_j.GetProp('to_mcs')) == i):
                    newindexes[i], newindexes[j] = (newindexes[j],
                                                    newindexes[i])

        return Chem.RenumberAtoms(mol, newindexes)

    def flag_inverted_atoms_in_mcs():
        """
        Flag all atoms in the MCS where the chirality is inverted between
        moli and molj (the flag is the CHI_TETRAHEDRAL_CW chiral tag).
        """
        # Generate atommappings as they are useful below
        map_mcs_mol()

        # moli chiral atoms
        rmoli = reorder_mol_to_mcs(self.moli)
        chiral_at_moli = [
            seq[0] for seq in Chem.FindMolChiralCenters(rmoli)
        ]

        # molj chiral atoms
        rmolj = reorder_mol_to_mcs(self.molj)
        chiral_at_molj = [
            seq[0] for seq in Chem.FindMolChiralCenters(rmolj)
        ]

        invertedatoms = []
        for i in chiral_at_moli:
            # Is atom i in the MCS?
            ai = rmoli.GetAtomWithIdx(i)
            if not ai.HasProp('to_mcs'):
                continue
            for j in chiral_at_molj:
                # Is atom j in the MCS?
                aj = rmolj.GetAtomWithIdx(j)
                if not aj.HasProp('to_mcs'):
                    continue
                # Both atoms are chiral; do they map to the same MCS atom
                # with different parity?  If so, remember the MCS index.
                if (ai.GetProp('to_mcs') == aj.GetProp('to_mcs')
                        and ai.GetChiralTag() != aj.GetChiralTag()):
                    invertedatoms.append(int(aj.GetProp('to_mcs')))

        for i in invertedatoms:
            mcsat = self.mcs_mol.GetAtomWithIdx(i)
            mcsat.SetChiralTag(Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CW)
            if options.verbose == 'pedantic':
                logging.info('Chiral atom detected: %d', i)

    # Flag inverted atoms
    flag_inverted_atoms_in_mcs()

    # Trim inverted chiral atoms.  For each flagged centre: move it to the
    # end of the atom list, then repeatedly delete the smallest fragment
    # attached to it until it has at most two connections.  Repeat until no
    # flagged centres remain.
    while True:
        atom_idx = -1
        for atom in self.mcs_mol.GetAtoms():
            # Atoms flagged above carry the CHI_TETRAHEDRAL_CW tag; clear
            # the flag on the one about to be processed.
            if (atom.GetChiralTag() ==
                    Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CW):
                atom_idx = atom.GetIdx()
                atom.SetChiralTag(Chem.rdchem.ChiralType.CHI_UNSPECIFIED)
                break

        if atom_idx == -1:  # Not found any more chiral atoms, so done
            break

        # Move the chiral atom to the end (avoids indexing problems)
        newindexes = list(range(self.mcs_mol.GetNumAtoms()))
        newindexes.remove(atom_idx)
        newindexes.append(atom_idx)
        self.mcs_mol = Chem.RenumberAtoms(self.mcs_mol, newindexes)

        # Now we loop, deleting groups attached to the chiral atom, until
        # the chiral atom has at most two connections.  The chiral atom is
        # always the last atom: every deleted atom has a lower index.
        while self.mcs_mol.GetAtomWithIdx(
                self.mcs_mol.GetNumAtoms() - 1).GetDegree() > 2:
            # Delete the chiral atom in a temporary molecule, and fragment.
            # Since the chiral atom was the last one, the indexes in the
            # temporary molecule are the same as in self.mcs_mol
            edit_mol = Chem.EditableMol(self.mcs_mol)
            edit_mol.RemoveAtom(self.mcs_mol.GetNumAtoms() - 1)
            fragments = Chem.rdmolops.GetMolFrags(edit_mol.GetMol())

            # Atoms of the smallest fragment (first one on ties), sorted so
            # the largest index is deleted first.
            min_frag = sorted(min(fragments, key=len), reverse=True)

            edit_mol = Chem.EditableMol(self.mcs_mol)
            for idx in min_frag:
                edit_mol.RemoveAtom(idx)
            self.mcs_mol = edit_mol.GetMol()
def ring_close_retro(lp, pm=None):
    '''
    Reverse of Ring-closing which will occur after addition of heat
    Source: Reynolds Class Notes, 8-25-20, p.2
    Test SMILES: '*c5ccc(Oc4ccc(n3c(=O)c2cc1c(=O)n(*)c(=O)c1cc2c3=O)cc4)cc5'

    For every non-empty combination of non-overlapping ring C(=O)-N matches
    in the periodic molecule, the C-N bond is opened, an oxygen is attached
    to the carbon, aromatic flags and bonds are removed from the affected
    ring atoms where necessary, and the periodic connection is replaced by
    two '*' end groups.

    Args:
        lp: A LinearPol object; a SMILES string or RDKit Mol is accepted for
            convenience and wrapped in ``ru.LinearPol``.
        pm: Optional periodic-molecule object for ``lp``; built with
            ``lp.PeriodicMol()`` when omitted.

    Returns:
        A list of molecules, one per combination.  An empty list is returned
        when no periodic molecule can be built, when the pattern is absent,
        when the ring-opened pattern is already present, or as soon as ANY
        product fails sanitization (results collected so far are discarded).
    '''
    # Only for convenience. Pass in a LinearPol object when possible.
    if isinstance(lp, (str, Chem.rdchem.Mol)):
        lp = ru.LinearPol(lp)

    start_match = Chem.MolFromSmarts('[c,C;!R0;!R1](=O)[n,N;!R0;!R1]')
    end_match = Chem.MolFromSmarts('[#6R0](=O)([OH])[C,c][C,c][CR1](=O)[NR1]')

    if pm is None:
        pm = lp.PeriodicMol()
    if pm is None:
        return []
    # NOTE(review): also called for a caller-supplied pm; assumed to be a
    # harmless ring-perception refresh -- confirm against PeriodicMol.
    pm.GetSSSR()

    if not pm.HasSubstructMatch(start_match) or pm.HasSubstructMatch(
            end_match):
        return []

    mols = []

    # Position k holds the lp atom index of pm atom k (star atoms excluded).
    lp_no_connect_inds = np.array(
        [x for x in range(lp.mol.GetNumAtoms()) if x not in lp.star_inds])

    def lp_to_pm_ind(lp_ind):
        return int(np.argwhere(lp_no_connect_inds == lp_ind))

    ar_atom_idx = [a.GetIdx() for a in lp.mol.GetAromaticAtoms()]
    if ar_atom_idx:
        # Only executed for aromatic polymers. There for speed.
        ri = lp.mol.GetRingInfo()
        ar = ri.AtomRings()
        # Number of fully aromatic rings each aromatic atom belongs to.
        atom_aromaticity = {a: 0 for a in ar_atom_idx}
        for ring in ar:
            if all(a in atom_aromaticity for a in ring):
                for a in ring:
                    atom_aromaticity[a] += 1

    # Keep only matches that share no atom with an earlier kept match.
    all_matches = pm.mol.GetSubstructMatches(start_match)
    seen = set()
    matches = []
    for match in all_matches:
        ms = set(match)
        if not ms & seen:
            seen |= ms
            matches.append(match)

    for L in range(1, len(matches) + 1):
        for match_combo in itertools.combinations(matches, L):
            em = Chem.EditableMol(pm.mol)
            for i_c, i_o, i_n in match_combo:  # indices of atoms in pm
                fix_aromaticity = False
                if pm.mol.GetBondBetweenAtoms(
                        i_c, i_n).GetBondType() == Chem.BondType.AROMATIC:
                    fix_aromaticity = True
                    # Assume the correct ring is the smallest one that
                    # contains both atoms (ring indices are lp indices).
                    ring_atoms = None
                    ring_size = 100
                    for ring in ar:
                        if (lp_no_connect_inds[i_c] in ring
                                and lp_no_connect_inds[i_n] in ring
                                and len(ring) < ring_size):
                            ring_atoms = set(ring)
                            ring_size = len(ring)

                o = em.AddAtom(Chem.AtomFromSmiles('O'))
                em.AddBond(i_c, o, Chem.BondType.SINGLE)
                em.RemoveBond(i_c, i_n)
                med_mol = em.GetMol()

                if fix_aromaticity:
                    # NOTE(review): atom_aromaticity is keyed by lp indices
                    # but is queried with lp_to_pm_ind(i_n); the fallback to
                    # 0 on failure is kept as found -- confirm the intended
                    # index mapping.
                    try:
                        i_n_aromaticity = atom_aromaticity[lp_to_pm_ind(i_n)]
                    except Exception:
                        i_n_aromaticity = 0
                    for i in ring_atoms:
                        # If an atom was part of the same number of aromatic
                        # rings as the N atom, it shouldn't be aromatic.
                        if atom_aromaticity[i] != i_n_aromaticity:
                            continue
                        pm_i = lp_to_pm_ind(i)
                        med_mol.GetAtomWithIdx(pm_i).SetIsAromatic(False)
                        # Turn all of its aromatic bonds into single bonds.
                        neighs = [
                            x.GetIdx() for x in
                            med_mol.GetAtomWithIdx(pm_i).GetNeighbors()
                        ]
                        aromatic_neighs = [
                            x for x in neighs
                            if med_mol.GetBondBetweenAtoms(pm_i, x).
                            GetBondType() == Chem.BondType.AROMATIC
                        ]
                        em = Chem.EditableMol(med_mol)
                        for x in aromatic_neighs:
                            em.RemoveBond(x, pm_i)
                            em.AddBond(x, pm_i, Chem.BondType.SINGLE)
                        med_mol = em.GetMol()

            # Replace the periodic bond by two '*' end groups.
            em = Chem.EditableMol(med_mol)
            star1 = em.AddAtom(Chem.AtomFromSmiles('*'))
            star2 = em.AddAtom(Chem.AtomFromSmiles('*'))
            em.RemoveBond(pm.connector_inds[0], pm.connector_inds[1])
            em.AddBond(pm.connector_inds[0], star1, Chem.BondType.SINGLE)
            em.AddBond(pm.connector_inds[1], star2, Chem.BondType.SINGLE)
            new_mol = em.GetMol()
            try:
                Chem.SanitizeMol(new_mol)
                mols.append(ru.mol_without_atom_index(new_mol))
            except Exception:
                # Existing contract: one invalid product voids the result.
                return []
    return mols
def delete_bonds(smi, id, mol, bonds, out):
    """Cut ``bonds`` in ``mol`` and record the labelled fragments in ``out``.

    Every cut bond is replaced by two dummy atoms (atomic number 0), one on
    each side.  The resulting fragments are written as one CSV-style line:

    * single cut:  ``smi,id,,<fragment1>.<fragment2>``
    * 2 or 3 cuts: ``smi,id,<core>,<side chains>``

    Attachment points carry ``[*:N]`` labels.  For multiple cuts the labels
    are renumbered so that the first side chain always has label 1, the
    second 2, and so on.  A triple cut is only kept when one fragment holds
    all three attachment points.

    Args:
        smi: SMILES of the parent molecule (copied into the output line).
        id: Identifier of the parent molecule (copied into the output line).
        mol: Parent RDKit molecule; it is not modified.
        bonds: Sequence of ``(begin_idx, end_idx)`` pairs to cut.
        out: Set-like collection of output lines; new lines are added to it.
    """
    # use the same parent mol object and create editable mol
    em = Chem.EditableMol(mol)

    # loop through the bonds to delete
    isotope = 0
    isotope_track = {}
    for i in bonds:
        isotope += 1
        # remove the bond
        em.RemoveBond(i[0], i[1])

        # now add attachment points
        newAtomA = em.AddAtom(Chem.Atom(0))
        em.AddBond(i[0], newAtomA, Chem.BondType.SINGLE)

        newAtomB = em.AddAtom(Chem.Atom(0))
        em.AddBond(i[1], newAtomB, Chem.BondType.SINGLE)

        # keep track of where to put isotopes
        isotope_track[newAtomA] = isotope
        isotope_track[newAtomB] = isotope

    # should be able to get away without sanitising mol
    # as the existing valencies/atoms not changed
    modifiedMol = em.GetMol()

    # canonical smiles can be different with and without the isotopes
    # hence to keep track of duplicates use fragmented_smi_noIsotopes
    fragmented_smi_noIsotopes = Chem.MolToSmiles(modifiedMol,
                                                 isomericSmiles=True)

    fragments = fragmented_smi_noIsotopes.split(".")

    # a triple cut is valid only if one fragment has all three attachments
    valid = True
    if isotope == 3:
        valid = any(re.search(r'\*.*\*.*\*', f) for f in fragments)
    if not valid:
        return

    if isotope == 1:
        fragmented_smi_noIsotopes = re.sub(r'\[\*\]', '[*:1]',
                                           fragmented_smi_noIsotopes)

        fragments = fragmented_smi_noIsotopes.split(".")

        s1 = Chem.MolFromSmiles(fragments[0])
        s2 = Chem.MolFromSmiles(fragments[1])

        # need to cansmi again as smiles can be different
        output = '%s,%s,,%s.%s' % (
            smi, id, Chem.MolToSmiles(s1, isomericSmiles=True),
            Chem.MolToSmiles(s2, isomericSmiles=True))
        if output not in out:
            out.add(output)

    elif isotope >= 2:
        # add the isotope labels
        for key in isotope_track:
            modifiedMol.GetAtomWithIdx(key).SetIsotope(isotope_track[key])
        fragmented_smi = Chem.MolToSmiles(modifiedMol, isomericSmiles=True)

        # change the isotopes into labels - currently can't add SMARTS or
        # labels to mol
        fragmented_smi = re.sub(r'\[1\*\]', '[*:1]', fragmented_smi)
        fragmented_smi = re.sub(r'\[2\*\]', '[*:2]', fragmented_smi)
        fragmented_smi = re.sub(r'\[3\*\]', '[*:3]', fragmented_smi)

        fragments = fragmented_smi.split(".")

        # identify core/side chains and cansmi them
        core, side_chains = find_correct(fragments)

        # now change the labels on sidechains and core
        # to get the new labels, cansmi the dot-disconnected side chains
        # the first fragment in the side chains has attachment label 1,
        # 2nd: 2, 3rd: 3; then change the labels accordingly in the core
        # this is required by the indexing script, as the side-chains are
        # "keys" in the index
        # this ensures the side-chains always have the same numbering
        relabel = {}  # old label -> new label (both as strings)
        side_chain_fragments = side_chains.split(".")
        for s, fragment in enumerate(side_chain_fragments):
            matchObj = re.search(r'\[\*\:([123])\]', fragment)
            if matchObj:
                relabel[matchObj.group(1)] = str(s + 1)

        # change the labels if required; the temporary XX markers keep an
        # already renamed label from being renamed a second time
        labels = ['1', '2']
        if isotope == 3:
            labels.append('3')
        for old in labels:
            new = relabel[old]
            if new != old:
                pattern = r'\[\*\:' + old + r'\]'
                replacement = '[*:XX' + new + 'XX]'
                core = re.sub(pattern, replacement, core)
                side_chains = re.sub(pattern, replacement, side_chains)

        # now remove the XX
        core = re.sub('XX', '', core)
        side_chains = re.sub('XX', '', side_chains)

        output = '%s,%s,%s,%s' % (smi, id, core, side_chains)
        if output not in out:
            out.add(output)
def join_frag_linker(linker, st_pt, random_join=True):
    """Join a linker to a starting point at their dummy-atom exit vectors.

    Both inputs are SMILES strings in which ``*`` marks attachment points.
    The atoms next to the linker's dummies are bonded (single bonds) to the
    atoms next to the starting point's dummies, the dummy bonds are removed,
    and the longest dot-separated component of the result is kept.

    Args:
        linker: Linker SMILES.  An empty string means "no linker": the
            dummies of ``st_pt`` are replaced by hydrogens instead.
        st_pt: Starting-point SMILES.
        random_join: When true, the pairing of exit points is shuffled with
            ``np.random.shuffle`` and one SMILES is returned.  When false,
            both the forward and the reversed pairing are built.

    Returns:
        A SMILES string (empty linker or ``random_join=True``), otherwise a
        list with the two SMILES for the forward and reversed pairing.
    """
    if linker == "":
        du = Chem.MolFromSmiles('*')
        capped = AllChem.ReplaceSubstructs(Chem.MolFromSmiles(st_pt), du,
                                           Chem.MolFromSmiles('[H]'),
                                           True)[0]
        return Chem.MolToSmiles(Chem.RemoveHs(capped)).split('.')[0]

    combo = Chem.CombineMols(Chem.MolFromSmiles(linker),
                             Chem.MolFromSmiles(st_pt))

    # Include dummy in query
    qp = Chem.AdjustQueryParameters()
    qp.makeDummiesQueries = True
    qlink = Chem.AdjustQueryProperties(Chem.MolFromSmiles(linker), qp)

    def count_dummies(match):
        return sum(1 for a in match
                   if combo.GetAtomWithIdx(a).GetAtomicNum() == 0)

    # With several matches, prefer the first one containing exactly two
    # dummy atoms; fall back to the last match.
    matches = combo.GetSubstructMatches(qlink)
    if len(matches) > 1:
        linker_atoms = next(
            (m for m in matches if count_dummies(m) == 2), matches[-1])
    else:
        linker_atoms = matches[0]

    def dummy_bonds_and_exits(atom_indices):
        """Return ([(dummy, neighbour), ...], [neighbour, ...])."""
        bonds_at = []
        exit_points = []
        for idx in atom_indices:
            atom = combo.GetAtomWithIdx(idx)
            if atom.GetAtomicNum() == 0:
                neighbour = atom.GetNeighbors()[0].GetIdx()
                bonds_at.append((idx, neighbour))
                exit_points.append(neighbour)
        return bonds_at, exit_points

    linker_dummy_bonds_at, linker_exit_points = dummy_bonds_and_exits(
        linker_atoms)

    # Everything that is not part of the linker match belongs to the
    # starting point.  (The former substructure search for the starting
    # point was dead code: its result was overwritten immediately.)
    in_linker = set(linker_atoms)
    st_pt_atoms = [i for i in range(combo.GetNumAtoms())
                   if i not in in_linker]
    st_pt_dummy_bonds_at, st_pt_exit_points = dummy_bonds_and_exits(
        st_pt_atoms)

    bonds_to_break = linker_dummy_bonds_at + st_pt_dummy_bonds_at

    def join(exit_points):
        """Bond the paired exit points, drop dummy bonds, return SMILES."""
        combo_rw = Chem.EditableMol(combo)
        for atom_1, atom_2 in zip(linker_exit_points, exit_points):
            if atom_1 == atom_2:
                print(linker, st_pt)
                break
            combo_rw.AddBond(atom_1, atom_2,
                             order=Chem.rdchem.BondType.SINGLE)
        for bond in sorted(bonds_to_break, reverse=True):
            combo_rw.RemoveBond(bond[0], bond[1])
        # Longest component; the first one wins on ties.
        return max(Chem.MolToSmiles(combo_rw.GetMol()).split('.'), key=len)

    if random_join:
        np.random.shuffle(st_pt_exit_points)
        return join(st_pt_exit_points)

    return [join(exit_points)
            for exit_points in (st_pt_exit_points, st_pt_exit_points[::-1])]
def fragment_rdkit_mol(m, bond):
    """
    Break a bond in rdkit molecule and obtain the fragment(s).

    Args:
        m (Chem.Mol): rdkit molecule to fragment
        bond (tuple): bond index (2-tuple)

    Returns:
        frags (list): fragments (rdkit molecules) by breaking the bond.
            Could be of size 1 or 2, depending on the number of fragments.
    """

    def create_rdkit_mol_from_fragment(m, name, bond_atom):
        """
        Rebuild a fragment as a clean rdkit mol.

        The formal charge of the atom that took part in the broken bond is
        passed as `None` (it is assigned when the mol is sanitized); all
        other atoms keep the formal charge they had in the parent molecule.
        """
        mol_atoms = list(m.GetAtoms())
        species = [atom.GetSymbol() for atom in mol_atoms]

        # m.GetConformer().GetPositions() results in a segfault on linux, so
        # the coordinates are read atom by atom instead.
        conformer = m.GetConformer()
        coords = []
        for atom_idx in range(m.GetNumAtoms()):
            coords.append(list(conformer.GetAtomPosition(atom_idx)))

        # Keep (begin, end) in the stored order: a dative bond needs to have
        # the metal as the end atom, so the pair must not be sorted.
        bond_types = {}
        for b in m.GetBonds():
            bond_types[(b.GetBeginAtomIdx(),
                        b.GetEndAtomIdx())] = b.GetBondType()

        # adjust formal charge
        formal_charge = [atom.GetFormalCharge() for atom in mol_atoms]
        formal_charge[bond_atom] = None

        return create_rdkit_mol(species,
                                coords,
                                bond_types,
                                formal_charge,
                                name,
                                force_sanitize=True)

    editable = Chem.EditableMol(m)
    editable.RemoveBond(*bond)
    broken = editable.GetMol()

    atom_mapping = []
    frags = Chem.GetMolFrags(broken,
                             asMols=True,
                             sanitizeFrags=True,
                             fragsMolAtomMapping=atom_mapping)

    # Although sanitizeFrags=True is passed to Chem.GetMolFrags, some
    # properties of the returned fragments are still incorrect:
    # 1. features of the returned frags, e.g. TotalDegree() of an atom of the
    #    broken bond still has the value before bond breaking.
    # 2. rdkit converts N(=O)=O to [N+](=O)O-] when sanitizing the mol, see
    #    http://www.rdkit.org/docs/RDKit_Book.html#molecular-sanitization
    #    So the formal charge has to be copied from the parent mol to the
    #    fragments, otherwise sanitizing the fragments results in an error.
    # To address these, the frags are created from scratch using info from
    # the frags.
    new_frags = []
    for frag_no, frag in enumerate(frags):
        # Position (within the fragment) of the first atom that belonged to
        # the broken bond.
        bond_atom = next(
            (pos for pos, parent_idx in enumerate(atom_mapping[frag_no])
             if parent_idx in bond),
            None,
        )
        if bond_atom is None:
            # this should never happen
            raise RuntimeError("Cannot find bond atom in fragments")

        name = f"{m.GetProp('_Name')}_frag{frag_no}"
        new_frags.append(
            create_rdkit_mol_from_fragment(frag, name, bond_atom))

    return new_frags
# -*- coding: utf-8 -*-
"""
Created on Fri Apr 24 17:45:39 2020

@author: dmattox

Combine two sugar molecules given as SMILES into one molecule, add a single
bond between atom 20 and atom 4 of the combined molecule, and print its
SMILES, InChIKey and Crippen logP.
"""

from rdkit import Chem
from rdkit.Chem import Descriptors

lst = ['OC[C@H]1OC[C@@H]([C@H]([C@@H]1O)O)O',
       'OC[C@H]1OC(O[C@@H]2[C@@H](CO)O[C@@H](O)[C@H](O)[C@H]2O)[C@H](O)[C@@H](O)[C@@H]1O']

# Parsed without sanitization, exactly as in the original experiment.
mols = [Chem.MolFromSmiles(smile, sanitize=False) for smile in lst]

# Atom indices of the second molecule are offset by the size of the first
# one in the combined molecule.
mergeMol = Chem.CombineMols(mols[0], mols[1])
edMerge = Chem.EditableMol(mergeMol)
edMerge.AddBond(20, 4, order=Chem.rdchem.BondType.SINGLE)
mergeMol = edMerge.GetMol()

print(Chem.MolToSmiles(mergeMol))
print(Chem.inchi.MolToInchiKey(mergeMol))
print(Descriptors.MolLogP(mergeMol))

# The text below is pasted interpreter output (a dict of PLIP binding-site
# objects), not code; it is cut off in the source and is kept here only as a
# comment so that the file stays valid Python.
#
# {'ACE:A:0': <pliptool.plip.modules.plipxml.BSite at 0x128700710>,
#  'ACE:B:0': <pliptool.plip.modules.plipxml.BSite at 0x1281c0a10>,
#  'BGC:A:122': <pliptool.plip.modules.plipxml.BSite at 0x1287cadd0>,
#  'BGC:A:124': <pliptool.plip.modules.plipxml.BSite at 0x1287caed0>,
#  'BGC:A:126': <pliptool.plip.modules.plipxml.BSite at 0x1287a8350>,
#  'BGC:B:122': <pliptool.plip.modules.plipxml.BSite at 0x1287a8e10>,
#  'BGC:B:124': <pliptool.plip.modules.plipxml.BSite at 0x1287a8410>,
def generate_fragemnts(self):
    """Fragment the SMILES typed into the GUI and list/draw the fragments.

    Reads the SMILES from ``self.NTPS_smiles_entry``, cuts the molecule once
    at each bond matched by the rotatable-bond SMARTS (capping both ends
    with dummy atoms), and collects the precursor plus all fragment SMILES
    in ``self.all_smis``.  Every structure is then rendered to a PNG,
    appended to ``self.molecule`` / ``self.structures`` and added to the
    fragment list widget; the first stored image is shown in the structure
    view.
    """
    # Clear class level list
    del self.molecule[:]

    # Text input from the LineEdit widget becomes the precursor structure
    self.smile = str(self.NTPS_smiles_entry.text())
    precursor = cStructure('precursor', self.smile, 'precursor.png')
    self.molecule.append(precursor)

    # SMARTS string for bond disconnection
    self.patt = Chem.MolFromSmarts(
        '[!$([NH]!@C(=O))&!D1&!$(*#*)]-&!@[!$([NH]!@C(=O))&!D1&!$(*#*)]')

    # Parent mol object and its rotatable bonds
    self.mol = Chem.MolFromSmiles(self.smile)
    self.bonds = self.mol.GetSubstructMatches(self.patt)

    # The precursor always comes first in the list of SMILES
    self.all_smis = [self.smile]

    # Disconnect one rotatable bond at a time and cap both ends with dummies
    for begin_idx, end_idx in self.bonds:
        self.em = Chem.EditableMol(self.mol)
        self.nAts = self.mol.GetNumAtoms()
        self.em.RemoveBond(begin_idx, end_idx)
        # The two new dummy atoms receive indices nAts and nAts + 1
        for offset, anchor in enumerate((begin_idx, end_idx)):
            self.em.AddAtom(Chem.Atom(0))
            self.em.AddBond(anchor, self.nAts + offset, Chem.BondType.SINGLE)
        self.nAts += 2
        self.p = self.em.GetMol()
        Chem.SanitizeMol(self.p)
        pieces = Chem.GetMolFrags(self.p, asMols=True)
        self.smis = [Chem.MolToSmiles(piece, True) for piece in pieces]
        for self.smi in self.smis:
            self.all_smis.append(self.smi)

    # draw molecules and save png images for display in structure viewer
    # widget --> there's probably a better way to do this...
    self.draw = 0
    for position, self.smi in enumerate(self.all_smis):
        if position == 0:
            struct_type, img_path = 'precursor', 'precursor.png'
        else:
            struct_type = 'fragment'
            img_path = 'fragment_%s.png' % self.draw
        self.molecule.append(cStructure(struct_type, self.smi, img_path))

        self.template = Chem.MolFromSmiles(self.smi)
        drawOptions = DrawingOptions()
        drawOptions.bgColor = (0, 0, 0)
        Draw.MolToFile(self.template, img_path, options=drawOptions)

        # append to class level list
        self.structures.append(img_path)

        # append to GUI fragments listbox - file name only
        file_name = img_path.split('/')[-1].strip()
        list_item = QtGui.QListWidgetItem(file_name.split('.')[0])
        self.NTPS_frag_list.addItem(list_item)
        self.draw += 1

    self.autocomplete_mod_properties()

    # add image of first structure to graphicsview
    scene = QtGui.QGraphicsScene()
    scene.addPixmap(QtGui.QPixmap(self.structures[0]))
    self.NTPS_structure_view.setScene(scene)
    self.NTPS_structure_view.show()
    return
def add_bond_between(mol, a1, a2, bond_type):
    """Return a sanitized copy of ``mol`` with a new bond between two atoms.

    Args:
        mol: Molecule to copy and extend; it is not modified.
        a1: First atom of the new bond (its index is used).
        a2: Second atom of the new bond (its index is used).
        bond_type: Bond type of the new bond.

    Returns:
        The result of ``dm.sanitize_mol`` on the edited molecule.
    """
    editable = Chem.EditableMol(mol)
    begin_idx = a1.GetIdx()
    end_idx = a2.GetIdx()
    editable.AddBond(begin_idx, end_idx, bond_type)
    bonded = editable.GetMol()
    return dm.sanitize_mol(bonded)
#!/usr/bin/env python
# For every .pdb file in the current directory: detach the Mg atom(s) from
# their neighbours, assign bond orders from the template SMILES below, add
# hydrogens with coordinates and write "<name>-addedHs.mol".
import os

from rdkit import Chem
from rdkit.Chem import AllChem

# Raw string: the SMILES contains a backslash (``\C``), which is an invalid
# escape sequence in a normal string literal.
template = Chem.MolFromSmiles(r'CCC1=C(C2=NC1=CC3=C(C4=C([N-]3)C(=C5[C@H]([C@@H](C(=N5)C=C6C(=C(C(=C2)[N-]6)C=C)C)C)CCC(=O)OC/C=C(\C)/CCCC(C)CCCC(C)CCCC(C)C)[C@H](C4=O)C(=O)OC)C)C.[Mg+2]')

for filename in os.listdir("."):
    if filename[-4:] != ".pdb":
        continue

    mol = Chem.MolFromPDBFile(filename, sanitize=False)
    if mol is None:
        # MolFromPDBFile returns None when the file cannot be parsed.
        print("could not read %s, skipping" % filename)
        continue

    # Collect every bond that involves a magnesium atom.
    bs = []
    for atom in mol.GetAtoms():
        if atom.GetSymbol() == "Mg":
            atom.SetNoImplicit(True)
            for bond in atom.GetBonds():
                bs.append([bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()])

    # Remove those bonds so that Mg becomes a separate component, like the
    # '.[Mg+2]' part of the template.
    emol = Chem.EditableMol(mol)
    for b in bs:
        emol.RemoveBond(b[0], b[1])
    newmol = emol.GetMol()
    print(Chem.MolToSmiles(newmol))

    newmol = AllChem.AssignBondOrdersFromTemplate(template, newmol)
    molHs = Chem.AddHs(newmol, addCoords=True)
    with open(filename[:-4] + '-addedHs.mol', 'w') as out_file:
        out_file.write(Chem.MolToMolBlock(molHs))
def adj_to_nlist(atoms, A, nlist_model, embeddings):
    """Yield a padded neighbour-list array for each embedded conformer.

    A molecule is built from ``atoms`` and the adjacency matrix ``A``,
    embedded in 3D with ``AllChem.EmbedMolecule`` and, per conformer, passed
    through ``nlist_model`` to obtain neighbours.

    Args:
        atoms: Sequence of values accepted by ``Chem.Atom`` (one per atom).
        A: Square matrix indexed ``A[i, j]``; a value > 0 is the bond order
            (1, 2 or 3) between atoms ``i`` and ``j``.  Only ``j >= i`` is
            read.
        nlist_model: Callable taking an ``(N, 3)`` position array and
            returning an array indexed ``[atom, slot, k]`` where ``k = 0`` is
            the distance (values >= 100 mark unused slots) and ``k = 1`` the
            neighbour's atom index.
        embeddings: Mapping with ``embeddings['nlist']`` providing the codes
            ``'nonbonded'``, ``1`` (bonded) and ``'none'`` (padding).

    Yields:
        Arrays of shape ``(MAX_ATOM_NUMBER, NEIGHBOR_NUMBER, 3)`` holding
        distance / 10, neighbour index and bond code per slot.

    Raises:
        ValueError, RuntimeError: Re-raised from the embedding step after
            the molecule's SMILES has been printed.
    """
    bond_orders = {
        1: Chem.rdchem.BondType.SINGLE,
        2: Chem.rdchem.BondType.DOUBLE,
        3: Chem.rdchem.BondType.TRIPLE
    }
    m = Chem.EditableMol(Chem.Mol())
    for a in atoms:
        m.AddAtom(Chem.Atom(a))
    for i in range(len(atoms)):
        for j in range(i, len(atoms)):
            if A[i, j] > 0:
                m.AddBond(i, j, bond_orders[A[i, j]])
    mol = m.GetMol()

    try:
        # A separate force-field optimisation is not necessary according to
        # the current docs, so only the embedding is done.
        AllChem.EmbedMolecule(mol)
    except (ValueError, RuntimeError):
        print('Unable to process')
        print(Chem.MolToSmiles(mol))
        raise

    # Symmetric 0/1 bond matrix in the molecule's atom ordering.  It does
    # not depend on the conformer, so it is built once.  0 -> non-bonded.
    bonded = np.zeros((MAX_ATOM_NUMBER, MAX_ATOM_NUMBER), dtype=np.int64)
    for b in mol.GetBonds():
        bonded[b.GetBeginAtomIdx(), b.GetEndAtomIdx()] = 1
        bonded[b.GetEndAtomIdx(), b.GetBeginAtomIdx()] = 1

    for c in mol.GetConformers():
        pos = c.GetPositions()
        N = len(pos)
        np_pos = np.zeros((N, 3))
        np_pos[:N, :] = pos
        pos_nlist = nlist_model(np_pos)
        nlist = np.zeros((MAX_ATOM_NUMBER, NEIGHBOR_NUMBER, 3))

        for index in range(N):
            for ni in range(len(pos_nlist[index])):
                # A large distance is the sentinel for "not part of nlist".
                if pos_nlist[index, ni, 0] >= 100:
                    continue
                j = int(pos_nlist[index, ni, 1])
                # / 10 to get to nm
                nlist[index, ni, 0] = pos_nlist[index, ni, 0] / 10
                nlist[index, ni, 1] = j
                # Look the bond up by the neighbour's ATOM index ``j``.
                # The previous code used the slot number ``ni`` here, which
                # tested the wrong pair of atoms.
                if bonded[index, j] == 0:
                    nlist[index, ni, 2] = embeddings['nlist']['nonbonded']
                else:
                    # currently only single is used!
                    nlist[index, ni, 2] = embeddings['nlist'][1]

        # Pad out the nlist: rows beyond the real atoms keep distance 0 and
        # neighbour 0 and get the 'none' code.
        nlist[N:, :, 2] = embeddings['nlist']['none']

        yield nlist
def construct_ligand(fragment_ids, bond_ids, fragment_library):
    """
    Build one ligand by joining several fragments at the given atom pairs.

    Parameters
    ----------
    fragment_ids : list of str
        Fragment IDs of the recombined ligand, e.g. `["SE_2", "AP_0", "FP_2"]`
        (`<subpocket>_<fragment index in subpocket pool>`).
    bond_ids : list of list of str
        Bond IDs of the recombined ligand, e.g.
        `[["FP_6", "AP_10"], ["AP_11", "SE_13"]]`: one pair of atom IDs
        (`<subpocket>_<atom ID>`) per bond between fragments.
    fragment_library : dict of pandas.DataFrame
        SMILES and RDKit molecules for fragments (values) per subpocket (key).

    Returns
    -------
    rdkit.Chem.rdchem.Mol or None
        The recombined ligand, or None when the bond types at a connection do
        not match or hydrogens cannot be removed from the result.
    """
    labelled = []
    for fragment_id in fragment_ids:
        # "<subpocket>_<index>": two-letter subpocket, index after the separator
        subpocket, position = fragment_id[:2], int(fragment_id[3:])
        source = fragment_library[subpocket].ROMol_original[position]
        heavy = Chem.RemoveHs(source)

        # Tag every atom with an ID that survives CombineMols, so the bond
        # partners can be found again in the combined molecule.
        for atom_number, atom in enumerate(heavy.GetAtoms()):
            atom.SetProp("fragment_atom_id", f"{subpocket}_{atom_number}")
            atom.SetProp("fragment_id", heavy.GetProp("complex_pdb"))
        labelled.append(PropertyMol(heavy))

    # Fold all fragments into one disconnected molecule
    combo = reduce(Chem.CombineMols, labelled)
    ed_combo = Chem.EditableMol(combo)

    def first_atom_with_id(atom_id):
        # First atom of the combined molecule carrying this fragment atom ID
        return next(
            atom for atom in combo.GetAtoms()
            if atom.GetProp("fragment_atom_id") == atom_id
        )

    consumed_dummies = []
    for first_id, second_id in (bond[:2] for bond in bond_ids):
        dummy_1 = first_atom_with_id(first_id)
        dummy_2 = first_atom_with_id(second_id)
        atom_1 = dummy_1.GetNeighbors()[0]
        atom_2 = dummy_2.GetNeighbors()[0]

        # Both dummies must hang off their fragment by the same bond type
        bond_type_1 = combo.GetBondBetweenAtoms(
            dummy_1.GetIdx(), atom_1.GetIdx()).GetBondType()
        bond_type_2 = combo.GetBondBetweenAtoms(
            dummy_2.GetIdx(), atom_2.GetIdx()).GetBondType()
        if bond_type_1 != bond_type_2:
            # Do not construct this ligand if bond types are not matching
            return None

        ed_combo.AddBond(atom_1.GetIdx(), atom_2.GetIdx(), order=bond_type_1)
        consumed_dummies.extend([dummy_1.GetIdx(), dummy_2.GetIdx()])

    # Delete from the highest index down so pending indices stay valid
    for dummy_idx in sorted(consumed_dummies, reverse=True):
        ed_combo.RemoveAtom(dummy_idx)
    ligand = ed_combo.GetMol()

    # Every dummy that was not used for a bond becomes a hydrogen
    du = Chem.MolFromSmiles("*")
    h = Chem.MolFromSmiles("[H]", sanitize=False)
    ligand = AllChem.ReplaceSubstructs(ligand, du, h, replaceAll=True)[0]
    try:
        ligand = Chem.RemoveHs(ligand)
    except ValueError:
        print(Chem.MolToSmiles(ligand))
        return None

    # Strip the bookkeeping properties again
    for prop in ligand.GetPropNames():
        ligand.ClearProp(prop)
    for atom in ligand.GetAtoms():
        atom.ClearProp("fragment_atom_id")

    # Generate 2D coordinates
    AllChem.Compute2DCoords(ligand)
    return ligand
Chem.rdchem.BondType.DOUBLE, Chem.rdchem.BondType.TRIPLE, Chem.rdchem.BondType.AROMATIC] smi_arr = [] mol_arr = [] ct = 0 len_iter = 0 while True: len_iter+=1 if ct == 100: break try: sample = model.sample(1, 8)[0] sample = [sample[0][0], sample[1][0],sample[2][0]] new_mol = Chem.Mol() new_mol = Chem.EditableMol(new_mol) for i in range(8): new_atom_type = atom_types[np.argmax(sample[0][i])] new_atom = Chem.Atom(new_atom_type) ind = new_mol.AddAtom(new_atom) assert ind == i for i in range(8): for j in range(i+1, 8): if sample[2][i, j, 1] > 0.5: new_mol.AddBond(i, j, Chem.rdchem.BondType.SINGLE) mol = new_mol.GetMol() Chem.SanitizeMol(mol) smi = Chem.MolToSmiles(mol) if '.' in smi: continue print(smi)
def func_chain_retro(lp, rxn):
    '''
    Retrosynthetically functionalize a chain.

    The majority of reactions only transform at most one match.

    Parameters
    ----------
    lp : ru.LinearPol, str or Chem.rdchem.Mol
        The polymer. A string or RDKit molecule is wrapped in ``ru.LinearPol``
        for convenience; pass a ``LinearPol`` object when possible.
    rxn : str
        Key into ``func_chain_rxns``. ``'aldean'``, ``'thiol-ene'`` and
        ``'azal'`` are run as reaction SMARTS; every other key is looked up as
        a substructure query.

    Returns
    -------
    list
        Candidate precursor molecules; empty when nothing applies.
    '''
    if isinstance(lp, (str, Chem.rdchem.Mol)):
        lp = ru.LinearPol(lp)

    if rxn in ('aldean', 'thiol-ene', 'azal'):
        dummy_query = Chem.MolFromSmarts('[#0]')  # loop-invariant, build once
        for smart in func_chain_rxns[rxn]:
            rd_rxn = Chem.AllChem.ReactionFromSmarts(smart)  # RDKit reaction object
            rxn_out = rd_rxn.RunReactants((lp.mol, ))
            if len(rxn_out) != 0:
                for mol in rxn_out[0]:  # only look at first set of products
                    if mol.HasSubstructMatch(dummy_query):
                        return [mol]
        return []

    replace_group = func_chain_rxns[rxn]
    if rxn == 'nitro_base':
        sc = lp.SideChainMol()
        if sc is None:
            return []
        if len(sc.GetSubstructMatches(replace_group)) != 0:
            return []

    matches = lp.mol.GetSubstructMatches(replace_group)
    if len(matches) == 0:
        return []
    flat_matches = ru.flatten_ll(matches)
    # make sure the connector inds aren't being matched
    if lp.star_inds[0] in flat_matches or lp.star_inds[1] in flat_matches:
        return []

    mols = []
    if rxn == 'nitro_base':
        # this branch is for reactions where arbitrary groups must be removed
        frag_mol = Chem.FragmentOnBonds(lp.mol, [
            lp.mol.GetBondBetweenAtoms(i, j).GetIdx() for i, j in matches
        ])
        frag_mols = Chem.GetMolFrags(frag_mol)[1:]
        frag_ls = Chem.MolToSmiles(
            ru.mol_without_atom_index(frag_mol)).split('.')[1:]
        # Raw string: same pattern as before, without invalid-escape warnings
        frag_ls = [re.sub(r'\[[0-9]+\*\]', '*', s) for s in frag_ls]
        unique_frags = {}  # SMILES -> list of atom indices
        for i, s in enumerate(frag_ls):
            unique_frags.setdefault(s, []).extend(frag_mols[i])
        all_atoms = set(range(lp.mol.GetNumAtoms()))
        for combo_matches in unique_frags.values():
            keep_atoms = all_atoms.difference(combo_matches)
            mols.append(
                lp.SubChainMol(
                    lp.mol, [lp.mol.GetAtomWithIdx(x) for x in keep_atoms]))
        return mols

    for size in range(1, len(matches) + 1):
        for match_combo in itertools.combinations(matches, size):
            # Deduplicate: overlapping matches would otherwise remove the same
            # index twice, and the second removal would delete an unrelated
            # atom that had shifted into that position.
            o_inds = sorted(
                {idx for match in match_combo for idx in (match[1], match[2])},
                reverse=True)
            em = Chem.EditableMol(lp.mol)
            for idx in o_inds:  # highest first so indices stay valid
                em.RemoveAtom(idx)
            new_mol = em.GetMol()
            try:
                Chem.SanitizeMol(new_mol)
            except Exception:
                # Best effort: chemically invalid candidates are dropped.
                continue
            mols.append(new_mol)
    return mols
def _vec_to_mol(dv, de, atom_list, bpatt_dim, train=False):
    """Decode node/edge feature arrays into a canonical SMILES string.

    Parameters
    ----------
    dv : numpy.ndarray
        Node features, sliced as ``dv[:, :4]`` (charge), ``dv[:, 4:7]``
        (explicit H count) and ``dv[:, -len(atom_list):]`` (atom type).
    de : numpy.ndarray
        Edge features, sliced on the last axis: the first three channels are
        the bond order, followed by one group of ``bpatt_dim[i]`` channels per
        bridge pattern.
    atom_list : sequence
        Atom symbols/numbers accepted by ``Chem.Atom``.
    bpatt_dim : sequence of int
        Channel count of each bridge pattern; at least six entries are used.
    train : bool
        When true, a disconnected result (``'.'`` in the SMILES) is an error.

    Returns
    -------
    str
        SMILES of the sanitized, re-parsed molecule.

    Raises
    ------
    RuntimeError
        If ``train`` is true and the decoded molecule is disconnected.
    """
    def to_dummy(vec, ax=1, thr=1):
        # Append a "none of the above" channel so argmax can select it.
        return np.concatenate(
            [vec, thr - np.sum(vec, ax, keepdims=True)], ax)

    def cat_to_val(vec, cat):
        # Map category indices to values; the extra channel maps to 0.
        # Builds a new list instead of appending to the caller's.
        return np.array(list(cat) + [0])[vec]

    bond_ref = [Chem.BondType.SINGLE, Chem.BondType.DOUBLE,
                Chem.BondType.TRIPLE]
    n_bond = len(bond_ref)

    node_atom = np.argmax(to_dummy(dv[:, -len(atom_list):], 1), 1)
    node_charge = cat_to_val(np.argmax(to_dummy(dv[:, :4], 1), 1),
                             [-1, 1, 2, 3])
    node_exp = cat_to_val(np.argmax(to_dummy(dv[:, 4:7], 1), 1), [1, 2, 3])

    edge_bond = np.argmax(to_dummy(de[:, :, :n_bond], 2), 2)
    edge_patt = []
    for i in range(len(bpatt_dim)):
        lo = n_bond + sum(bpatt_dim[:i])
        hi = n_bond + sum(bpatt_dim[:i + 1])
        edge_patt.append(
            cat_to_val(np.argmax(to_dummy(de[:, :, lo:hi], 2), 2),
                       range(1, bpatt_dim[i] + 1)))

    # Keep only nodes that decoded to a real atom type.
    selid = np.where(node_atom < len(atom_list))[0]
    node_atom = node_atom[selid]
    node_charge = node_charge[selid]
    node_exp = node_exp[selid]
    edge_bond = edge_bond[selid][:, selid]
    edge_patt = [ep[selid][:, selid] for ep in edge_patt]

    edmol = Chem.EditableMol(Chem.MolFromSmiles(''))
    n_core = len(node_atom)
    for j in range(n_core):
        atom_add = Chem.Atom(atom_list[node_atom[j]])
        if node_charge[j] != 0:
            atom_add.SetFormalCharge(int(node_charge[j]))
        if node_exp[j] > 0:
            atom_add.SetNumExplicitHs(int(node_exp[j]))
        edmol.AddAtom(atom_add)

    for j in range(n_core - 1):
        for k in range(j + 1, n_core):
            if edge_bond[j, k] < n_bond:
                edmol.AddBond(j, k, bond_ref[edge_bond[j, k]])

    # Bridge patterns, in edge_patt order: the atoms inserted between j and k
    # and the bond orders along the chain j - atoms... - k.
    single, double = bond_ref[0], bond_ref[1]
    bridge_patterns = (
        (('C', 'C'), (single, single, single)),
        (('C',), (single, single)),
        (('C', 'C'), (single, double, single)),
        (('C', 'C'), (double, single, double)),
        (('N',), (single, single)),
        (('O',), (single, single)),
    )
    for j in range(n_core):
        for k in range(j + 1, n_core):
            for patt_idx, (symbols, orders) in enumerate(bridge_patterns):
                for _ in range(edge_patt[patt_idx][j, k]):
                    # AddAtom returns the index of the atom it appended.
                    chain = [j]
                    chain.extend(
                        [edmol.AddAtom(Chem.Atom(sym)) for sym in symbols])
                    chain.append(k)
                    for begin, end, order in zip(chain, chain[1:], orders):
                        edmol.AddBond(begin, end, order)

    mol_rec = edmol.GetMol()
    # sanity check
    Chem.SanitizeMol(mol_rec)

    mol_n = Chem.MolFromSmiles(Chem.MolToSmiles(mol_rec))
    output = Chem.MolToSmiles(mol_n)
    if train and '.' in output:
        # The original used a bare ``raise`` here, which only "worked" by
        # triggering "RuntimeError: No active exception to reraise".
        raise RuntimeError('decoded molecule is disconnected: ' + output)
    return output
def to_rdmol(plams_mol, sanitize=True, properties=True, assignChirality=False):
    """
    Convert a PLAMS molecule into an RDKit molecule.

    PLAMS |Molecule|, |Atom| or |Bond| properties that are neither booleans,
    floats, integers nor strings are pickled, and the resulting property names
    get the suffix '_pickled'.

    :parameter plams_mol: A PLAMS molecule
    :parameter bool sanitize: Kekulize, check valencies, set aromaticity,
        conjugation and hybridization
    :parameter bool properties: Whether all |Molecule|, |Atom| and |Bond|
        properties should be carried over from PLAMS to RDKit.
    :parameter bool assignChirality: Assign R/S and cis/trans information,
        insofar as it was not yet present in the PLAMS molecule.
    :type plams_mol: |Molecule|
    :return: an RDKit molecule
    :rtype: rdkit.Chem.Mol
    """
    # Already an RDKit molecule: hand it back untouched
    if isinstance(plams_mol, Chem.Mol):
        return plams_mol

    chiral_tags = {
        'counter-clockwise': Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CCW,
        'clockwise': Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CW,
    }
    double_bond_stereo = {
        'e': Chem.rdchem.BondStereo.STEREOE,
        'trans': Chem.rdchem.BondStereo.STEREOE,
        'z': Chem.rdchem.BondStereo.STEREOZ,
        'cis': Chem.rdchem.BondStereo.STEREOZ,
    }
    bond_directions = {
        'up': Chem.rdchem.BondDir.ENDUPRIGHT,
        'down': Chem.rdchem.BondDir.ENDDOWNRIGHT,
    }

    def rd_bond_order(order):
        # PLAMS uses 1.5 for aromatic bonds; 12 is the RDKit aromatic code
        return 12 if 1.4 < order < 1.6 else int(order)

    builder = Chem.EditableMol(Chem.Mol())

    # Atoms first; their properties are copied only when *properties* is set
    for pl_atom in plams_mol.atoms:
        rd_atom = Chem.Atom(pl_atom.atnum)
        if 'charge' in pl_atom.properties:
            rd_atom.SetFormalCharge(pl_atom.properties.charge)
        if properties:
            if 'pdb_info' in pl_atom.properties:
                set_PDBresidueInfo(rd_atom, pl_atom.properties.pdb_info)
            for prop in pl_atom.properties:
                if prop not in ('charge', 'pdb_info', 'stereo'):
                    prop_to_rdmol(pl_atom, rd_atom, prop)

        # R/S information
        if pl_atom.properties.stereo:
            tag = chiral_tags.get(pl_atom.properties.stereo.lower())
            if tag is not None:
                rd_atom.SetChiralTag(tag)
        builder.AddAtom(rd_atom)

    # Bonds, addressed by the position of their atoms in the PLAMS atom list
    for bond in plams_mol.bonds:
        begin = plams_mol.atoms.index(bond.atom1)
        end = plams_mol.atoms.index(bond.atom2)
        builder.AddBond(begin, end, Chem.BondType(rd_bond_order(bond.order)))
    rdmol = builder.GetMol()

    # cis/trans information
    for pl_bond, rd_bond in zip(plams_mol.bonds, rdmol.GetBonds()):
        if not pl_bond.properties.stereo:
            continue
        label = pl_bond.properties.stereo.lower()
        if label in double_bond_stereo:
            rd_bond.SetStereo(double_bond_stereo[label])
        elif label in bond_directions:
            rd_bond.SetBondDir(bond_directions[label])

    # Molecule-level and bond-level properties
    if properties:
        for prop in plams_mol.properties:
            prop_to_rdmol(plams_mol, rdmol, prop)
        for pl_bond, rd_bond in zip(plams_mol.bonds, rdmol.GetBonds()):
            for prop in pl_bond.properties:
                if prop != 'stereo':
                    prop_to_rdmol(pl_bond, rd_bond, prop)

    if sanitize:
        Chem.SanitizeMol(rdmol)

    # Copy the coordinates into a single conformer
    conf = Chem.Conformer()
    for position, atom in enumerate(plams_mol.atoms):
        conf.SetAtomPosition(
            position,
            Geometry.Point3D(atom._getx(), atom._gety(), atom._getz()))
    rdmol.AddConformer(conf)

    # REB: Assign all stereochemistry, if it wasn't already there
    if assignChirality:
        Chem.rdmolops.AssignAtomChiralTagsFromStructure(
            rdmol, confId=conf.GetId(), replaceExistingTags=False)
        try:
            Chem.AssignStereochemistryFrom3D(
                rdmol, confId=conf.GetId(), replaceExistingTags=False)
        except AttributeError:
            # Chem lacks this function in some builds; skip silently
            pass
    return rdmol
def ring_fixer(cross_mol, parent_mol):
    '''Bring a crossover product back to exactly five rings.

    Two point crossover can lead to too short and too long molecules; this
    function either adds to or subtracts from the molecule.

    cross_mol  -- molecule produced by the crossover
    parent_mol -- parent molecule; a ring fragment is taken from it when
                  cross_mol has fewer than five rings

    Returns cross_mol itself when it already has five rings, otherwise the
    repaired molecule.

    Progress is reported with single-argument print(...) calls, which give
    the same output under Python 2 and Python 3.
    '''
    rings = cross_mol.GetRingInfo()
    num_rings = rings.NumRings()
    print("number of rings = " + str(num_rings))

    if num_rings == 5:
        new_mol = cross_mol

    elif num_rings < 5:
        # Too short: open the last ring at two of its atoms and attach a ring
        # fragment cut from the parent.
        ring_ats = rings.AtomRings()[num_rings - 1]
        end_ats = [
            atom.GetIdx() for atom in cross_mol.GetAtoms()
            if atom.GetIdx() in ring_ats and len(atom.GetBonds()) == 2
        ]
        cp = return_cut_points(parent_mol)
        ring_cp = [cp[num_rings - 1] + cp[-num_rings]]
        print(ring_cp)
        actual_cp = get_actual_cut_pts(parent_mol, num_rings, cp)
        print(actual_cp)
        additional_ring = frag_mol(parent_mol, actual_cp, bf='n')[1]

        # Atomic number 0 is a dummy atom
        te2_m = Chem.EditableMol(cross_mol)
        te2_m.ReplaceAtom(end_ats[1], Chem.Atom(0))
        te2_m.ReplaceAtom(end_ats[2], Chem.Atom(0))
        tm2 = te2_m.GetMol()
        Chem.SanitizeMol(tm2)
        temp_mol = Chem.CombineMols(tm2, additional_ring)
        new_mol = join_mols(temp_mol)

    else:
        # Too long: cut at the sixth ring and close the remaining fragment
        # with two new aromatic carbons.
        cut_pts = return_cut_points(cross_mol)
        ring_ats = rings.AtomRings()[5]
        cut_atoms = [ring_ats[-1], ring_ats[-2]]
        print(cut_atoms)
        cp_tup = [(cut_at, cut_pt) for cut_at, cut_pt in cut_pts
                  if cut_at in cut_atoms]
        frags = frag_mol(cross_mol, cp_tup, bf='n')
        temp_mol = frags[0]
        dum_ats = find_dummy_atoms(temp_mol)

        te_m = Chem.EditableMol(temp_mol)
        te_m.RemoveAtom(dum_ats[1][0])
        te_m.RemoveAtom(dum_ats[0][0])
        # these new atoms are added onto the end of the atom idx
        new_at = te_m.AddAtom(Chem.Atom(6))
        # all orig atoms are moved down by 1 (original author's note)
        new_at2 = te_m.AddAtom(Chem.Atom(6))
        te_m.AddBond(new_at, dum_ats[0][1][0], Chem.BondType.AROMATIC)
        te_m.AddBond(new_at, new_at2, Chem.BondType.AROMATIC)
        te_m.AddBond(new_at2, dum_ats[-1][-1][1], Chem.BondType.AROMATIC)
        tm = te_m.GetMol()
        Chem.SanitizeMol(tm)
        new_mol = tm

    return new_mol
def dethiolate(self) -> Chem.Mol:
    """Return a copy of ``self.thio_mol`` with the atom at ``self.SX_idx`` removed."""
    editable = Chem.EditableMol(self.thio_mol)
    editable.RemoveAtom(self.SX_idx)
    stripped = editable.GetMol()
    return stripped
def getFragmentB(self):
    """Return the SMILES of ``self.frag2`` with atom-map numbers cleared."""
    # Work on the molecule handed back by EditableMol.GetMol() rather than on
    # self.frag2 directly (presumably so the stored fragment keeps its map
    # numbers -- verify against callers).
    unmapped = Chem.EditableMol(self.frag2).GetMol()
    for member in unmapped.GetAtoms():
        member.ClearProp('molAtomMapNumber')
    smiles = Chem.MolToSmiles(unmapped)
    return smiles
def BreakBRICSBonds(mol, bonds=None, sanitize=True, silent=True):
    """ breaks the BRICS bonds in a molecule and returns the results

    Each broken bond is replaced by two dummy atoms, one on either side,
    whose isotope labels carry the BRICS environment types.

    >>> from rdkit import Chem
    >>> m = Chem.MolFromSmiles('CCCOCC')
    >>> m2=BreakBRICSBonds(m)
    >>> Chem.MolToSmiles(m2,True)
    '[3*]O[3*].[4*]CC.[4*]CCC'

    a more complicated case:

    >>> m = Chem.MolFromSmiles('CCCOCCC(=O)c1ccccc1')
    >>> m2=BreakBRICSBonds(m)
    >>> Chem.MolToSmiles(m2,True)
    '[16*]c1ccccc1.[3*]O[3*].[4*]CCC.[4*]CCC([6*])=O'

    can also specify a limited set of bonds to work with:

    >>> m = Chem.MolFromSmiles('CCCOCC')
    >>> m2 = BreakBRICSBonds(m,[((3, 2), ('3', '4'))])
    >>> Chem.MolToSmiles(m2,True)
    '[3*]OCC.[4*]CCC'

    this can be used as an alternate approach for doing a BRICS decomposition
    by following BreakBRICSBonds with a call to Chem.GetMolFrags:

    >>> m = Chem.MolFromSmiles('CCCOCC')
    >>> m2=BreakBRICSBonds(m)
    >>> frags = Chem.GetMolFrags(m2,asMols=True)
    >>> [Chem.MolToSmiles(x,True) for x in frags]
    ['[4*]CCC', '[3*]O[3*]', '[4*]CC']

    """
    # No explicit bond list: let the built-in fragmenter find the bonds
    if not bonds:
        fragmented = Chem.FragmentOnBRICSBonds(mol)
        if sanitize:
            Chem.SanitizeMol(fragmented)
        return fragmented

    editable = Chem.EditableMol(mol)
    next_idx = mol.GetNumAtoms()
    has_conformers = bool(mol.GetNumConformers())
    # (dummy atom index, index of the atom whose coordinates it takes over)
    placements = []

    for (ia, ib), dummy_types in bonds:
        bond_type = mol.GetBondBetweenAtoms(ia, ib).GetBondType()
        editable.RemoveBond(ia, ib)
        da, db = dummy_types

        # One dummy per side; each sits where the former partner atom was
        for anchor, label, partner in ((ia, da, ib), (ib, db, ia)):
            dummy = Chem.Atom(0)
            dummy.SetIsotope(int(label))
            dummy.SetNoImplicit(True)
            dummy_idx = next_idx
            next_idx += 1
            editable.AddAtom(dummy)
            editable.AddBond(anchor, dummy_idx, bond_type)
            if has_conformers:
                placements.append((dummy_idx, partner))

    res = editable.GetMol()
    if sanitize:
        Chem.SanitizeMol(res)

    if has_conformers:
        for conf in mol.GetConformers():
            res_conf = res.GetConformer(conf.GetId())
            for dummy_idx, partner in placements:
                res_conf.SetAtomPosition(dummy_idx,
                                         conf.GetAtomPosition(partner))
    return res
def handle_covalent_mol(self, lig_res_name, non_cov_mol):
    '''
    Attach the covalently linked protein atom to a ligand molecule.

    ``self.pdbfile`` (the already aligned PDB, as an iterable of lines) is
    scanned for LINK records that mention the ligand. The protein atom named
    by the other side of the link is appended to the ligand, bonded with a
    single bond to the closest ligand atom, and placed at its PDB coordinates.

    :param lig_res_name: Name of the covalent ligand
    :param non_cov_mol: Previous molecule that does not have the covalent
        attachment in it.
    :return: A new molecule IF the ligand is indeed covalent, otherwise None.
        None is also returned when the linked residue atom has no ATOM record
        or no ligand atom lies within the 100-unit search cutoff.
    '''
    # Which atom sits on the other side of the ligand's LINK record?
    res = None
    for line in self.pdbfile:
        if 'LINK' in line:
            zero = line[13:27]
            one = line[43:57]
            if lig_res_name in zero:
                res = one
            if lig_res_name in one:
                res = zero
    if res is None:
        return None  # no LINK record mentions the ligand: not covalent

    # Coordinates and RDKit atom of the linked residue atom; the last
    # matching ATOM record wins.
    res_coords = None
    atm_trans = None
    for line in self.pdbfile:
        if 'ATOM' in line and line[13:27] == res:
            res_coords = [
                float(line[31:39]),
                float(line[39:47]),
                float(line[47:55]),
            ]
            print(res_coords)
            atm = Chem.MolFromPDBBlock(line)
            atm_trans = atm.GetAtomWithIdx(0)
    if res_coords is None:
        # LINK names an atom that has no ATOM record; previously this path
        # ended in an UnboundLocalError.
        return None

    # Ligand atom records, in atom order
    orig_pdb_block = Chem.MolToPDBBlock(non_cov_mol)
    lig_lines = [
        l for l in orig_pdb_block.split('\n')
        if 'COMPND' not in l and 'HETATM' in l
    ]

    # Closest ligand atom within the cutoff of 100
    nearest_idx = None
    nearest_dist = 100
    for atom_idx, line in enumerate(lig_lines):
        coords = [
            line[31:39].strip(), line[39:47].strip(), line[47:55].strip()
        ]
        dist = self.get_3d_distance(coords, res_coords)
        if dist < nearest_dist:
            nearest_idx = atom_idx
            print(dist)
            nearest_dist = dist
    if nearest_idx is None:
        return None  # nothing to bond to

    # The new atom is appended, so its index is the old atom count
    new_atom_idx = non_cov_mol.GetNumAtoms()
    edmol = Chem.EditableMol(non_cov_mol)
    edmol.AddAtom(atm_trans)
    edmol.AddBond(nearest_idx, new_atom_idx, Chem.BondType.SINGLE)
    new_mol = edmol.GetMol()
    conf = new_mol.GetConformer()
    conf.SetAtomPosition(
        new_atom_idx,
        Point3D(res_coords[0], res_coords[1], res_coords[2]))
    return new_mol