def align_mcs(mols, num_confs,
              ref_sdf_path='/Users/tom/code_test_repository/arrow_testing/cdk2.sdf'):
    """Embed conformers for each molecule, align them to a reference and return mol blocks.

    For every molecule a deep copy is hydrogenated, ``num_confs`` conformers
    are requested, each embedded conformer is aligned onto the first molecule
    of ``ref_sdf_path`` using the maximum-common-substructure atom mapping,
    and the conformer with the lowest alignment score is written out.

    :param mols: iterable of RDKit molecules (deep-copied, not modified).
    :param num_confs: number of conformers to request per molecule.
    :param ref_sdf_path: SD file whose first record is the reference molecule.
    :return: ``pa.array`` of mol-block strings, one per input molecule.
    """
    suppl = [m for m in AllChem.SDMolSupplier(ref_sdf_path, removeHs=False)]
    ref_mol = suppl[0]
    print(f'ref mol has atoms = {ref_mol.GetNumAtoms()}')
    mols_b = copy.deepcopy(mols)
    mol_blocks = []
    for mol in mols_b:
        mol = AllChem.AddHs(mol)
        # Embedding may yield fewer conformers than requested, so iterate over
        # the ids that were actually produced instead of range(num_confs).
        conf_ids = list(AllChem.EmbedMultipleConfs(mol, numConfs=num_confs))
        mcs = rdFMCS.FindMCS([mol, ref_mol])
        match = Chem.MolFromSmarts(mcs.smartsString)
        test_match_atoms = mol.GetSubstructMatch(match)
        ref_match_atoms = ref_mol.GetSubstructMatch(match)
        # The probe->reference atom mapping is the same for every conformer.
        atom_map = list(zip(test_match_atoms, ref_match_atoms))
        # Find alignments of all conformers of new drug to old drug:
        alignments_scores = [
            rdMolAlign.AlignMol(mol, ref_mol, prbCid=cid, atomMap=atom_map)
            for cid in conf_ids
        ]
        best_cid = conf_ids[int(np.argmin(alignments_scores))]
        # NOTE(review): canonicalizing presumably re-orients the conformer on
        # its own principal axes after the alignment -- confirm this is intended.
        AllChem.CanonicalizeConformer(mol.GetConformer(best_cid))
        # Write the selected conformer; the default confId would emit the
        # molecule's default conformer rather than the best-scoring one.
        mol_blocks.append(Chem.MolToMolBlock(mol, confId=best_cid))
    return pa.array(mol_blocks)
def align_probe_to_target(self, probe: Chem.Mol) -> int:  # inplace
    """
    Align every conformer of the probe onto ``self.target`` over their common
    substructure and report which conformer scored lowest.

    :param probe: modified inplace
    :return: index of best conformer
    """
    # what both molecules share
    shared = self._get_common(probe)
    # matching atom indices on either side
    target_hits = self.target.GetSubstructMatch(shared)
    probe_hits = probe.GetSubstructMatch(shared)
    pairing = list(zip(probe_hits, target_hits))
    scores = []
    for conf_idx in range(probe.GetNumConformers()):
        score = rdMolAlign.AlignMol(probe,
                                    self.target,
                                    prbCid=conf_idx,
                                    atomMap=pairing,
                                    maxIters=500)
        scores.append(score)
    return scores.index(min(scores))
def PerformAlignmentAndWrieOutput(RefMol, ProbeMol, RefMolName, ProbeMolName, Writer):
    """Perform alignment and write to output file.

    The alignment method is selected from the ``OptionsInfo`` flags. On a
    ``RuntimeError`` or ``ValueError`` a warning is printed and the probe is
    still written. Returns True when no such error occurred, False otherwise.
    """

    Succeeded = True
    try:
        if OptionsInfo["UseRMSD"]:
            rdMolAlign.AlignMol(ProbeMol, RefMol, maxIters=OptionsInfo["MaxIters"])
        elif OptionsInfo["UseBestRMSD"]:
            AllChem.GetBestRMS(RefMol, ProbeMol)
        elif OptionsInfo["UseOpen3A"]:
            rdMolAlign.GetO3A(ProbeMol, RefMol).Align()
        elif OptionsInfo["UseCrippenOpen3A"]:
            rdMolAlign.GetCrippenO3A(ProbeMol, RefMol).Align()
        else:
            MiscUtil.PrintError(
                "Alignment couldn't be performed: Specified alignment value, %s, is not supported"
                % OptionsInfo["Alignment"])
    except (RuntimeError, ValueError):
        Succeeded = False
        MiscUtil.PrintWarning(
            "Alignment failed between reference molecule, %s, and probe molecule, %s.\nWriting unaligned probe molecule...\n"
            % (RefMolName, ProbeMolName))

    # The probe is written whether or not the alignment succeeded.
    Writer.write(ProbeMol)

    return Succeeded
def align_probe_to_target(self) -> int:  # inplace
    """
    Align each conformer of ``self.dethio_mol`` onto ``self.best_hit.mol``
    in order to get most positions okay.

    ``self.dethio_mol`` is modified inplace.

    :return: index of best conformer
    """
    reference = self.best_hit.mol
    # what is common
    shared = self._get_common(reference)
    # matching atom indices on either side
    reference_hits = reference.GetSubstructMatch(shared)
    probe_hits = self.dethio_mol.GetSubstructMatch(shared)
    pairing = list(zip(probe_hits, reference_hits))
    scores = []
    for conf_idx in range(self.dethio_mol.GetNumConformers()):
        score = rdMolAlign.AlignMol(self.dethio_mol,
                                    self.best_hit.mol,
                                    prbCid=conf_idx,
                                    atomMap=pairing,
                                    maxIters=500)
        scores.append(score)
    return scores.index(min(scores))
def test2AtomMap(self):
    """Align 1oir_conf onto 1oir with an explicit atom map and check the RMSD."""
    data_dir = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'MolAlign', 'test_data')
    reference = Chem.MolFromMolFile(os.path.join(data_dir, '1oir.mol'))
    probe = Chem.MolFromMolFile(os.path.join(data_dir, '1oir_conf.mol'))
    # (probe atom, reference atom) index pairs
    pairs = ((18, 27), (13, 23), (21, 14), (24, 7), (9, 19), (16, 30))
    result = rdMolAlign.AlignMol(probe, reference, 0, 0, pairs)
    self.assertAlmostEqual(result, 0.8525, 4)
def calculate_rmsd(ligand, reference):
    """
    Return the value of a single ``rdMolAlign.AlignMol`` call (``maxIters=0``)
    for the pair, or ``-3.0`` when either molecule is missing.

    Parameters
    ----------
    ligand, reference : rdkit.Chem.molecule
    """
    if ligand is None or reference is None:
        # sentinel for "nothing to compare"
        return -3.0
    return rdMolAlign.AlignMol(ligand, reference, maxIters=0)
def test3Weights(self):
    """Align 1oir_conf onto 1oir with an atom map and per-pair weights."""
    atomMap = ((18, 27), (13, 23), (21, 14), (24, 7), (9, 19), (16, 30))
    file1 = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'MolAlign', 'test_data',
                         '1oir.mol')
    file2 = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'MolAlign', 'test_data',
                         '1oir_conf.mol')
    mol1 = Chem.MolFromMolFile(file1)
    mol2 = Chem.MolFromMolFile(file2)
    # one weight per atom-map pair; the last pair counts double
    wts = (1.0, 1.0, 1.0, 1.0, 1.0, 2.0)
    rmsd = rdMolAlign.AlignMol(mol2, mol1, 0, 0, atomMap, wts)
    # failUnlessAlmostEqual is a deprecated alias that newer unittest
    # versions no longer provide; assertAlmostEqual is the supported name.
    self.assertAlmostEqual(rmsd, 0.9513, 4)
def CalculateRMSDValue(RefMol, ProbeMol):
    """Calculate RMSD value for a pair of molecules and return it as a string.

    The value is formatted with two decimals; the string "None" is returned
    when the calculation raises RuntimeError or ValueError.
    """

    try:
        if not OptionsInfo["UseBestRMSD"]:
            Value = rdMolAlign.AlignMol(ProbeMol, RefMol, maxIters = OptionsInfo["MaxIters"])
        else:
            Value = AllChem.GetBestRMS(RefMol, ProbeMol)
        return "%.2f" % Value
    except (RuntimeError, ValueError):
        return "None"
def realign_mol(mol,conf,coord_Map, alg_Map, mol_template,args,log):
    """Minimize one conformer under template distance constraints, then align it.

    Every pair of template-matched atoms is constrained to the distance found
    in ``coord_Map``, the UFF force field is minimized for
    ``args.opt_steps_RDKit`` iterations and conformer ``conf`` is aligned onto
    ``mol_template`` with ``alg_Map``. Returns ``(mol, force_field)``.
    """
    matched = mol.GetSubstructMatch(mol_template)
    force_field = Chem.UFFGetMoleculeForceField(mol,confId=conf)
    # constrain each unordered pair of matched atoms to its template distance
    for pos, first in enumerate(matched):
        for second in matched[pos + 1:]:
            dist = coord_Map[first].Distance(coord_Map[second])
            force_field.AddDistanceConstraint(first, second, dist, dist, 10000)
    force_field.Initialize()
    force_field.Minimize(maxIts=args.opt_steps_RDKit)
    # rotate the embedded conformation onto the core_mol:
    rdMolAlign.AlignMol(mol, mol_template, prbCid=conf,refCid=-1,atomMap=alg_Map,reflect=True,maxIters=100)
    return mol,force_field
def pretweak(self) -> None:
    """
    What if the fragments were prealigned slightly? Really bad things.

    Aligns every hit after the first onto the first hit, using the positional
    mapping (cutoff 0.5) as atom map. Emits a warning on entry and whenever a
    hit has no mapping.

    :return:
    """
    warn('This method is unreliable. Do not use it')
    anchor = self.hits[0]
    for hit in self.hits[1:]:
        A2B = list(self.get_positional_mapping(hit, anchor, 0.5).items())
        if not A2B:
            warn(f'No overlap? {A2B}')
            continue
        rdMolAlign.AlignMol(hit, anchor, atomMap=A2B, maxIters=500)
def test7ConstrainedEmbedding(self):
    """Embed a copy with its first five atoms pinned, then check the alignment score."""
    sdf_path = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'DistGeomHelpers', 'test_data',
                            'constrain1.sdf')
    reader = Chem.SDMolSupplier(sdf_path)
    ref = next(reader)
    probe = copy.deepcopy(ref)

    # pin atoms 0..4 to their coordinates in the reference
    pinned = {}
    for idx in range(5):
        pinned[idx] = ref.GetConformer().GetAtomPosition(idx)
    conf_id = rdDistGeom.EmbedMolecule(probe, coordMap=pinned, randomSeed=23)
    self.assertTrue(conf_id > -1)

    identity_map = [(idx, idx) for idx in range(5)]
    score = rdMolAlign.AlignMol(probe, ref, atomMap=identity_map)
    self.assertTrue(score < 0.1)
def template_embed_optimize(molecule_embed,molecule,mol_1,args,log):
    """Embed a molecule onto a template core, minimize it and align it to the core.

    :param molecule_embed: molecule to embed; hydrogens are added to a copy.
    :param molecule: not used by this function.
    :param mol_1: template (core) used for the substructure match.
    :param args: options object; ``args.opt_steps_RDKit`` caps the minimization.
    :param log: logger exposing ``write``.
    :return: ``(molecule_embed, coordMap, algMap, ci)``; ``coordMap`` and
        ``algMap`` are ``None`` when embedding failed (``ci < 0``).
    """
    # atoms of the molecule matched by the template, matched before adding H's
    num_atom_match = molecule_embed.GetSubstructMatch(mol_1)

    # add H's to molecule
    molecule_embed = Chem.AddHs(molecule_embed)

    # definition of coordmap, the coreconfID (the first one = -1)
    coordMap = {}
    coreConfId = -1
    randomseed = -1
    force_constant = 10000

    # Only fetch the template conformer when its coordinates are needed.
    # Previously a bare ``except: pass`` hid a failed lookup and the loop
    # below then failed with an unrelated NameError; now the lookup's own
    # error propagates.
    if num_atom_match:
        coreConf = mol_1.GetConformer(coreConfId)
        for k, idxI in enumerate(num_atom_match):
            # k indexes the template atom, idxI the matching molecule atom
            coordMap[idxI] = coreConf.GetAtomPosition(k)

    ci = rdDistGeom.EmbedMolecule(molecule_embed, coordMap=coordMap, randomSeed=randomseed)
    if ci < 0:
        log.write('Could not embed molecule.')
        coordMap = None
        algMap = None
    else:
        GetFF = Chem.UFFGetMoleculeForceField(molecule_embed,confId=-1)

        # align molecule to the core: (molecule atom, template atom) pairs
        algMap = [(k, l) for l, k in enumerate(num_atom_match)]

        # keep every pair of core atoms at its template distance
        for k, idxI in enumerate(num_atom_match):
            for l in range(k + 1, len(num_atom_match)):
                idxJ = num_atom_match[l]
                d = coordMap[idxI].Distance(coordMap[idxJ])
                GetFF.AddDistanceConstraint(idxI, idxJ, d, d, force_constant)
        GetFF.Initialize()
        GetFF.Minimize(maxIts=args.opt_steps_RDKit)
        # rotate the embedded conformation onto the core_mol:
        rdMolAlign.AlignMol(molecule_embed, mol_1, atomMap=algMap,reflect=True,maxIters=100)

    return molecule_embed, coordMap, algMap, ci
def main():
    """Embed two molecules, align them with Open3DAlign and display the result.

    Writes ``<i>.mol`` / ``m<i>.png`` for each molecule, ``0_.mol`` / ``1_.mol``
    after the O3A alignment, and prints the alignment score, the matches, the
    RMSD and the UFF energy of the first molecule.
    """
    mols = [Chem.MolFromSmiles(smi) for smi in [EPINEPHRINE, CLONIPIDINE]]
    mols = [Chem.AddHs(mol) for mol in mols]

    for index, mol in enumerate(mols):
        print(mol)
        AllChem.EmbedMolecule(mol)
        AllChem.MMFFOptimizeMolecule(mol, maxIters=200)
        Chem.MolToMolFile(mol, '{0}.mol'.format(index))
        Draw.MolToFile(mol, 'm{0}.png'.format(index))

    first, second = mols[0], mols[1]

    # the MMFF properties are handed to GetO3A along with the molecules
    props_first = AllChem.MMFFGetMoleculeProperties(first)
    props_second = AllChem.MMFFGetMoleculeProperties(second)
    pyO3A = AllChem.GetO3A(first, second, props_first, props_second)

    print('align')
    print(pyO3A.Align())
    print(pyO3A.Matches())

    Chem.MolToMolFile(first, '0_.mol')
    Chem.MolToMolFile(second, '1_.mol')

    ff = AllChem.UFFGetMoleculeForceField(first)
    ff.Initialize()
    ff.Minimize(maxIts=200)

    rmsd = ChemAlign.AlignMol(first, second, atomMap=pyO3A.Matches())
    print('rmsd')
    print(rmsd)
    print('energy')
    print(ff.CalcEnergy())

    # launch pymol in server mode: `./pymol -R`
    # it resides in ~/anaconda/bin
    viewer = PyMol.MolViewer()
    viewer.ShowMol(first)
    viewer.GetPNG(h=200)
def get_rmsd(self, molecule, molecule_2):
    """
    It returns the RMSD between two RDKit molecules.

    The value is the result of ``rdMolAlign.AlignMol`` called on the
    ``rdkit_molecule`` attribute of both arguments, the first one being
    passed as the probe.

    Parameters
    ----------
    molecule : an peleffy.topology.Molecule
        The peleffy's Molecule object
    molecule_2 : an peleffy.topology.Molecule
        The peleffy's Molecule object

    Returns
    -------
    rmsd_value : float
        RMSD between two RDKit molecules
    """
    from rdkit.Chem import rdMolAlign

    probe = molecule.rdkit_molecule
    reference = molecule_2.rdkit_molecule
    return rdMolAlign.AlignMol(probe, reference)
def rmsd(mol: Chem.rdchem.Mol) -> np.ndarray:
    """Compute the RMSD between all the conformers of a molecule.

    Every ordered pair of conformer indices (i, j) is passed to
    ``rdMolAlign.AlignMol`` with i as probe and j as reference; the values are
    returned as an ``(n_confs, n_confs)`` array with row i / column j.

    Args:
        mol: a molecule

    Raises:
        ValueError: if the molecule has fewer than two conformers.
    """
    n_confs = mol.GetNumConformers()
    if n_confs <= 1:
        raise ValueError(
            "The molecule has 0 or 1 conformer. You can generate conformers with `dm.conformers.generate(mol)`."
        )

    values = [
        rdMolAlign.AlignMol(prbMol=mol, refMol=mol, prbCid=probe_idx, refCid=ref_idx)
        for probe_idx in range(n_confs)
        for ref_idx in range(n_confs)
    ]
    return np.array(values).reshape(n_confs, n_confs)
def align_mcs(new_mol, ref_mol):
    """Ref mol is the crystallized ligand. New mol is the drug you want to add to the structure.

    Every conformer of ``new_mol`` is aligned (in place) onto ``ref_mol`` using
    the atoms of their maximum common substructure.

    It returns a list of scores (one for each conformer, in the order of
    ``new_mol.GetConformers()``), where the lowest score is best.
    """

    # Find maximum common substructure so we can align based on this:
    mcs = rdFMCS.FindMCS([new_mol, ref_mol])
    match = Chem.MolFromSmarts(mcs.smartsString)
    test_match_atoms = new_mol.GetSubstructMatch(match)
    ref_match_atoms = ref_mol.GetSubstructMatch(match)
    # The probe->reference mapping does not depend on the conformer.
    atom_map = list(zip(test_match_atoms, ref_match_atoms))

    # Find alignments of all conformers of new drug to old drug. Use the
    # conformers the molecule actually has instead of assuming exactly 100.
    conf_ids = [conf.GetId() for conf in new_mol.GetConformers()]
    alignments_scores = [
        rdMolAlign.AlignMol(new_mol, ref_mol, prbCid=cid, atomMap=atom_map)
        for cid in conf_ids
    ]

    return alignments_scores
def test1Basic(self):
    """Align 1oir_conf onto 1oir and compare with the stored transformed coordinates."""
    file1 = os.path.join(RDConfig.RDBaseDir,'Code','GraphMol',
                         'MolAlign', 'test_data', '1oir.mol')
    file2 = os.path.join(RDConfig.RDBaseDir,'Code','GraphMol',
                         'MolAlign', 'test_data', '1oir_conf.mol')

    mol1 = Chem.MolFromMolFile(file1)
    mol2 = Chem.MolFromMolFile(file2)

    # failUnless is a deprecated alias that newer unittest versions no longer
    # provide; assertTrue is the supported name.
    rmsd = rdMolAlign.AlignMol(mol2, mol1)
    self.assertTrue(feq(rmsd, 0.6578))

    file3 = os.path.join(RDConfig.RDBaseDir,'Code','GraphMol',
                         'MolAlign', 'test_data', '1oir_trans.mol')
    mol3 = Chem.MolFromMolFile(file3)
    conf2 = mol2.GetConformer()
    conf3 = mol3.GetConformer()

    # after the alignment mol2's coordinates are compared atom by atom with mol3's
    for i in range(mol2.GetNumAtoms()):
        self.assertTrue(lstFeq(conf2.GetAtomPosition(i), conf3.GetAtomPosition(i)))

    rmsd, trans = rdMolAlign.GetAlignmentTransform(mol2, mol1)
    self.assertTrue(feq(rmsd, 0.6578))
def get_conformers(smiles=None, anchor=None, num_confs=None, output=None):
    """Generate conformers for a SMILES, align them to an anchor and write them to SD.

    :param smiles: SMILES string of the molecule to embed.
    :param anchor: path of an SD file; its first record is the anchor molecule.
    :param num_confs: number of conformers to request (converted with ``int``).
    :param output: path of the SD file to write.
    """
    mol = Chem.MolFromSmiles(smiles)
    AllChem.EmbedMolecule(mol)
    constrain = Chem.SDMolSupplier(anchor, False)[0]

    # atom correspondence over the maximum common substructure
    r = rdFMCS.FindMCS([mol, constrain])
    a = mol.GetSubstructMatch(Chem.MolFromSmarts(r.smartsString))
    b = constrain.GetSubstructMatch(Chem.MolFromSmarts(r.smartsString))
    amap = list(zip(a, b))

    # NOTE(review): the coordinate map is taken from the molecule's own first
    # embedding, not from the anchor -- confirm this is intended.
    coors = dict()
    for i in a:
        coors[i] = mol.GetConformer().GetAtomPosition(i)

    w = Chem.SDWriter(output)
    try:
        AllChem.EmbedMolecule(mol)
        mp = AllChem.MMFFGetMoleculeProperties(mol, mmffVariant='MMFF94s')
        ff = AllChem.MMFFGetMoleculeForceField(mol, mp)
        # NOTE(review): this force field is configured but never minimized
        # in this function -- verify whether a minimization step is missing.
        for i in mol.GetSubstructMatch(constrain):
            ff.MMFFAddPositionConstraint(i, 0, 1.0e5)
        confs = AllChem.EmbedMultipleConfs(mol,
                                           numConfs=int(num_confs),
                                           pruneRmsThresh=0.75,
                                           coordMap=coors,
                                           enforceChirality=True,
                                           useExpTorsionAnglePrefs=True,
                                           useBasicKnowledge=True)
        for element in confs:
            rdMolAlign.AlignMol(mol, constrain, element, 0, atomMap=amap)
            w.write(mol, confId=element)
    finally:
        # close the writer even when embedding or alignment raises
        w.close()
def process_one(name, mol, n_confs):
    """Build a molecule holding up to ``n_confs`` UFF-minimized conformers.

    A pool of conformers is embedded on a hydrogenated copy of ``mol`` and
    minimized. Conformers are then taken in order of increasing energy, added
    to a fresh copy, aligned onto the first one kept, and the remaining pool
    is filtered with ``rmsd_filter``. Progress goes to stderr.

    Returns the tuple ``(name, molecule_with_kept_conformers)``.
    """
    pool_size = how_many_conformers(mol)
    print("init pool size for %s: %d" % (name, pool_size), file=sys.stderr)
    mol_H = Chem.AddHs(mol)
    res = Chem.Mol(mol_H)
    res.RemoveAllConformers()
    print("generating starting conformers ...", file=sys.stderr)
    conf_energies = []
    print("FF minimization ...", file=sys.stderr)
    for conf_id in AllChem.EmbedMultipleConfs(mol_H, pool_size):
        force_field = AllChem.UFFGetMoleculeForceField(mol_H, confId=conf_id)
        force_field.Minimize()
        minimized_energy = force_field.CalcEnergy()
        conf_energies.append((minimized_energy, mol_H.GetConformer(conf_id)))
    # lowest energy first
    conf_energies = sorted(conf_energies, key=lambda pair: pair[0])
    # output non neighbor conformers
    kept = 0
    print("RMSD pruning ...", file=sys.stderr)
    while kept < n_confs and len(conf_energies) > 0:
        _energy, conf = conf_energies.pop(0)
        kept += 1
        new_id = res.AddConformer(conf, assignId=True)
        # align conformers to the one of lowest energy
        if new_id != 0:
            rdMolAlign.AlignMol(res, res, prbCid=new_id, refCid=0)
        # remove neighbors
        conf_energies = rmsd_filter(mol_H, conf, conf_energies, rmsd_threshold)
    print("kept %d confs for %s" % (kept, name), file=sys.stderr)
    # res.SetProp("_Name", name) was reported as not working, so the name
    # travels next to the molecule instead
    return (name, res)
def test1Shape(self):
    """Exercise shape encoding, conformer dimensions/boxes and shape Tanimoto distance on 1oir."""
    # NOTE: failUnless is a deprecated alias that newer unittest versions no
    # longer provide; the supported assert* methods are used throughout.
    fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'ShapeHelpers', 'test_data',
                         '1oir.mol')
    m = Chem.MolFromMolFile(fileN)
    rdmt.CanonicalizeMol(m)
    dims1, offset1 = rdshp.ComputeConfDimsAndOffset(m.GetConformer())
    grd = geom.UniformGrid3D(30.0, 16.0, 10.0)
    rdshp.EncodeShape(m, grd, 0)
    ovect = grd.GetOccupancyVect()
    self.assertEqual(ovect.GetTotalVal(), 9250)

    # same molecule, but passing the canonical transform instead of
    # canonicalizing the coordinates first
    m = Chem.MolFromMolFile(fileN)
    trans = rdmt.ComputeCanonicalTransform(m.GetConformer())
    dims, offset = rdshp.ComputeConfDimsAndOffset(m.GetConformer(), trans=trans)
    dims -= dims1
    offset -= offset1
    self.assertTrue(feq(dims.Length(), 0.0))
    self.assertTrue(feq(offset.Length(), 0.0))

    grd1 = geom.UniformGrid3D(30.0, 16.0, 10.0)
    rdshp.EncodeShape(m, grd1, 0, trans)
    ovect = grd1.GetOccupancyVect()
    self.assertEqual(ovect.GetTotalVal(), 9250)

    grd2 = geom.UniformGrid3D(30.0, 16.0, 10.0)
    rdshp.EncodeShape(m, grd2, 0)

    fileN2 = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'ShapeHelpers', 'test_data',
                          '1oir_conf.mol')
    m2 = Chem.MolFromMolFile(fileN2)

    # align m onto m2 before measuring the shape distance
    rdMolAlign.AlignMol(m, m2)
    self.assertTrue(feq(rdshp.ShapeTanimotoDist(m, m2), 0.2813))

    dist = rdshp.ShapeTanimotoDist(mol1=m, mol2=m2, confId1=0, confId2=0, gridSpacing=0.25,
                                   stepSize=0.125)
    self.assertTrue(feq(dist, 0.3021))

    m = Chem.MolFromMolFile(fileN)
    cpt = rdmt.ComputeCentroid(m.GetConformer())
    dims, offset = rdshp.ComputeConfDimsAndOffset(m.GetConformer())

    grd = geom.UniformGrid3D(dims.x, dims.y, dims.z, 0.5,
                             DataStructs.DiscreteValueType.TWOBITVALUE, offset)
    dims -= geom.Point3D(13.927, 16.97, 9.775)
    offset -= geom.Point3D(-4.353, 16.829, 2.782)
    self.assertTrue(feq(dims.Length(), 0.0))
    self.assertTrue(feq(offset.Length(), 0.0))
    rdshp.EncodeShape(m, grd, 0)

    ovect = grd.GetOccupancyVect()

    self.assertEqual(ovect.GetTotalVal(), 9275)
    geom.WriteGridToFile(grd, '1oir_shape.grd')

    # bounding boxes before and after canonicalization, and their union
    m = Chem.MolFromMolFile(fileN)
    lc, uc = rdshp.ComputeConfBox(m.GetConformer())
    rdmt.CanonicalizeMol(m)
    lc1, uc1 = rdshp.ComputeConfBox(m.GetConformer())

    lc2, uc2 = rdshp.ComputeUnionBox((lc, uc), (lc1, uc1))
    lc -= geom.Point3D(-4.353, 16.829, 2.782)
    uc -= geom.Point3D(9.574, 33.799, 12.557)
    self.assertTrue(feq(lc.Length(), 0.0))
    self.assertTrue(feq(uc.Length(), 0.0))

    lc1 -= geom.Point3D(-10.7519, -6.0778, -3.0123)
    uc1 -= geom.Point3D(8.7163, 5.3279, 3.1621)
    self.assertTrue(feq(lc1.Length(), 0.0))
    self.assertTrue(feq(uc1.Length(), 0.0))

    lc2 -= geom.Point3D(-10.7519, -6.0778, -3.01226)
    uc2 -= geom.Point3D(9.574, 33.799, 12.557)
    self.assertTrue(feq(lc2.Length(), 0.0))
    self.assertTrue(feq(uc2.Length(), 0.0))
def place_followup(self, mol: Chem.Mol = None, atom_map: Optional[Dict] = None) -> Chem.Mol:
    """
    This method places the atoms with known mapping
    and places the 'uniques' (novel) via an aligned mol (the 'sextant')

    :param mol: the followup molecule; defaults to ``self.initial_mol``.
    :param atom_map: something that get_mcs_mapping would return
        (followup atom index -> chimera atom index). Computed when omitted.
    :return: the positioned copy of the followup ('putty').
    """
    # Note none of this malarkey: AllChem.MMFFOptimizeMolecule(ref)
    # prealignment
    if mol is None:
        mol = self.initial_mol
    sextant = Chem.Mol(mol)
    Chem.SanitizeMol(sextant)
    AllChem.EmbedMolecule(sextant)
    AllChem.MMFFOptimizeMolecule(sextant)
    ######################################################
    # mapping retrieval and sextant alignment
    # variables: atom_map sextant -> uniques
    if atom_map is None:
        atom_map, mode = self.get_mcs_mapping(mol, self.chimera)
        log.trace(
            f"followup-chimera' = { {**{k: str(v) for k, v in mode.items()}, 'N_atoms': len(atom_map)} }"
        )
    rdMolAlign.AlignMol(sextant, self.chimera, atomMap=list(atom_map.items()), maxIters=500)
    # debug print
    if self._debug_draw:
        self.draw_nicely(mol, highlightAtoms=dict(atom_map).keys())
        self.draw_nicely(self.chimera, highlightAtoms=dict(atom_map).values())
    # place atoms that have a known location
    putty = Chem.Mol(sextant)
    pconf = putty.GetConformer()
    chimera_conf = self.chimera.GetConformer()
    uniques = set()  # unique atoms in followup
    for i in range(putty.GetNumAtoms()):
        p_atom = putty.GetAtomWithIdx(i)
        p_atom.SetDoubleProp('_Stdev', 0.)
        p_atom.SetProp('_Origin', 'none')
        if i in atom_map:
            ci = atom_map[i]
            c_atom = self.chimera.GetAtomWithIdx(ci)
            if c_atom.HasProp('_Stdev'):
                stdev = c_atom.GetDoubleProp('_Stdev')
                # c_atom already is the chimera atom: read the property from
                # it directly (it was previously looked up via a method that
                # only the molecule provides).
                origin = c_atom.GetProp('_Origin')
                p_atom.SetDoubleProp('_Stdev', stdev)
                p_atom.SetProp('_Origin', origin)
            pconf.SetAtomPosition(i, chimera_conf.GetAtomPosition(ci))
        else:
            uniques.add(i)
    ######################################################
    # I be using a sextant for dead reckoning!
    # variables: sextant unique team
    categories = self._categorise(sextant, uniques)
    # debug print
    if self._debug_draw:
        print('internal', categories['internals'])
    done_already = []  # multi-attachment issue.
    for unique_idx in categories['pairs']:  # attachment unique indices
        # check the index was not done already (by virtue of a second attachment)
        if unique_idx in done_already:
            continue
        # get other attachments if any.
        team = self._recruit_team(mol, unique_idx, categories['uniques'])
        other_attachments = (team & set(categories['pairs'].keys())) - {unique_idx}
        sights = set()  # atoms to align against
        for att_idx in [unique_idx] + list(other_attachments):
            for pd in categories['pairs'][att_idx]:
                first_sight = pd['idx']
                sights.add((first_sight, first_sight))
                neighs = [
                    i.GetIdx()
                    for i in sextant.GetAtomWithIdx(first_sight).GetNeighbors()
                    if i.GetIdx() not in uniques
                ]
                for n in neighs:
                    sights.add((n, n))
        if self.attachement and list(categories['dummies']) and list(
                categories['dummies'])[0] in team:
            r = list(categories['dummies'])[0]
            pconf.SetAtomPosition(r, self.attachement.GetConformer().GetAtomPosition(0))
            sights.add((r, r))
        rdMolAlign.AlignMol(sextant, putty, atomMap=list(sights), maxIters=500)
        sconf = sextant.GetConformer()
        # debug print
        if self._debug_draw:
            print(f'alignment atoms for {unique_idx} ({team}): {sights}')
            self.draw_nicely(sextant, highlightAtoms=[a for a, b in sights])
        # copy position over
        for atom_idx in team:
            pconf.SetAtomPosition(atom_idx, sconf.GetAtomPosition(atom_idx))
        # the ring problem does not apply here but would result in rejiggling atoms.
        for other in other_attachments:
            done_already.append(other)
    # complete
    AllChem.SanitizeMol(putty)
    return putty  # positioned_mol
def place_followup(self, mol: Chem.Mol = None) -> Chem.Mol:
    """
    Place the followup: mapped atoms take the chimera's coordinates, the
    remaining ('unique') atoms are copied from an embedded copy (the
    'sextant') aligned against the atoms already placed.

    :param mol: the followup molecule; defaults to ``self.initial_mol``.
    :return: the positioned copy of the followup ('putty').
    """
    # Note none of this malarkey: AllChem.MMFFOptimizeMolecule(ref)
    # prealignment
    if mol is None:
        mol = self.initial_mol
    sextant = Chem.Mol(mol)
    Chem.SanitizeMol(sextant)
    AllChem.EmbedMolecule(sextant)
    AllChem.MMFFOptimizeMolecule(sextant)
    atom_map, mode = self.get_mcs_mapping(mol, self.chimera)
    self.logbook['followup-chimera'] = {
        **{k: str(v) for k, v in mode.items()},
        'N_atoms': len(atom_map)
    }
    rdMolAlign.AlignMol(sextant, self.chimera, atomMap=list(atom_map.items()), maxIters=500)
    if self._debug_draw:
        self.draw_nicely(mol, highlightAtoms=dict(atom_map).keys())
        self.draw_nicely(self.chimera, highlightAtoms=dict(atom_map).values())
    putty = Chem.Mol(sextant)
    pconf = putty.GetConformer()
    chimera_conf = self.chimera.GetConformer()
    uniques = set()  # unique atoms in followup
    for i in range(putty.GetNumAtoms()):
        if i in atom_map:
            pconf.SetAtomPosition(i, chimera_conf.GetAtomPosition(atom_map[i]))
        else:
            uniques.add(i)
    # we be using a sextant for dead reckoning!
    categories = self._categorise(sextant, uniques)
    if self._debug_draw:
        print('internal', categories['internals'])
    for unique_idx in categories['pairs']:  # attachment unique indices
        sights = set()  # atoms to align against
        for pd in categories['pairs'][unique_idx]:
            first_sight = pd['idx']
            sights.add((first_sight, first_sight))
            neighs = [
                i.GetIdx()
                for i in sextant.GetAtomWithIdx(first_sight).GetNeighbors()
                if i.GetIdx() not in uniques
            ]
            for n in neighs:
                sights.add((n, n))
        team = self._recruit_team(mol, unique_idx, categories)
        # Only look at the first dummy when there is one: indexing an empty
        # list raised IndexError when an attachment was set without dummies.
        dummies = list(categories['dummies'])
        if self.attachement and dummies and dummies[0] in team:
            r = dummies[0]
            pconf.SetAtomPosition(r, self.attachement.GetConformer().GetAtomPosition(0))
            sights.add((r, r))
        rdMolAlign.AlignMol(sextant, putty, atomMap=list(sights), maxIters=500)
        sconf = sextant.GetConformer()
        if self._debug_draw:
            print(f'alignment atoms for {unique_idx} ({team}): {sights}')
            self.draw_nicely(sextant, highlightAtoms=[a for a, b in sights])
        # copy the freshly aligned positions of the team over
        for atom_idx in team:
            pconf.SetAtomPosition(atom_idx, sconf.GetAtomPosition(atom_idx))
    AllChem.SanitizeMol(putty)
    return putty
mols = [] for conformer_coords in coords: num_atoms = int(np.all(conformer_coords != 0, axis=1).sum()) conformer = Chem.Conformer(num_atoms) for atom_num in range(num_atoms): conformer.SetAtomPosition( atom_num, conformer_coords[atom_num].tolist()) current_mol = copy.deepcopy(mol) current_mol.AddConformer(conformer) mols.append(current_mol) mol_rmsds = [] rmsds = [] for i in range(len(mols) - 1): for k in range(i, len(mols)): rmsd = rdMolAlign.AlignMol(mols[k], mols[i]) rmsds.append(rmsd) #rdMolAlign.AlignMolConformers(mol, confIds=range(i, len(conformers)), RMSlist=rmsds) mol_rmsds = np.array(rmsds) mean_rmsd = mol_rmsds.mean() median_rmsd = np.median(mol_rmsds) std_rmsd = mol_rmsds.std() mean_rmsds.append(mean_rmsd) median_rmsds.append(median_rmsd) std_rmsds.append(std_rmsd) heavy_atoms.append(ha) except: print('Failed', flush=True) np.save(os.path.join(args.savedir, 'mean_rmsds.npy'), np.array(mean_rmsds))
def summ_search(mol, name, args, log, dup_data, dup_data_idx, coord_Map=None, alg_Map=None, mol_template=None):
    '''embeds core conformers, then optimizes and filters based on RMSD. Finally the rotatable torsions are systematically rotated

    Conformers are written to ``name + '_rdkit' + args.output``; when
    rotatable torsions were found, the rotated conformers are filtered again
    and written to ``name + '_rdkit_rotated' + args.output``. Statistics are
    stored in ``dup_data`` at row ``dup_data_idx``.

    coord_Map, alg_Map and mol_template switch to the template-constrained
    embedding when any of them is given.

    Returns 1 when conformers were generated and -1 when the molecule was
    skipped because it has more than ``args.max_torsions`` torsions.
    '''
    sdwriter = Chem.SDWriter(name + '_' + 'rdkit' + args.output)

    Chem.SanitizeMol(mol)
    mol = Chem.AddHs(mol)
    mol.SetProp("_Name", name)

    # detects and applies auto-detection of initial number of conformers
    if args.sample == 'auto':
        initial_confs = int(auto_sampling(args.auto_sample, mol, log))
    else:
        initial_confs = int(args.sample)

    #
    dup_data.at[dup_data_idx, 'Molecule'] = name
    dup_data.at[dup_data_idx, 'RDKIT-Initial-samples'] = initial_confs

    if args.nodihedrals == False:
        rotmatches = getDihedralMatches(mol, args.heavyonly, log)
    else:
        rotmatches = []

    # Defined up front so the post-rotation bookkeeping below also works for
    # molecules skipped because of too many torsions.
    total = 0

    if len(rotmatches) > args.max_torsions:
        log.write("x Too many torsions (%d). Skipping %s" % (len(rotmatches), (name + args.output)))
        status = -1
    else:
        if coord_Map == None and alg_Map == None and mol_template == None:
            if args.etkdg:
                ps = Chem.ETKDG()
                ps.randomSeed = args.seed
                ps.ignoreSmoothingFailures = True
                ps.numThreads = 0
                cids = rdDistGeom.EmbedMultipleConfs(mol, initial_confs, params=ps)
            else:
                cids = rdDistGeom.EmbedMultipleConfs(
                    mol, initial_confs, ignoreSmoothingFailures=True,
                    randomSeed=args.seed, numThreads=0)
            # fall back to random starting coordinates when embedding gave
            # (almost) nothing
            if len(cids) == 0 or len(cids) == 1 and initial_confs != 1:
                log.write("o conformers initially sampled with random coordinates")
                cids = rdDistGeom.EmbedMultipleConfs(
                    mol, initial_confs, randomSeed=args.seed,
                    useRandomCoords=True, boxSizeMult=10.0,
                    ignoreSmoothingFailures=True, numZeroFail=1000,
                    numThreads=0)
            if args.verbose:
                log.write("o " + str(len(cids)) + " conformers initially sampled")
        # case of embed for templates
        else:
            if args.etkdg:
                ps = Chem.ETKDG()
                ps.randomSeed = args.seed
                ps.coordMap = coord_Map
                ps.ignoreSmoothingFailures = True
                ps.numThreads = 0
                cids = rdDistGeom.EmbedMultipleConfs(mol, initial_confs, params=ps)
            else:
                cids = rdDistGeom.EmbedMultipleConfs(
                    mol, initial_confs, randomSeed=args.seed,
                    ignoreSmoothingFailures=True, coordMap=coord_Map,
                    numThreads=0)
            if len(cids) == 0 or len(cids) == 1 and initial_confs != 1:
                log.write("o conformers initially sampled with random coordinates")
                cids = rdDistGeom.EmbedMultipleConfs(
                    mol, initial_confs, randomSeed=args.seed,
                    useRandomCoords=True, boxSizeMult=10.0, numZeroFail=1000,
                    ignoreSmoothingFailures=True, coordMap=coord_Map,
                    numThreads=0)
            if args.verbose:
                log.write("o " + str(len(cids)) + " conformers initially sampled")

        # energy minimize all to get more realistic results
        # identify the atoms and decide Force Field
        for atom in mol.GetAtoms():
            if atom.GetAtomicNum() > 36:  # upto Kr for MMFF, if not use UFF
                args.ff = "UFF"

        if args.verbose:
            log.write("o Optimizing " + str(len(cids)) + " initial conformers with" + args.ff)
        if args.verbose:
            if args.nodihedrals == False:
                log.write("o Found " + str(len(rotmatches)) + " rotatable torsions")
            else:
                log.write("o Systematic torsion rotation is set to OFF")

        cenergy, outmols = [], []
        bar = IncrementalBar('o Minimizing', max=len(cids))
        for i, conf in enumerate(cids):
            if coord_Map == None and alg_Map == None and mol_template == None:
                if args.ff == "MMFF":
                    GetFF = Chem.MMFFGetMoleculeForceField(
                        mol, Chem.MMFFGetMoleculeProperties(mol), confId=conf)
                elif args.ff == "UFF":
                    GetFF = Chem.UFFGetMoleculeForceField(mol, confId=conf)
                else:
                    log.write(' Force field {} not supported!'.format(args.ff))
                    sys.exit()

                GetFF.Initialize()
                converged = GetFF.Minimize(maxIts=args.opt_steps_RDKit)
                energy = GetFF.CalcEnergy()
                cenergy.append(GetFF.CalcEnergy())
            # template realign before doing calculations
            else:
                num_atom_match = mol.GetSubstructMatch(mol_template)
                # Force field parameters
                if args.ff == "MMFF":
                    GetFF = lambda mol, confId=conf: Chem.MMFFGetMoleculeForceField(
                        mol, Chem.MMFFGetMoleculeProperties(mol), confId=conf)
                elif args.ff == "UFF":
                    GetFF = lambda mol, confId=conf: Chem.UFFGetMoleculeForceField(
                        mol, confId=conf)
                else:
                    # was ``options.ff``: no ``options`` name exists in this
                    # function, so this branch raised NameError instead of
                    # reporting the unsupported force field
                    log.write(' Force field {} not supported!'.format(args.ff))
                    sys.exit()
                getForceField = GetFF

                # clean up the conformation: keep every pair of template
                # atoms at its distance in coord_Map
                ff_temp = getForceField(mol, confId=conf)
                for k, idxI in enumerate(num_atom_match):
                    for l in range(k + 1, len(num_atom_match)):
                        idxJ = num_atom_match[l]
                        d = coord_Map[idxI].Distance(coord_Map[idxJ])
                        ff_temp.AddDistanceConstraint(idxI, idxJ, d, d, 10000)
                ff_temp.Initialize()
                # reassigned n from 4 to 10 for better embed and minimization
                n = 10
                more = ff_temp.Minimize()
                while more and n:
                    more = ff_temp.Minimize()
                    n -= 1
                energy = ff_temp.CalcEnergy()
                # rotate the embedded conformation onto the core_mol:
                rms = rdMolAlign.AlignMol(mol, mol_template, prbCid=conf,
                                          atomMap=alg_Map, reflect=True, maxIters=100)
                cenergy.append(energy)

            # outmols is gonna be a list containing "initial_confs" mol objects with "initial_confs"
            # conformers. We do this to SetProp (Name and Energy) to the different conformers
            # and log.write in the SDF file. At the end, since all the mol objects has the same
            # conformers, but the energies are different, we can log.write conformers to SDF files
            # with the energies of the parent mol objects. We measured the computing time and
            # it's the same as using only 1 parent mol object with 10 conformers, but we couldn't
            # SetProp correctly
            pmol = PropertyMol.PropertyMol(mol)
            outmols.append(pmol)
            bar.next()
        bar.finish()

        for i, cid in enumerate(cids):
            outmols[cid].SetProp('_Name', name + ' conformer ' + str(i + 1))
            outmols[cid].SetProp('Energy', cenergy[cid])

        cids = list(range(len(outmols)))
        sortedcids = sorted(cids, key=lambda cid: cenergy[cid])

        log.write("\n\no Filters after intial embedding of " + str(initial_confs) + " conformers")
        # the duplicate counters start at -1 because the first conformer is
        # compared with itself
        selectedcids, selectedcids_initial, eng_dup, eng_rms_dup = [], [], -1, -1
        bar = IncrementalBar('o Filtering based on energy (pre-filter)', max=len(sortedcids))
        for i, conf in enumerate(sortedcids):
            # This keeps track of whether or not your conformer is unique
            excluded_conf = False
            # include the first conformer in the list to start the filtering process
            if i == 0:
                selectedcids_initial.append(conf)
            # check energy
            for seenconf in selectedcids_initial:
                E_diff = abs(cenergy[conf] - cenergy[seenconf])  # in kcal/mol
                if E_diff < args.initial_energy_threshold:
                    eng_dup += 1
                    excluded_conf = True
                    break
            if excluded_conf == False:
                if conf not in selectedcids_initial:
                    selectedcids_initial.append(conf)
            bar.next()
        bar.finish()

        if args.verbose == True:
            log.write("o " + str(eng_dup) + " Duplicates removed pre-energy filter (E < " + str(args.initial_energy_threshold) + " kcal/mol )")

        # reduce to unique set
        if args.verbose:
            log.write("o Removing duplicate conformers ( RMSD < " + str(args.rms_threshold) + " and E difference < " + str(args.energy_threshold) + " kcal/mol)")

        bar = IncrementalBar('o Filtering based on energy and rms', max=len(selectedcids_initial))
        # check rmsd
        for i, conf in enumerate(selectedcids_initial):
            # set torsions to same value
            for m in rotmatches:
                rdMolTransforms.SetDihedralDeg(
                    outmols[conf].GetConformer(conf), *m, 180.0)
            # This keeps track of whether or not your conformer is unique
            excluded_conf = False
            # include the first conformer in the list to start the filtering process
            if i == 0:
                selectedcids.append(conf)
            # check rmsd
            for seenconf in selectedcids:
                E_diff = abs(cenergy[conf] - cenergy[seenconf])  # in kcal/mol
                if E_diff < args.energy_threshold:
                    rms = get_conf_RMS(outmols[conf], outmols[conf], seenconf, conf,
                                       args.heavyonly, args.max_matches_RMSD, log)
                    if rms < args.rms_threshold:
                        excluded_conf = True
                        eng_rms_dup += 1
                        break
            if excluded_conf == False:
                if conf not in selectedcids:
                    selectedcids.append(conf)
            bar.next()
        bar.finish()

        if args.verbose == True:
            log.write("o " + str(eng_rms_dup) + " Duplicates removed (RMSD < " + str(args.rms_threshold) + " / E < " + str(args.energy_threshold) + " kcal/mol) after rotation")
        if args.verbose:
            log.write("o " + str(len(selectedcids)) + " unique (ignoring torsions) starting conformers remain")

        dup_data.at[dup_data_idx, 'RDKit-energy-duplicates'] = eng_dup
        dup_data.at[dup_data_idx, 'RDKit-RMS-and-energy-duplicates'] = eng_rms_dup
        dup_data.at[dup_data_idx, 'RDKIT-Unique-conformers'] = len(selectedcids)

        # now exhaustively drive torsions of selected conformers
        n_confs = int(len(selectedcids) * (360 / args.degree)**len(rotmatches))
        if args.verbose and len(rotmatches) != 0:
            log.write("\n\no Systematic generation of " + str(n_confs) + " confomers")
            bar = IncrementalBar(
                'o Generating conformations based on dihedral rotation',
                max=len(selectedcids))
        else:
            bar = IncrementalBar('o Generating conformations', max=len(selectedcids))

        for conf in selectedcids:
            total += genConformer_r(outmols[conf], conf, 0, rotmatches, args.degree,
                                    sdwriter, args, outmols[conf].GetProp('_Name'), log)
            bar.next()
        bar.finish()
        if args.verbose and len(rotmatches) != 0:
            log.write("o %d total conformations generated" % total)
        status = 1

    sdwriter.close()

    # getting the energy from and mols after rotations
    if len(rotmatches) != 0:
        rdmols = Chem.SDMolSupplier(name + '_' + 'rdkit' + args.output, removeHs=False)
        if rdmols is None:
            log.write("Could not open " + name + args.output)
            sys.exit(-1)

        bar = IncrementalBar(
            'o Filtering based on energy and rms after rotation of dihedrals',
            max=len(rdmols))
        sdwriter = Chem.SDWriter(name + '_' + 'rdkit' + '_' + 'rotated' + args.output)

        rd_count = 0
        rd_selectedcids, rd_dup_energy, rd_dup_rms_eng = [], -1, 0
        for i in range(len(rdmols)):
            # This keeps track of whether or not your conformer is unique
            excluded_conf = False
            # include the first conformer in the list to start the filtering process
            if rd_count == 0:
                rd_selectedcids.append(i)
                if args.metal_complex == True:
                    # iodine atoms with 2-6 bonds are replaced by args.metal
                    for atom in rdmols[i].GetAtoms():
                        if atom.GetSymbol() == 'I' and (
                                len(atom.GetBonds()) == 6
                                or len(atom.GetBonds()) == 5
                                or len(atom.GetBonds()) == 4
                                or len(atom.GetBonds()) == 3
                                or len(atom.GetBonds()) == 2):
                            for el in elementspt:
                                if el.symbol == args.metal:
                                    atomic_number = el.number
                            atom.SetAtomicNum(atomic_number)
                sdwriter.write(rdmols[i])
            # Only the first ID gets included
            rd_count = 1
            # check rmsd
            for j in rd_selectedcids:
                if abs(
                        float(rdmols[i].GetProp('Energy')) -
                        float(rdmols[j].GetProp('Energy'))
                ) < args.initial_energy_threshold:  # comparison in kcal/mol
                    excluded_conf = True
                    rd_dup_energy += 1
                    break
                if abs(
                        float(rdmols[i].GetProp('Energy')) -
                        float(rdmols[j].GetProp('Energy'))
                ) < args.energy_threshold:  # in kcal/mol
                    rms = get_conf_RMS(rdmols[i], rdmols[j], -1, -1, args.heavyonly,
                                       args.max_matches_RMSD, log)
                    if rms < args.rms_threshold:
                        excluded_conf = True
                        rd_dup_rms_eng += 1
                        break
            if excluded_conf == False:
                if args.metal_complex == True:
                    for atom in rdmols[i].GetAtoms():
                        if atom.GetSymbol() == 'I' and (
                                len(atom.GetBonds()) == 6
                                or len(atom.GetBonds()) == 5
                                or len(atom.GetBonds()) == 4
                                or len(atom.GetBonds()) == 3
                                or len(atom.GetBonds()) == 2):
                            for el in elementspt:
                                if el.symbol == args.metal:
                                    atomic_number = el.number
                            atom.SetAtomicNum(atomic_number)
                sdwriter.write(rdmols[i])
                if i not in rd_selectedcids:
                    rd_selectedcids.append(i)
            bar.next()
        bar.finish()
        sdwriter.close()

        if args.verbose == True:
            log.write("o " + str(rd_dup_energy) + " Duplicates removed initial energy ( E < " + str(args.initial_energy_threshold) + " kcal/mol )")
        if args.verbose == True:
            log.write("o " + str(rd_dup_rms_eng) + " Duplicates removed (RMSD < " + str(args.rms_threshold) + " / E < " + str(args.energy_threshold) + " kcal/mol) after rotation")
        if args.verbose == True:
            log.write("o " + str(len(rd_selectedcids)) + " unique (after torsions) conformers remain")

        # filtering process after rotations
        dup_data.at[dup_data_idx, 'RDKIT-Rotated-conformers'] = total
        dup_data.at[dup_data_idx, 'RDKIT-Rotated-Unique-conformers'] = len(rd_selectedcids)

    return status
def testMinimizeOnly(self):
    """Check that CoordGen's ``minimizeOnly`` mode yields coordinates close to the reference.

    The untouched probe must align to the reference with an RMSD above 0.1,
    while the copy processed by ``rdCoordGen.AddCoords`` must align below 0.1.
    """
    # NOTE(review): both mol-block literals are reproduced exactly as they
    # appear in this file; their line breaks look collapsed - confirm against
    # the upstream test before relying on them.
    probe_block = ''' Mrv2014 08052005392D 0 0 0 0 0 999 V3000 M V30 BEGIN CTAB M V30 COUNTS 14 15 0 0 0 M V30 BEGIN ATOM M V30 1 C -1.4287 -1.4523 0 0 M V30 2 C -2.9638 -1.5752 0 0 M V30 3 C -3.8377 -0.3072 0 0 M V30 4 C -3.1766 1.0837 0 0 M V30 5 C -1.6416 1.2066 0 0 M V30 6 C -0.7675 -0.0614 0 0 M V30 7 C 0.7675 0.0614 0 0 M V30 8 C 1.6416 -1.2066 0 0 M V30 9 C 0.9804 -2.5975 0 0 M V30 10 N 3.1766 -1.0837 0 0 M V30 11 C 3.8377 0.3072 0 0 M V30 12 C 2.9638 1.5752 0 0 M V30 13 C 1.4287 1.4523 0 0 M V30 14 F -0.5548 -2.7203 0 0 M V30 END ATOM M V30 BEGIN BOND M V30 1 2 1 2 M V30 2 1 2 3 M V30 3 2 3 4 M V30 4 1 4 5 M V30 5 2 5 6 M V30 6 1 6 7 M V30 7 2 7 8 M V30 8 1 8 9 M V30 9 1 8 10 M V30 10 2 10 11 M V30 11 1 11 12 M V30 12 2 12 13 M V30 13 1 6 1 M V30 14 1 13 7 M V30 15 1 1 14 M V30 END BOND M V30 END CTAB M END '''
    reference_block = ''' RDKit 2D 14 15 0 0 0 0 0 0 0 0999 V2000 -1.5379 -1.4859 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 -3.1218 -1.5958 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 -3.9595 -0.2554 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 -3.2641 1.1663 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 -1.6778 1.2217 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 -0.7941 -0.0886 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 0.7983 0.0383 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 1.7524 -1.2209 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 1.1246 -2.6443 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 3.3306 -1.0787 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 3.9439 0.3747 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 3.0337 1.6656 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 1.4617 1.4692 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 -0.6924 -2.7917 0.0000 F 0 0 0 0 0 0 0 0 0 0 0 0 1 2 2 0 2 3 1 0 3 4 2 0 4 5 1 0 5 6 2 0 6 7 1 0 7 8 2 0 8 9 1 0 8 10 1 0 10 11 2 0 11 12 1 0 12 13 2 0 6 1 1 0 13 7 1 0 1 14 1 0 M END'''

    probe = Chem.MolFromMolBlock(probe_block)
    reference = Chem.MolFromMolBlock(reference_block)

    params = rdCoordGen.CoordGenParams()
    params.minimizeOnly = True

    # Work on a copy so the original probe keeps its input coordinates.
    minimized = Chem.Mol(probe)
    rdCoordGen.AddCoords(minimized, params)

    rms_input = rdMolAlign.AlignMol(probe, reference)
    self.assertGreater(rms_input, 0.1)
    rms_minimized = rdMolAlign.AlignMol(minimized, reference)
    self.assertLess(rms_minimized, 0.1)
def testMinimizeOnly(self):
    """Verify ``CoordGenParams.minimizeOnly`` for a V2000 input mol block.

    Aligning the raw probe onto the reference must give an RMSD greater than
    0.1; aligning the copy that went through ``rdCoordGen.AddCoords`` must
    give an RMSD smaller than 0.1.
    """
    # NOTE(review): the two literals below are kept exactly as they appear in
    # this file; their line structure looks collapsed - confirm against the
    # upstream test data.
    input_block = ''' Mrv2014 03252113022D 14 15 0 0 0 0 999 V2000 -0.7654 -0.7780 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 -1.5877 -0.8439 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 -2.0559 -0.1646 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 -1.7017 0.5805 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 -0.8794 0.6464 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 -0.4112 -0.0329 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 0.4112 0.0329 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 0.8794 -0.6464 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 0.7771 -1.4237 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 1.7017 -0.5805 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 2.0559 0.1646 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 1.5877 0.8439 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 0.7654 0.7780 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 -0.7654 -1.6519 0.0000 F 0 0 0 0 0 0 0 0 0 0 0 0 1 2 2 0 0 0 0 2 3 1 0 0 0 0 3 4 2 0 0 0 0 4 5 1 0 0 0 0 5 6 2 0 0 0 0 6 7 1 0 0 0 0 7 8 2 0 0 0 0 8 9 1 0 0 0 0 8 10 1 0 0 0 0 10 11 2 0 0 0 0 11 12 1 0 0 0 0 12 13 2 0 0 0 0 6 1 1 0 0 0 0 13 7 1 0 0 0 0 1 14 1 0 0 0 0 M END '''
    expected_block = ''' RDKit 2D 14 15 0 0 0 0 0 0 0 0999 V2000 -0.5957 -0.8221 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 -1.4185 -0.8441 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 -1.8520 -0.1434 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 -1.4595 0.5814 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 -0.6368 0.6072 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 -0.2063 -0.0955 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 0.2087 -0.0625 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 0.7102 -0.7158 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 0.4040 -1.4774 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 1.5259 -0.6099 0.0000 N 0 0 0 0 0 0 0 0 0 0 0 0 1.8401 0.1516 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 1.3405 0.8062 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 0.5252 0.6981 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0 -0.1735 -1.5240 0.0000 F 0 0 0 0 0 0 0 0 0 0 0 0 1 2 2 0 2 3 1 0 3 4 2 0 4 5 1 0 5 6 2 0 6 7 1 0 7 8 2 0 8 9 1 0 8 10 1 0 10 11 2 0 11 12 1 0 12 13 2 0 6 1 1 0 13 7 1 0 1 14 1 0 M END '''

    source_mol = Chem.MolFromMolBlock(input_block)
    expected_mol = Chem.MolFromMolBlock(expected_block)

    coordgen_params = rdCoordGen.CoordGenParams()
    coordgen_params.minimizeOnly = True

    # The copy receives the new coordinates; source_mol keeps the originals.
    relaxed_mol = Chem.Mol(source_mol)
    rdCoordGen.AddCoords(relaxed_mol, coordgen_params)

    rms_before = rdMolAlign.AlignMol(source_mol, expected_mol)
    self.assertGreater(rms_before, 0.1)
    rms_after = rdMolAlign.AlignMol(relaxed_mol, expected_mol)
    self.assertLess(rms_after, 0.1)
def _align_on_mcs(probe, reference):
    """Align *probe* onto *reference* over their maximum common substructure.

    *probe* is transformed in place; the RMSD reported by
    ``rdMolAlign.AlignMol`` is returned.
    """
    pattern = Chem.MolFromSmarts(rdFMCS.FindMCS([probe, reference]).smartsString)
    reference_match = reference.GetSubstructMatch(pattern)
    probe_match = probe.GetSubstructMatch(pattern)
    return rdMolAlign.AlignMol(
        probe, reference, atomMap=list(zip(probe_match, reference_match)))


def _write_sdf(path, mols):
    """Write every molecule in *mols* to the SD file *path*, always closing the writer."""
    writer = Chem.SDWriter(path)
    try:
        for element in mols:
            writer.write(element)
    finally:
        writer.close()


def run_comparison(references=None, conformers=None):
    """Compare generated conformers against reference structures.

    References and conformers are paired on the part of their ``_Name``
    property before the first underscore. For every pair the function records
    the MCS-based RMSD, the Open3DAlign score, the torsion fingerprint
    deviation, the radius of gyration, the MMFF energy before and after
    re-embedding plus minimisation, and the RMSD after minimisation.

    One ``<reference id>.csv`` is written per reference, plus
    ``Lowest_RMSD_Data.csv`` and four SD files with the processed structures.

    :param references: path of the SD file with the reference structures;
        falls back to the module-level ``args.references`` when ``None``.
    :param conformers: path of the SD file with the conformers; falls back to
        the module-level ``args.conformers`` when ``None``.
    """
    # Only fall back to the command-line arguments when the caller passed
    # nothing, so explicitly supplied paths are no longer ignored.
    if references is None:
        references = args.references
    if conformers is None:
        conformers = args.conformers

    templates = []
    lowest_rmsd = []
    for reference in AllChem.SDMolSupplier(references):
        # The supplier yields None for records it cannot parse.
        if reference is not None and reference.HasProp('_Name'):
            ref_id = reference.GetProp('_Name').split('_')[0]
            templates.append([ref_id, reference])

    mol_RMSD = []
    mol_references = []
    mol_O3A = []
    mol_minimized = []
    for ref_id, ref_mol in templates:
        try:
            print('Processing:', ref_id)
            conformer = []
            rmsd = []
            O3A_result = []
            t_angles = []
            r_gyration = []
            i_energy = []
            f_energy = []
            rmsd_minimized = []
            for mol in AllChem.SDMolSupplier(conformers):
                if mol is None or not mol.HasProp('_Name'):
                    continue
                name = str(mol.GetProp('_Name'))
                if name.split('_')[0] != ref_id:
                    continue

                # Real copy of the untouched conformer for the O3A alignment;
                # a plain assignment would alias the molecule aligned below.
                mol_copy = Chem.Mol(mol)
                conformer.append(name)

                # Alignment and RMSD based on the maximum common substructure
                rms = _align_on_mcs(mol, ref_mol)
                rmsd.append(rms)
                mol.SetProp('RMSD', str(rms))
                mol_RMSD.append(mol)
                mol_references.append(ref_mol)

                # Torsion fingerprint deviation
                r_list = Chem.TorsionFingerprints.CalculateTorsionLists(ref_mol)
                r_angles = Chem.TorsionFingerprints.CalculateTorsionAngles(
                    ref_mol, r_list[0], r_list[1])
                c_list = Chem.TorsionFingerprints.CalculateTorsionLists(mol)
                c_angles = Chem.TorsionFingerprints.CalculateTorsionAngles(
                    mol, c_list[0], c_list[1])
                t_angles.append(
                    Chem.TorsionFingerprints.CalculateTFD(r_angles, c_angles))

                # Radius of gyration
                r_gyration.append(Descriptors3D.RadiusOfGyration(mol))

                # MMFF energy of the conformer as supplied (after alignment)
                mp = AllChem.MMFFGetMoleculeProperties(mol)
                mmff = AllChem.MMFFGetMoleculeForceField(mol, mp)
                i_energy.append(mmff.CalcEnergy())

                # Re-embed and minimise a copy so the aligned conformer kept
                # in mol_RMSD is not overwritten.
                m2 = Chem.Mol(mol)
                AllChem.EmbedMolecule(m2)
                AllChem.MMFFOptimizeMolecule(m2, mmffVariant='MMFF94')
                mp = AllChem.MMFFGetMoleculeProperties(m2)
                mmff = AllChem.MMFFGetMoleculeForceField(m2, mp)
                f_energy.append(mmff.CalcEnergy())

                m3 = Chem.RemoveHs(m2)
                rms_2 = _align_on_mcs(m3, ref_mol)
                rmsd_minimized.append(rms_2)
                m3.SetProp('RMSD', str(rms_2))
                mol_minimized.append(m3)

                # Open3DAlign of the original conformer onto the reference
                O3A = rdMolAlign.GetO3A(mol_copy, ref_mol)
                align = O3A.Align()
                O3A_result.append(align)
                mol_copy.SetProp('O3A', str(align))
                mol_O3A.append(mol_copy)

            d = {
                'conformer': pd.Series(conformer),
                'RMSD': pd.Series(rmsd),
                'O3A_value': pd.Series(O3A_result),
                'Torsional_Fingerprint': pd.Series(t_angles),
                'Radius_of_Gyration': pd.Series(r_gyration),
                'Initial_Energy': pd.Series(i_energy),
                'Minimization_Energy': pd.Series(f_energy),
                'RMSD_after_minimization': pd.Series(rmsd_minimized)
            }
            table = pd.DataFrame(d)
            sort = table.sort_values('RMSD', ascending=True)
            sort = sort.reset_index(drop=True)
            sort.to_csv(ref_id + '.csv')
            print('data in file:', ref_id + '.csv')
            print('-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- ')
            rog_diff = float(
                max(sort['Radius_of_Gyration']) - sort['Radius_of_Gyration'][0])
            lowest_rmsd.append(
                (sort['conformer'][0], sort['RMSD'][0], sort['O3A_value'][0],
                 sort['Torsional_Fingerprint'][0],
                 sort['Radius_of_Gyration'][0], sort['Initial_Energy'][0],
                 sort['Minimization_Energy'][0],
                 sort['RMSD_after_minimization'][0], rog_diff))
        except Exception as err:
            # Best-effort batch run: skip the failing reference, but report
            # why instead of hiding the cause.
            print('Something wrong with this reference or conformer')
            print('Omitting')
            print('Reason:', repr(err))

    print(
        'SAVING DATA OF LOWEST RMSD OF CONFORMERS ... ... ... ... ... ... ... ...'
    )
    summary = pd.DataFrame(data=lowest_rmsd,
                           columns=[
                               'Conformer', 'RMSD', 'O3A_value',
                               'Torsional_Fingerprint', 'Radius_of_Gyration',
                               'Initial_Energy', 'Minimization Energy',
                               'RMSD_after_minimization',
                               'Dif_Radious_of_Gyration'
                           ])
    summary.to_csv('Lowest_RMSD_Data.csv')
    print('Lowest RMSD Data in file: Lowest_RMSD_Data.csv')
    print('***************************************************')
    print(
        'SAVING STRUCTURES (RMSD, O3A, and MINIMIZATION) ... ... ... ... ... ... ... ... ...'
    )
    # Each reference was appended once per matching conformer; keep one entry
    # per object, in first-seen order so the output file is deterministic.
    unique_references = list({id(m): m for m in mol_references}.values())
    _write_sdf('Aligned_Refrences.sdf', unique_references)
    _write_sdf('RMSD_alignment.sdf', mol_RMSD)
    _write_sdf('O3A_alignment.sdf', mol_O3A)
    _write_sdf('Minimization.sdf', mol_minimized)
    print(
        'Structures in files: Aligned_Refrences.sdf, RMSD_alignment.sdf, O3A_alignment.sdf, and Minimization.sdf '
    )
    print(
        'ALL THE CALCULATIONS DONE, FILES SAVED. THANK YOU FOR USING THIS SCRIPT'
    )
def ConstrainedEmbed(mol, core, useTethers=True, coreConfId=-1, randomseed=2342,
                     getForceField=AllChem.UFFGetMoleculeForceField, **kwargs):
    """Embed *mol* so that the atoms matching *core* sit on the core's coordinates.

    :param mol: molecule to embed; modified in place and also returned.
    :param core: molecule with a conformer, used as the substructure query
        and as the source of the target coordinates.
    :param useTethers: if true, matched atoms are tethered to fixed extra
        points at the core positions during minimisation; otherwise pairwise
        distance constraints between the matched atoms are used.
    :param coreConfId: id of the core conformer to use for the coordinate
        map, the tether points and the alignments.
    :param randomseed: seed passed to ``AllChem.EmbedMolecule``.
    :param getForceField: callable returning a force field for ``(mol, confId=0)``.
    :param kwargs: forwarded to ``AllChem.EmbedMolecule``.
    :return: *mol*, with the final alignment RMSD stored as the ``EmbedRMS`` property.
    :raises ValueError: if *mol* does not match *core* or embedding fails.
    """
    #
    #  Copyright (C) 2006-2017  greg Landrum and Rational Discovery LLC
    #
    #   @@ All Rights Reserved @@
    #  This file is part of the RDKit.
    #  The contents are covered by the terms of the BSD license
    #  which is included in the file license.txt, found at the root
    #  of the RDKit source tree.
    #
    force_constant = 1000.
    match = mol.GetSubstructMatch(core)
    if not match:
        raise ValueError("molecule doesn't match the core")

    coreConf = core.GetConformer(coreConfId)
    # Atom i of the core corresponds to atom match[i] of the molecule.
    coordMap = {idxI: coreConf.GetAtomPosition(i) for i, idxI in enumerate(match)}

    if "." in Chem.MolToSmiles(mol):
        # Multi-fragment molecule: embed without the coordinate map.
        ci = AllChem.EmbedMolecule(mol, randomSeed=randomseed, **kwargs)
    else:
        ci = AllChem.EmbedMolecule(mol, coordMap=coordMap, randomSeed=randomseed,
                                   **kwargs)
    if ci < 0:
        raise ValueError('Could not embed molecule.')

    algMap = [(j, i) for i, j in enumerate(match)]

    if not useTethers:
        # clean up the conformation
        ff = getForceField(mol, confId=0)
        for i, idxI in enumerate(match):
            for j in range(i + 1, len(match)):
                idxJ = match[j]
                d = coordMap[idxI].Distance(coordMap[idxJ])
                ff.AddDistanceConstraint(idxI, idxJ, d, d, force_constant)
        ff.Initialize()
        # One initial minimisation plus at most four retries while the
        # minimiser reports that it has not converged.
        for _ in range(5):
            if not ff.Minimize():
                break
        # rotate the embedded conformation onto the core:
        rms = rdMolAlign.AlignMol(mol, core, refCid=coreConfId, atomMap=algMap)
    else:
        # rotate the embedded conformation onto the core:
        rms = rdMolAlign.AlignMol(mol, core, refCid=coreConfId, atomMap=algMap)
        ff = getForceField(mol, confId=0)
        # Tether to the same conformer the coordinate map was built from;
        # previously the default conformer was used regardless of coreConfId.
        for i in range(core.GetNumAtoms()):
            p = coreConf.GetAtomPosition(i)
            pIdx = ff.AddExtraPoint(p.x, p.y, p.z, fixed=True) - 1
            ff.AddDistanceConstraint(pIdx, match[i], 0, 0, force_constant)
        ff.Initialize()
        for _ in range(5):
            if not ff.Minimize(energyTol=1e-4, forceTol=1e-3):
                break
        # realign
        rms = rdMolAlign.AlignMol(mol, core, refCid=coreConfId, atomMap=algMap)
    mol.SetProp('EmbedRMS', str(rms))
    return mol
def GenConstConf(Heads, Docked_Heads, Head_Linkers, output_sdf, Anchor_A,
                 v_atoms_sdf, n=100, homo_protac=False):
    """Generate linker conformations constrained to the pose of two docked heads.

    The molecule is read from the first SMILES in *Head_Linkers*, embedded
    with a random half of its head atoms pinned to the docked coordinates,
    aligned onto the docked heads, and written to *output_sdf* when both
    heads fit with an RMSD below 0.5 (as computed by the file's ``rmsd``
    helper). Three virtual atoms placed at the docked heads' centroid, and at
    that point shifted by +1 in x and in y, are written to *v_atoms_sdf*.

    :param Heads: sequence with the SD file paths of head A and head B.
    :param Docked_Heads: SD file with the docked pose of both heads.
    :param Head_Linkers: text file whose first token on the first line is the
        SMILES of the full molecule.
    :param output_sdf: path of the SD file receiving accepted conformations.
    :param Anchor_A: index (convertible to ``int``) into the head-A match.
    :param v_atoms_sdf: path of the SD file receiving the virtual atoms.
    :param n: number of accepted conformations to aim for; at most ``10 * n``
        seeds are tried, and the search stops after ``n`` seeds when nothing
        has been accepted yet.
    :param homo_protac: if true, head A uses the first and head B the second
        substructure match instead of randomly chosen matches.
    :return: ``(atom index in the molecule for Anchor_A, v_atoms_sdf)``.
    :raises ValueError: if no head-A match was ever selected (e.g. ``n <= 0``
        without ``homo_protac``).
    """
    writer = Chem.SDWriter(output_sdf)
    try:
        with open(Head_Linkers, 'r') as f:
            head_linkers = [Chem.MolFromSmiles(f.readline().split()[0])]
        #loading the heads sdf files
        HeadA = Chem.SDMolSupplier(Heads[0])[0]
        HeadB = Chem.SDMolSupplier(Heads[1])[0]
        docked_heads = Chem.SDMolSupplier(Docked_Heads)[0]
        docked_conf = docked_heads.GetConformer()

        #virtual atoms around the center of mass for the neighbor atom alignment
        num_atoms = docked_conf.GetNumAtoms()
        positions = [docked_conf.GetAtomPosition(k) for k in range(num_atoms)]
        center_x = sum(p.x for p in positions) / num_atoms
        center_y = sum(p.y for p in positions) / num_atoms
        center_z = sum(p.z for p in positions) / num_atoms
        v1 = Point3D(center_x, center_y, center_z)
        v2 = Point3D(center_x + 1, center_y, center_z)
        v3 = Point3D(center_x, center_y + 1, center_z)
        virtual_atoms = Chem.MolFromSmarts('[#23][#23][#23]')
        Chem.rdDistGeom.EmbedMolecule(virtual_atoms)
        virtual_conf = virtual_atoms.GetConformer()
        virtual_conf.SetAtomPosition(1, v1)
        virtual_conf.SetAtomPosition(0, v2)
        virtual_conf.SetAtomPosition(2, v3)
        v_writer = Chem.SDWriter(v_atoms_sdf)
        try:
            v_writer.write(virtual_atoms)
        finally:
            # Close explicitly so the record is flushed to disk.
            v_writer.close()

        #homo protacs are protacs with the same binder twice, causing self degradation of an E3 ligase
        if homo_protac:
            docked_A = docked_heads.GetSubstructMatches(HeadA)[0]
            docked_B = docked_heads.GetSubstructMatches(HeadB)[1]
        else:
            docked_A = docked_heads.GetSubstructMatch(HeadA)
            docked_B = docked_heads.GetSubstructMatch(HeadB)

        head_A = None
        for head_linker in head_linkers:
            # AddHs returns a new molecule; the result must be kept or the
            # hydrogens are never added.
            head_linker = Chem.AddHs(head_linker)
            if homo_protac:
                head_A = head_linker.GetSubstructMatches(HeadA)[0]
                head_B = head_linker.GetSubstructMatches(HeadB)[1]
            else:
                head_A_list = head_linker.GetSubstructMatches(HeadA, uniquify=False)
                head_B_list = head_linker.GetSubstructMatches(HeadB, uniquify=False)
            i = 0
            seed = 0
            while i < n:
                if seed > 10 * n:
                    break
                if seed > n and i == 0:
                    break
                seed += 1
                random.seed(seed)
                # The homo-protac matches are fixed above; only the regular
                # case draws a random symmetry-equivalent match per seed.
                if not homo_protac:
                    head_A = random.choice(head_A_list)
                    head_B = random.choice(head_B_list)
                #amap for final alignment
                amap = list(zip(head_A, docked_A)) + list(zip(head_B, docked_B))
                #the constraints for the conformation generation using the two docked heads
                cmap = {
                    h: docked_conf.GetAtomPosition(d)
                    for h, d in zip(head_A, docked_A)
                }
                cmap.update({
                    h: docked_conf.GetAtomPosition(d)
                    for h, d in zip(head_B, docked_B)
                })
                #only half of the atoms are required to make the constrained embedding
                #this is done because using all the atoms sometimes makes it impossible
                #to find solutions, the half is chosen randomly for each generation
                cmap_tag = random.sample(list(cmap), int(len(cmap) / 2))
                cmap_tag = {ctag: cmap[ctag] for ctag in cmap_tag}
                if AllChem.EmbedMolecule(head_linker,
                                         coordMap=cmap_tag,
                                         randomSeed=seed,
                                         useBasicKnowledge=True,
                                         maxAttempts=10) == -1:
                    continue
                #final alignment to bring the new conformation to the position of the pose's heads
                #this is needed because the constrained embedding only applies
                #to distances and not to atoms position
                rdMolAlign.AlignMol(head_linker, docked_heads, atomMap=amap)
                #make sure the alignment is good enough for both heads (also to ensure the same isomer
                #for ambiguous rings)
                if rmsd(head_linker, docked_heads, head_A, docked_A) < 0.5 and rmsd(
                        head_linker, docked_heads, head_B, docked_B) < 0.5:
                    writer.write(head_linker)
                    i += 1

        if head_A is None:
            raise ValueError('no head A match was selected; cannot resolve Anchor_A')
        return head_A[int(Anchor_A)], v_atoms_sdf
    finally:
        # Flush and release the output file on success and on error alike.
        writer.close()
# Copy the bond orders of the template `ref` onto the structure `target`.
target = AllChem.AssignBondOrdersFromTemplate(ref, target)
print('from structure')
display(target)

#mol2 = Chem.MolFromSmiles('CC(=O)OC1=CC=CC=C1') #'phenylacetate'
# NOTE(review): MolFromSmiles returns None when a SMILES cannot be sanitized;
# confirm that this string parses.
probe = Chem.MolFromSmiles('Oc1(O)ccc2ccccc2[nH]1')
# AddHs returns a new molecule, so the result has to be kept; otherwise the
# probe is embedded and optimised without hydrogens.
probe = Chem.AddHs(probe)
AllChem.EmbedMolecule(probe)
AllChem.UFFOptimizeMolecule(probe, maxIters=2000)
Chem.rdPartialCharges.ComputeGasteigerCharges(probe)
print('new')
display(probe)

### find what is common
res = Chem.rdFMCS.FindMCS(
    [probe, target],
    #matchValences=True,
    atomCompare=Chem.rdFMCS.AtomCompare.CompareElements,
    bondCompare=Chem.rdFMCS.BondCompare.CompareOrder)
common = Chem.MolFromSmarts(res.smartsString)
print('Common')
display(common)

### Align them
overlap_target = target.GetSubstructMatch(common)
overlap_probe = probe.GetSubstructMatch(common)
# AlignMol expects (probe atom index, reference atom index) pairs.
atomMap = list(zip(overlap_probe, overlap_target))
print(atomMap)
rms = rdMolAlign.AlignMol(probe, target, atomMap=atomMap, maxIters=500)
print(rms)
Chem.MolToMolFile(probe, 'inter.aligned.mol')