def test13CrippenO3A(self):
    """Test CrippenO3A with variable weight constraints, then a second pass.

    The first oxygen match of the probe is constrained onto the first sulfur
    match of the reference, once with a weak and once with a strong
    constraint weight.  After every constrained alignment a second alignment
    is run with ``options=4`` (the "local-only optimization" step of this
    test) and the resulting O...S distance is compared, to one decimal
    place, with the value expected for that weight.
    """
    dataFile = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'MolAlign', 'test_data',
                            'ref_e2.sdf')
    supplier = Chem.SDMolSupplier(dataFile, True, False)
    refMol = supplier[23]
    prbMol = supplier[32]
    # The returned property objects are not used below; the calls themselves
    # are retained exactly as in the original test.
    ChemicalForceFields.MMFFGetMoleculeProperties(refMol)
    ChemicalForceFields.MMFFGetMoleculeProperties(prbMol)
    refSIdx = refMol.GetSubstructMatch(Chem.MolFromSmarts('S'))[0]
    prbOIdx = prbMol.GetSubstructMatch(Chem.MolFromSmarts('O'))[0]
    # (constraint weight, expected O...S distance after alignment)
    expectations = ((0.1, 2.7), (100.0, 0.4))
    for weight, expectedDist in expectations:
        constrained = rdMolAlign.GetCrippenO3A(prbMol, refMol,
                                               constraintMap=[[prbOIdx, refSIdx]],
                                               constraintWeights=[weight])
        constrained.Align()
        refined = rdMolAlign.GetCrippenO3A(prbMol, refMol, options=4)
        refined.Align()
        prbPos = prbMol.GetConformer().GetAtomPosition(prbOIdx)
        refPos = refMol.GetConformer().GetAtomPosition(refSIdx)
        self.assertAlmostEqual(prbPos.Distance(refPos), expectedDist, 1)
def test12CrippenO3A(self):
    """Test CrippenO3A with constraints.

    Two identical coplanar 4-phenylpyridines are superimposed:
      1) the usual way, expecting the tracked carbon to land on itself;
      2) with a constraint mapping the probe's tracked carbon onto the
         reference's pyridine nitrogen, expecting that carbon to end up
         about 7 distance units from its unconstrained position.
    """
    template = Chem.AddHs(Chem.MolFromSmiles('n1ccc(cc1)-c1ccccc1'))
    rdDistGeom.EmbedMolecule(template)
    mmffProps = ChemicalForceFields.MMFFGetMoleculeProperties(template)
    forceField = ChemicalForceFields.MMFFGetMoleculeForceField(template, mmffProps)
    forceField.Minimize()

    match = template.GetSubstructMatch(Chem.MolFromSmarts('nccc-cccc'))
    nIdx, cIdx = match[0], match[-1]
    # Flatten the inter-ring torsion so that both rings are coplanar.
    t1, t2, t3, t4 = match[2:6]
    rdMolTransforms.SetDihedralDeg(template.GetConformer(), t1, t2, t3, t4, 0)

    freeProbe = copy.copy(template)
    rdMolAlign.RandomTransform(freeProbe)
    constrainedProbe = copy.copy(freeProbe)

    # 1) unconstrained alignment
    rdMolAlign.GetCrippenO3A(freeProbe, template).Align()
    refCarbonPos = template.GetConformer().GetAtomPosition(cIdx)
    dist = freeProbe.GetConformer().GetAtomPosition(cIdx).Distance(refCarbonPos)
    self.assertAlmostEqual(dist, 0, 0)

    # 2) constrained alignment: probe carbon -> reference nitrogen
    rdMolAlign.GetCrippenO3A(constrainedProbe, template, constraintMap=[[cIdx, nIdx]]).Align()
    refCarbonPos = template.GetConformer().GetAtomPosition(cIdx)
    dist = constrainedProbe.GetConformer().GetAtomPosition(cIdx).Distance(refCarbonPos)
    self.assertAlmostEqual(dist, 7, 0)
def test15MultiConfs(self):
    """Test multi-conformer alignment.

    All molecules of ``probe_mol.sdf`` are merged into one probe molecule
    carrying 50 conformers.  The batch entry points
    (``GetO3AForProbeConfs`` / ``GetCrippenO3AForProbeConfs``) must then give
    the same ``Align()`` and ``Score()`` values, to 6 decimal places, as
    individual ``GetO3A`` / ``GetCrippenO3A`` calls made with ``prbCid=i``.

    Uses ``assertEqual`` / ``assertAlmostEqual``: the ``failUnless*`` aliases
    were removed from ``unittest`` in Python 3.12.
    """
    dataDir = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'MolAlign', 'test_data')
    suppl = Chem.SDMolSupplier(os.path.join(dataDir, 'ref_e2.sdf'), removeHs=False)
    refMol = suppl[13]

    prbSuppl = Chem.SDMolSupplier(os.path.join(dataDir, 'probe_mol.sdf'), removeHs=False)
    tms = list(prbSuppl)
    prbMol = tms[0]
    # Collect the conformer of every remaining entry on the first molecule.
    for tm in tms[1:]:
        prbMol.AddConformer(tm.GetConformer(), True)
    self.assertEqual(prbMol.GetNumConformers(), 50)

    refParams = ChemicalForceFields.MMFFGetMoleculeProperties(refMol)
    prbParams = ChemicalForceFields.MMFFGetMoleculeProperties(prbMol)

    # MMFF-based O3A: batch call versus one call per conformer.
    cp = Chem.Mol(prbMol)
    o3s = rdMolAlign.GetO3AForProbeConfs(cp, refMol, 1, prbParams, refParams)
    for i in range(prbMol.GetNumConformers()):
        cp2 = Chem.Mol(prbMol)
        o3 = rdMolAlign.GetO3A(cp2, refMol, prbParams, refParams, prbCid=i)
        self.assertAlmostEqual(o3s[i].Align(), o3.Align(), 6)
        self.assertAlmostEqual(o3s[i].Score(), o3.Score(), 6)

    # Crippen-based O3A: batch call versus one call per conformer.
    cp = Chem.Mol(prbMol)
    o3s = rdMolAlign.GetCrippenO3AForProbeConfs(cp, refMol)
    for i in range(prbMol.GetNumConformers()):
        cp2 = Chem.Mol(prbMol)
        o3 = rdMolAlign.GetCrippenO3A(cp2, refMol, prbCid=i)
        self.assertAlmostEqual(o3s[i].Align(), o3.Align(), 6)
        self.assertAlmostEqual(o3s[i].Score(), o3.Score(), 6)
def PerformAlignmentAndWrieOutput(RefMol, ProbeMol, RefMolName, ProbeMolName, Writer):
    """Align the probe molecule to the reference and write the probe out.

    The alignment method is picked from the module-level ``OptionsInfo``
    flags, checked in this order: ``UseRMSD``, ``UseBestRMSD``, ``UseOpen3A``,
    ``UseCrippenOpen3A``.  When none is set, the problem is reported through
    ``MiscUtil.PrintError``.

    A ``RuntimeError`` or ``ValueError`` raised during alignment is turned
    into a warning; the probe molecule is written to ``Writer`` regardless.

    Returns:
        bool: False when the alignment raised one of the handled exceptions,
        True otherwise.
    """
    Aligned = True

    try:
        if OptionsInfo["UseRMSD"]:
            rdMolAlign.AlignMol(ProbeMol, RefMol, maxIters=OptionsInfo["MaxIters"])
        elif OptionsInfo["UseBestRMSD"]:
            # NOTE(review): argument order (reference, probe) kept as in the
            # original; confirm it matches the GetBestRMS signature of the
            # RDKit release in use.
            AllChem.GetBestRMS(RefMol, ProbeMol)
        elif OptionsInfo["UseOpen3A"]:
            rdMolAlign.GetO3A(ProbeMol, RefMol).Align()
        elif OptionsInfo["UseCrippenOpen3A"]:
            rdMolAlign.GetCrippenO3A(ProbeMol, RefMol).Align()
        else:
            MiscUtil.PrintError(
                "Alignment couldn't be performed: Specified alignment value, %s, is not supported"
                % OptionsInfo["Alignment"])
    except (RuntimeError, ValueError):
        Aligned = False
        MiscUtil.PrintWarning(
            "Alignment failed between reference molecule, %s, and probe molecule, %s.\nWriting unaligned probe molecule...\n"
            % (RefMolName, ProbeMolName))

    # The probe is written whether or not the alignment succeeded.
    Writer.write(ProbeMol)

    return Aligned
def PerformShapeAlignment(RefMol, ProbeMol):
    """Align ProbeMol to RefMol with Open3DAlign and return the Align() result.

    The Crippen-based variant is used when ``OptionsInfo["UseCrippenOpen3A"]``
    is truthy; otherwise the default ``GetO3A`` variant is used.
    """
    BuildAlignment = (rdMolAlign.GetCrippenO3A
                      if OptionsInfo["UseCrippenOpen3A"] else rdMolAlign.GetO3A)
    return BuildAlignment(ProbeMol, RefMol).Align()
def align_set_of_ligands(ligands: Sequence,
                         n_conformers: int = 100) -> Tuple[List[Chem.Mol], List[float]]:
    """
    Align a set of ligands to each other

    Every ligand is deep-copied and passed through ``generate_conformers``.
    The first ligand serves as the reference (its conformer 0); each ligand,
    the reference included, is then aligned conformer by conformer with
    Crippen Open3DAlign and the best-scoring conformer is kept.

    Parameters
    ----------
    ligands : list of rdkit.Chem.rdchem.Mol or rdkit.Chem.SmilesMolSupplier or rdkit.Chem.SDMolSupplier
        List of ligands.
    n_conformers : int, optional
        Value passed to ``generate_conformers`` and number of conformer ids
        (``0 .. n_conformers - 1``) that are aligned for every ligand.
        Defaults to 100, the value that used to be hard-coded.

    Returns
    ----------
    aligned_molecules : list of rdkit.Chem.rdchem.Mol
        List of aligned ligands.
    crippen_score : list of float
        List with crippen scores calculated during the alignment.

    Raises
    ------
    ValueError
        If ``n_conformers`` is smaller than 1.
    """
    if n_conformers < 1:
        raise ValueError("n_conformers must be at least 1")

    if not isinstance(ligands, list):
        ligands = list(ligands)
    if not ligands:
        # Nothing to align; previously this raised IndexError further down.
        return [], []

    # Work on copies so that the caller's molecules are left untouched.
    molecules = [generate_conformers(mol, n_conformers) for mol in copy.deepcopy(ligands)]
    crippen_contribs = [rdMolDescriptors._CalcCrippenContribs(mol) for mol in molecules]

    ref_mol = molecules[0]
    crippen_ref_contrib = crippen_contribs[0]

    aligned_molecules = []
    crippen_score = []
    for mol, contrib in zip(molecules, crippen_contribs):
        scores = []
        for cid in range(n_conformers):
            # Positional arguments 5 and 6 are the probe and reference conformer ids.
            crippenO3A = rdMolAlign.GetCrippenO3A(mol, ref_mol, contrib, crippen_ref_contrib,
                                                  cid, 0)
            crippenO3A.Align()
            scores.append(crippenO3A.Score())

        best = int(np.argmax(scores))
        # Round-trip through a mol block to extract the best conformer as its own molecule.
        mol_block = Chem.MolToMolBlock(mol, confId=best)
        aligned_molecules.append(Chem.MolFromMolBlock(mol_block, removeHs=False))
        crippen_score.append(scores[best])

    return aligned_molecules, crippen_score
def test11CrippenO3A(self):
    """Now test where the Crippen parameters are generated on call.

    Every molecule of ``ref_e2.sdf`` is used as probe against entry 48 of
    the same file.  The summed ``Score()`` values and the root of the mean
    squared first element of ``Trans()`` are compared with fixed reference
    numbers.
    """
    dataFile = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'MolAlign', 'test_data',
                            'ref_e2.sdf')
    supplier = Chem.SDMolSupplier(dataFile, True, False)
    refMol = supplier[48]

    scoreTotal = 0.0
    squaredRmsdTotal = 0.0
    for prbMol in supplier:
        alignment = rdMolAlign.GetCrippenO3A(prbMol, refMol)
        scoreTotal += alignment.Score()
        rmsd = alignment.Trans()[0]
        squaredRmsdTotal += rmsd * rmsd

    meanSquaredRmsd = squaredRmsdTotal / len(supplier)
    self.assertAlmostEqual(scoreTotal, 4918, 0)
    self.assertAlmostEqual(math.sqrt(meanSquaredRmsd), .304, 3)
def test8CrippenO3A(self):
    """Test CrippenO3A with Crippen contributions supplied by the caller.

    Every molecule of ``ref_e2.sdf`` is aligned onto entry 48 of the same
    file, passing the ``_CalcCrippenContribs`` results for probe and
    reference explicitly.  The summed ``Score()`` values and the root mean
    square of the ``Align()`` return values are compared with fixed
    reference numbers.

    Uses ``assertAlmostEqual``: the ``failUnlessAlmostEqual`` alias was
    removed from ``unittest`` in Python 3.12.
    """
    sdf = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'MolAlign', 'test_data',
                       'ref_e2.sdf')
    molS = Chem.SDMolSupplier(sdf, True, False)
    refNum = 48
    refMol = molS[refNum]
    refList = rdMolDescriptors._CalcCrippenContribs(refMol, True)

    cumScore = 0.0
    cumMsd = 0.0
    for prbMol in molS:
        prbList = rdMolDescriptors._CalcCrippenContribs(prbMol, True)
        pyO3A = rdMolAlign.GetCrippenO3A(prbMol, refMol, prbList, refList)
        cumScore += pyO3A.Score()
        rmsd = pyO3A.Align()
        cumMsd += rmsd * rmsd
    cumMsd /= len(molS)

    self.assertAlmostEqual(cumScore, 4918, 0)
    self.assertAlmostEqual(math.sqrt(cumMsd), .304, 3)
def retrieve_features(df, Complex, molecules_3D, fragments, core, resname, positions_to_grow, bonded_atom, core_smile_pattern, labels=None, save=True):
    """Build one feature row per (molecule, fragment) pair and collect the rows.

    For every tuple taken in lockstep from ``molecules_3D``, ``fragments``,
    ``bonded_atom`` and ``labels`` the function assembles a numeric vector
    made of fragment descriptors, the fragment centroid after aligning the
    molecule onto ``core``, and statistics on the residues selected around
    that centroid in ``Complex``.  The vector is appended both to ``df`` and
    to a fresh frame ``features_test``.

    Parameters:
        df: frame to which every row is appended; the grown frame is returned.
        Complex: passed to ``pd.parsePDB`` (``pd`` is presumably ProDy here,
            while ``pds`` is presumably pandas -- confirm against the imports).
        molecules_3D: molecules; each is aligned onto ``core`` with
            ``GetCrippenO3A`` and searched for its fragment.
        fragments: fragment strings; digits are rewritten, ``"!"`` is
            stripped and the result is parsed with ``Chem.MolFromSmiles``.
        core: reference molecule for the alignment.
        resname: not used in this function body.
        positions_to_grow: length of the one-hot vector that marks ``bonded``.
        bonded_atom: per-molecule position index compared against
            ``range(positions_to_grow)``.
        core_smile_pattern: string whose digit characters determine the
            first replacement ring index.
        labels: per-molecule labels.  NOTE(review): the default ``None``
            cannot work -- ``len(labels)`` and ``zip(..., labels)`` below
            raise ``TypeError`` for ``None``.
        save: when truthy, ``features_test`` plus a ``"labels"`` column is
            written to ``descriptors.csv``.

    Returns:
        tuple: ``(df, features_test, true_labels)``.
    """
    # Preprocess fragment
    features_test = pds.DataFrame(columns=COLUMNS)
    true_labels = []
    print("nmolecules: {}\n nfragments: {},\n nlabels: {}\n".format(len(molecules_3D), len(fragments), len(labels)))
    for i, (mol, fragment, bonded, label) in tqdm(enumerate(zip(molecules_3D, fragments, bonded_atom, labels)), total=len(fragments)):
        # Adapt ring to the right index: start numbering one above the
        # largest digit found in the core pattern.
        next_ring = str(max([int(c) for c in core_smile_pattern if c.isdigit()]) + 1)
        # NOTE(review): this loop (and later ones) rebinds the outer index `i`.
        # NOTE(review): the source arrived with its line structure collapsed;
        # the nesting of the `if` below inside this loop is a reconstruction
        # -- confirm against the original file.
        for i in range(1, int(next_ring)):
            new_fragment = fragment.replace(str(i), next_ring)
            next_ring = str(int(next_ring) + 1)
            if new_fragment:
                fragment = new_fragment
        #mol = Chem.AddHs(mol)
        rec_info = []
        # One-hot encoding of the bonded position.
        frag_info = [ 1 if i==bonded else 0 for i in range(positions_to_grow) ]
        # NOTE(review): `atoms1` is never read; the complex is parsed again below.
        atoms1 = pd.parsePDB(Complex)
        fragment = Chem.MolFromSmiles(fragment.strip("!"))
        # Count carbon, nitrogen and oxygen atoms of the fragment
        n_carbon, n_nitrogen, n_oxigen = 0, 0, 0
        for atom in fragment.GetAtoms():
            if atom.GetAtomicNum() == 6:
                n_carbon += 1
            if atom.GetAtomicNum() == 7:
                n_nitrogen += 1
            if atom.GetAtomicNum() == 8:
                n_oxigen += 1
        frag_info.extend([n_carbon, n_nitrogen, n_oxigen])
        # Fragment Properties: count features per family of interest
        fragment_features = collections.OrderedDict({ "Donor": 0, "Acceptor":0, "Aromatic": 0, "Hydrophobe":0})
        for feature in factory.GetFeaturesForMol(fragment):
            feature_family = feature.GetFamily()
            #Embed fragment properties
            if feature_family in fragment_features:
                fragment_features[feature_family] += 1
        # Calculate fragment volume (disabled)
        #fragment_h = Chem.AddHs(fragment)
        #AllChem.EmbedMolecule(fragment_h)
        #fragment_volume = Chem.AllChem.ComputeMolVolume(fragment_h)
        #Embed information on a vector
        #frag_info = fragment_volume + fragment__features.values()
        frag_info.extend(fragment_features.values())
        frag_info.append(len(fragment.GetAtoms()))
        # NOTE(review): the MACCS keys are computed on `mol`, not on
        # `fragment`, although the variable is named frag_* -- confirm intent.
        frag_MACCS_fp = MACCSkeys.GenMACCSKeys(mol).ToBitString()
        # Characters 1..166 of the bit string become individual 0/1 features.
        for i in range(1, 167):
            frag_info.append(int(frag_MACCS_fp[i]))
        frag_info.append(Chem.rdMolDescriptors.CalcNumRotatableBonds(fragment))
        frag_info.append(Chem.rdMolDescriptors.CalcNumRings(fragment))
        frag_info.append(Chem.rdMolDescriptors.CalcNumHBD(fragment))
        frag_info.append(Chem.rdMolDescriptors.CalcNumHBA(fragment))
        frag_info.append(Chem.rdMolDescriptors.CalcNumHeteroatoms(fragment))
        frag_info.append(Chem.rdMolDescriptors.CalcNumAmideBonds(fragment))
        frag_info.append(Chem.rdMolDescriptors.CalcFractionCSP3(fragment))
        frag_info.append(Chem.rdMolDescriptors.CalcExactMolWt(fragment))
        # Align molecules
        molec = rdMolAlign.GetCrippenO3A(mol, core)
        molec.Align()
        # Centroid of fragment: positions of the molecule atoms matched by it
        fragment_atom_indxs = mol.GetSubstructMatch(fragment)
        molec_atoms = mol.GetAtoms()
        coords = [mol.GetConformer().GetAtomPosition(i) for i, atom in enumerate(molec_atoms) if i in fragment_atom_indxs ]
        try:
            fragment_centeroid = centeroidnp(np.array(coords))
        except IndexError:
            # The molecule is skipped: no row is appended and its label is dropped.
            print("Molecule {} skipped".format(mol.GetProp("_Name")))
            continue
        frag_info = np.hstack([frag_info, fragment_centeroid])
        # Extract receptor info from the selection around the centroid
        closer_residues = pd.parsePDB(Complex).select("protein within 10 of center", center=fragment_centeroid)
        distances = [ np.linalg.norm(fragment_centeroid-coords) for coords in closer_residues.getCoords() ]
        min_dist, average_dist, n_close_residues = np.min(distances), np.mean(distances), len(distances)
        # Histogram of residue names over the VOC_AMINOACIDS vocabulary.
        residues = [0] * len(VOC_AMINOACIDS)
        for res in closer_residues.getResnames():
            idx = VOC_AMINOACIDS.index(res)
            residues[idx] += 1
        rec_info = np.hstack([np.array([min_dist, average_dist, n_close_residues]), np.array(residues)])
        fragment_receptor_info = np.hstack([frag_info, rec_info])
        # Save to general df
        # NOTE(review): if `pds` is pandas, DataFrame.append was removed in
        # pandas 2.0 -- confirm the pinned version.
        df = df.append(pds.Series(fragment_receptor_info, index=COLUMNS), ignore_index=True)
        features_test = features_test.append(pds.Series(fragment_receptor_info, index=COLUMNS), ignore_index=True)
        true_labels.append(label)
    if save:
        # Persist only the rows built in this call, together with their labels.
        df_save = features_test.copy()
        df_save["labels"] = true_labels
        df_save.to_csv("descriptors.csv")
    return df, features_test, true_labels
# Script fragment: aligns a list of molecules onto a reference with Crippen
# Open3DAlign and shows the result in a viewer object `v`.
# NOTE(review): `m`, `mol_list`, `cdk2mol_reference`, `ref_mol` and `v` are
# defined outside this fragment.  The first two statements look like the body
# of a loop whose header is not visible here -- confirm against the full script.
m1 = Chem.MolFromMolBlock(m, removeHs=False)
mol_list.append(m1)
cdk2mol = [m for m in mol_list]
# Deep copy so that the alignment below does not move the molecules in mol_list.
cdk2mol2 = copy.deepcopy(cdk2mol)
crippen_contribs = [ rdMolDescriptors._CalcCrippenContribs(mol) for mol in cdk2mol2 ]
ref = cdk2mol_reference
ref_contrib = rdMolDescriptors._CalcCrippenContribs(ref)
targets = cdk2mol2[0:]
targets_contrib = crippen_contribs[0:]
# Align every target onto the reference using its own Crippen contributions.
for i, target in enumerate(targets):
    crippenO3A = rdMolAlign.GetCrippenO3A(target, ref, targets_contrib[i], ref_contrib)
    crippenO3A.Align()
# Display the reference and all aligned probes in the viewer.
v.DeleteAll()
v.ShowMol(ref, name='ref', showOnly=False)
for i in range(len(targets)):
    name = f'probe_{i}'
    v.ShowMol(targets[i], name=name, showOnly=False)
# NOTE(review): confirm that MolToMolBlock accepts a `removeHs` keyword.
ref_mol_block = Chem.MolToMolBlock(ref_mol, removeHs=False)
hmols_1 = mol_list
# Contributions are recomputed on the original (non-copied) molecules.
crippen_contribs = [ rdMolDescriptors._CalcCrippenContribs(mol) for mol in hmols_1 ]
crippen_ref_contrib = crippen_contribs[0]
def main():
    """Command-line entry point: Open3DAlign input molecules against a query.

    Parses the command line, reads the query molecule, aligns the query onto
    itself to obtain the "perfect" score, then runs ``doO3Dalign`` for every
    readable molecule of the input.  When ``--num`` is greater than zero the
    existing conformers are discarded and regenerated through
    ``conformers.process_mol_conformers`` first.  A ``ValueError`` raised
    while processing a molecule is logged and counted; processing continues
    with the next molecule.  Metrics are written when ``args.meta`` is set.
    """
    parser = argparse.ArgumentParser(description='Open3DAlign with RDKit')
    parser.add_argument('query', help='query molfile')
    parser.add_argument(
        '--qmolidx', help="Query molecule index in SD file if not the first", type=int, default=1)
    parser.add_argument('--crippen', action='store_true',
                        help='Use Crippen (logP) contributions')
    parser.add_argument(
        '-t', '--threshold', type=float,
        help='score cuttoff relative to alignment of query to itself')
    parser.add_argument(
        '-n', '--num', default=0, type=int,
        help=
        'number of conformers to generate, if None then input structures are assumed to already be 3D'
    )
    parser.add_argument('-a', '--attempts', default=0, type=int,
                        help='number of attempts to generate conformers')
    parser.add_argument('-r', '--rmsd', type=float, default=1.0,
                        help='prune RMSD threshold for excluding conformers')
    parser.add_argument(
        '-e', '--emin', type=int, default=0,
        help=
        'energy minimisation iterations for generated conformers (default of 0 means none)'
    )
    parameter_utils.add_default_io_args(parser)

    args = parser.parse_args()
    utils.log("o3dAlign Args: ", args)

    # TODO - handle molecules with multiple fragments
    # TODO - allow to specify threshold as fraction of perfect score?

    qmol = rdkit_utils.read_single_molecule(args.query, index=args.qmolidx)
    qmol = Chem.RemoveHs(qmol)
    # Independent copy of the query, used as probe for the self-alignment below.
    qmol2 = Chem.Mol(qmol)

    source = "conformers.py"
    datasetMetaProps = {
        "source": source,
        "description": "Open3DAlign using RDKit " + rdBase.rdkitVersion
    }
    clsMappings = {"O3DAScore": "java.lang.Float"}
    fieldMetaProps = [{
        "fieldName": "O3DAScore",
        "values": {
            "source": source,
            "description": "Open3DAlign alignment score"
        }
    }]
    if args.num > 0:
        # we generate the conformers so will add energy info
        clsMappings["EnergyDelta"] = "java.lang.Float"
        clsMappings["EnergyAbs"] = "java.lang.Float"
        fieldMetaProps.append({
            "fieldName": "EnergyDelta",
            "values": {
                "source": source,
                "description": "Energy difference to lowest energy conformer"
            }
        })
        fieldMetaProps.append({
            "fieldName": "EnergyAbs",
            "values": {
                "source": source,
                "description": "Absolute energy"
            }
        })

    # Local renamed from `input` so that the builtin is not shadowed.
    input_handle, output, suppl, writer, output_base = rdkit_utils.default_open_input_output(
        args.input, args.informat, args.output, 'o3dAlign', args.outformat,
        valueClassMappings=clsMappings, datasetMetaProps=datasetMetaProps,
        fieldMetaProps=fieldMetaProps)

    # Self-alignment of the query gives the reference ("perfect") score.
    if args.crippen:
        pyO3A = rdMolAlign.GetCrippenO3A(qmol2, qmol)
    else:
        pyO3A = rdMolAlign.GetO3A(qmol2, qmol)

    perfect_align = pyO3A.Align()
    perfect_score = pyO3A.Score()
    utils.log('Perfect score:', perfect_align, perfect_score,
              Chem.MolToSmiles(qmol, isomericSmiles=True), qmol.GetNumAtoms())

    i = 0       # molecules read from the input, unreadable ones included
    count = 0   # sum of the values returned by doO3Dalign
    total = 0   # conformers processed
    errors = 0  # molecules whose processing raised ValueError
    for mol in suppl:
        if mol is None:
            i += 1
            continue
        try:
            if args.num > 0:
                mol.RemoveAllConformers()
                conformerProps, minEnergy = conformers.process_mol_conformers(
                    mol, i, args.num, args.attempts, args.rmsd, None, None, 0)
                mol = Chem.RemoveHs(mol)
                count += doO3Dalign(i, mol, qmol, args.crippen, args.threshold, perfect_score,
                                    writer, conformerProps=conformerProps, minEnergy=minEnergy)
            else:
                mol = Chem.RemoveHs(mol)
                count += doO3Dalign(i, mol, qmol, args.crippen, args.threshold, perfect_score,
                                    writer)
            total += mol.GetNumConformers()
        except ValueError as e:
            errors += 1
            # Exceptions have no `.message` attribute on Python 3; str(e)
            # yields the message on every Python version.
            utils.log("Molecule", i, "failed to align:", str(e))
        i += 1

    input_handle.close()
    writer.flush()
    writer.close()
    output.close()

    if args.meta:
        utils.write_metrics(
            output_base, {
                '__InputCount__': i,
                '__OutputCount__': count,
                '__ErrorCount__': errors,
                'RDKitO3DAlign': total
            })