def get_rmsd(smiles, pdb_path, ref_pdb_path):
    """Compute all-atom and ring-atom RMSDs of a structure against a reference.

    Both PDB files are read with RDKit and given bond orders from the
    molecule built from ``smiles``. The atoms of the compared molecule are
    then renumbered to follow the first substructure match of the reference,
    so that atom ``i`` means the same atom in both. The two molecules are
    written back out as PDB files and loaded with MDTraj, which performs the
    RMSD calculation and the superposition.

    Args:
        smiles: SMILES string used as the bond-order template for both
            structures.
        pdb_path: Path to the PDB file holding the structure(s) to compare.
        ref_pdb_path: Path to the PDB file holding the reference structure.

    Returns:
        A 5-tuple ``(rmsd, best, ring_rmsd, ring_best, ref)`` where

        * ``rmsd`` is ``md.rmsd`` of every frame of the compared trajectory
          against frame 0 of the reference, using all atoms;
        * ``best`` is the frame of the compared trajectory with the lowest
          ``rmsd``;
        * ``ring_rmsd`` is the same RMSD restricted to the atom indices
          returned by ``cpeptools.get_largest_ring`` for the reference;
        * ``ring_best`` is the frame with the lowest ``ring_rmsd``;
        * ``ref`` is the reference trajectory.

        Both returned frames are taken from the trajectory *after* it has
        been superposed onto the reference using the ring atoms only.

    Raises:
        IndexError: If the reference molecule has no substructure match in
            the compared molecule, so no atom mapping can be derived.
        AssertionError: If a ring atom index of the reference is not among
            the ring atom indices of the renumbered compared molecule.
    """
    import os  # local import: only needed to build the temporary file paths

    smiles_mol = Chem.MolFromSmiles(smiles)
    ref_mol = Chem.MolFromPDBFile(ref_pdb_path)
    mol = Chem.MolFromPDBFile(pdb_path)

    # PDB files carry no bond orders; take them from the SMILES template.
    ref_mol = AllChem.AssignBondOrdersFromTemplate(smiles_mol, ref_mol)
    mol = AllChem.AssignBondOrdersFromTemplate(smiles_mol, mol)

    # Reorder the atoms of ``mol`` so they line up with ``ref_mol``.
    matches = mol.GetSubstructMatches(ref_mol)
    if not matches:
        # Same exception type as indexing the empty result, but with a
        # message that says what actually went wrong.
        raise IndexError(
            "reference structure has no substructure match in {}".format(pdb_path)
        )
    mol = Chem.RenumberAtoms(mol, list(matches[0]))

    indices = cpeptools.get_largest_ring(ref_mol)
    # Raised explicitly instead of using ``assert`` so that the check still
    # runs when Python is started with -O.
    if not set(indices) <= set(cpeptools.get_largest_ring(mol)):
        raise AssertionError("ring atom indices do not agree")

    # Round-trip through PDB files to hand the molecules over to MDTraj. The
    # context manager removes the directory and both files afterwards; the
    # trajectories are loaded inside the block, before the files disappear.
    with tempfile.TemporaryDirectory() as tmp_dir:
        ref_pdb_filename = os.path.join(tmp_dir, "ref.pdb")
        pdb_filename = os.path.join(tmp_dir, "compare.pdb")
        Chem.MolToPDBFile(ref_mol, ref_pdb_filename)
        Chem.MolToPDBFile(mol, pdb_filename)
        ref = md.load(ref_pdb_filename)
        compare = md.load(pdb_filename)

    # Both RMSDs are computed before the ring-only superposition below, in
    # the same order as before.
    rmsd = md.rmsd(compare, ref, 0)
    ring_rmsd = md.rmsd(compare, ref, 0, atom_indices=indices)
    compare = compare.superpose(ref, 0, atom_indices=indices)

    return (
        rmsd,
        compare[np.argmin(rmsd)],
        ring_rmsd,
        compare[np.argmin(ring_rmsd)],
        ref,
    )
# Per-molecule bookkeeping lists.
num_conf_list = []
mol_list = []
path_list = []  # appended to in the loop below, so it has to exist first

# Derive the output folder name from the script name: drop the last three
# characters (assumes a ".py" suffix) and keep only the text after the last
# remaining dot.
tmp = sys.argv[0][:-3]
tmp = tmp.split(".")[-1]
# remove the number at the beginning for folder names, this way scripts all
# start with a number, while folders do not
folder = "./" + tmp

# Tab-separated table with "Name" and "Smiles" columns; lines starting with
# "%" are treated as comments.
df = pd.read_csv("../reference/smiles.tsv", sep="\t", comment="%")
mol_dict = {
    name: Chem.MolFromSmiles(val) for name, val in zip(df.Name, df.Smiles)
}

for name, ref in mol_dict.items():  # ref is mol from smiles
    print(name)
    rsize = len(cpeptools.get_largest_ring(ref))

    try:
        mol = Chem.AddHs(ref)
    except ValueError:
        print("cannot add H to {}".format(name))
        continue

    mol.UpdatePropertyCache()
    # Return value is discarded; presumably called so that ring information
    # is perceived on the molecule -- TODO confirm.
    Chem.GetSymmSSSR(mol)

    mol_list.append(mol)
    path = folder + "/" + str(name) + "/"
    path_list.append(path)
    # this code generates different number of conformers given the ring size