def display_selected_data(selectedData):
    """Draw the structures behind a plot selection and return an image data URI.

    The selected points are looked up positionally in the module-level ``df``
    (columns ``SMILES``, ``Name`` and ``is_active`` are read). At most
    ``max_structs`` molecules are drawn, each labelled ``"<Name> <is_active>"``.

    :param selectedData: mapping with a ``'points'`` list whose items carry a
        ``'pointIndex'``; presumably a Plotly/Dash selection payload (confirm
        against the caller). A falsy value or an empty ``'points'`` list means
        "nothing selected".
    :return: a ``data:`` URI string; a placeholder GIF when nothing is
        selected, otherwise a base64-encoded PNG of the structure grid.
    """
    max_structs = 12
    structs_per_row = 6
    empty_plot = "data:image/gif;base64,R0lGODlhAQABAAAAACwAAAAAAQABAAA="

    if not selectedData or len(selectedData['points']) == 0:
        return empty_plot

    match_idx = [point['pointIndex'] for point in selectedData['points']]
    # Only the rows that are actually drawn need to be parsed and labelled.
    shown_df = df.iloc[match_idx].iloc[:max_structs]

    mol_list = [Chem.MolFromSmiles(smi) for smi in shown_df.SMILES]
    # Names and activity flags must come from the same (selected) rows;
    # taking the flags from the full table would pair each name with the
    # activity of an unrelated compound.
    legends = [
        name + " " + str(active)
        for name, active in zip(shown_df.Name, shown_df.is_active)
    ]

    img = MolsToGridImage(mol_list, molsPerRow=structs_per_row, legends=legends)

    buffered = BytesIO()
    # PNG keeps the payload consistent with the media type declared below.
    img.save(buffered, format="PNG")
    encoded_image = base64.b64encode(buffered.getvalue())
    return 'data:image/png;base64,{}'.format(encoded_image.decode())
def plot_top_n(smiles, ref_smiles, n=1, fp='FCFP4', sim='tanimoto', filename=None):
    """Plot each reference molecule next to its ``n`` most similar molecules.

    For every entry of ``ref_smiles`` the helper ``get_most_similar`` is asked
    for the ``n`` best matches in ``smiles``. One grid row is drawn per
    reference: the reference itself (legend ``1.0000``) followed by its
    matches, each labelled with its similarity formatted to four decimals.

    :param smiles: candidate SMILES, passed through to ``get_most_similar``
    :param ref_smiles: iterable of reference SMILES strings
    :param n: number of matches per reference; the grid has ``n + 1`` columns
    :param fp: descriptor name forwarded as ``desc``
    :param sim: similarity name forwarded as ``similarity``
    :param filename: if truthy, the image is saved there and a CSV with
        ``smiles,similarity`` rows is written next to it (same path with the
        extension replaced by ``.csv``); otherwise the image is shown
    :return: None
    """
    import os  # local import: only needed to derive the CSV path

    mols = []
    sims = []
    for ref in ref_smiles:
        # NOTE(review): get_most_similar is assumed to return two array-likes
        # exposing .tolist() (matches and their scores) - confirm.
        matches, scores = get_most_similar(smiles, referencemol=ref, n=n,
                                           similarity=sim, desc=fp)
        mols.extend([ref] + matches.tolist())
        sims.extend([1.] + scores.tolist())

    img = MolsToGridImage(
        [MolFromSmiles(smi) for smi in mols],
        molsPerRow=n + 1,
        subImgSize=(300, 300),
        legends=["%.4f" % score for score in sims],
    )

    if filename:
        img.save(filename)
        # splitext copes with extensions of any length (or none at all),
        # unlike slicing off a fixed four characters.
        csv_path = os.path.splitext(filename)[0] + '.csv'
        with open(csv_path, 'w') as f:
            f.writelines("%s,%.4f\n" % (smi, score)
                         for smi, score in zip(mols, sims))
    else:
        img.show()
def display_selected_data(y, selection=None):
    """Show the selected rows and return their structures as an image data URI.

    The selected rows of the module-level ``main_df`` are written out with
    ``st.write``; the first ``max_structs`` of them are then drawn in a grid.
    Each legend has two lines: ``<step>_<batch_idx>`` and ``<y>: <value>``,
    where float values are formatted with two decimals.

    :param y: name of the ``main_df`` column whose value is shown in the legend
    :param selection: ``None`` when nothing is selected, otherwise a mapping
        whose ``['BOX_SELECT']['data']`` entry lists the selected rows.
        NOTE(review): these are treated as positional row numbers - confirm
        against the component that produces the event.
    :return: a ``data:`` URI string; a placeholder GIF when there is nothing
        to draw, otherwise a base64-encoded PNG of the structure grid.
    """
    max_structs = 24
    structs_per_row = 4
    empty_plot = "data:image/gif;base64,R0lGODlhAQABAAAAACwAAAAAAQABAAA="

    if selection is None:
        return empty_plot

    match_idx = selection['BOX_SELECT']['data']
    # Use one positional lookup for everything so that structures, values and
    # step/batch labels are guaranteed to describe the same rows.
    selected = main_df.iloc[match_idx]
    st.write(selected)
    if selected.empty:
        return empty_plot

    # Only the rows that are actually drawn need to be parsed and labelled.
    shown = selected.iloc[:max_structs]
    mols = [Chem.MolFromSmiles(smi) for smi in shown['smiles']]
    batch_list = [
        f"{step}_{batch_idx}"
        for step, batch_idx in shown[['step', 'batch_idx']].values
    ]
    name_list = [
        f"{x:.02f}" if isinstance(x, float) else f"{x}"
        for x in shown[y]
    ]
    legends = [
        f"{idx}\n{y}: {name}" for idx, name in zip(batch_list, name_list)
    ]

    img = MolsToGridImage(mols,
                          molsPerRow=structs_per_row,
                          legends=legends,
                          subImgSize=(300, 300))
    buffered = BytesIO()
    # PNG keeps the payload consistent with the media type declared below.
    img.save(buffered, format="PNG")
    encoded_image = base64.b64encode(buffered.getvalue())
    return 'data:image/png;base64,{}'.format(encoded_image.decode())
def visualize(smi_file, n_samples=100):
    """Draw a random sample of molecules from a SMILES file into a PNG grid.

    The image is written next to the input file, using the same path with the
    extension replaced by ``.png``; that path is also printed. Structures are
    labelled with a running number starting at 0.

    :param smi_file: path of the SMILES file to read
    :param n_samples: maximum number of molecules to draw; when the file holds
        fewer parsable molecules, all of them are drawn
    :raises ValueError: if ``n_samples`` is less than 1 or the file yields no
        parsable molecule
    """
    import math
    import os
    import random

    # Validate before the (comparatively heavy) RDKit imports and file parsing.
    if n_samples < 1:
        raise ValueError("n_samples must be at least 1")

    from rdkit.Chem.Draw import MolsToGridImage
    from rdkit.Chem.rdmolfiles import SmilesMolSupplier

    # load molecules from file; the supplier yields None for lines it cannot
    # parse, and those carry nothing worth drawing
    supplier = SmilesMolSupplier(smi_file, sanitize=True, nameColumn=-1)
    mols_list = [mol for mol in supplier if mol is not None]
    if not mols_list:
        raise ValueError("no parsable molecules found in %s" % smi_file)

    # random.sample refuses to draw more items than the population holds
    n_drawn = min(n_samples, len(mols_list))
    mols_sampled = random.sample(mols_list, n_drawn)

    # make a (roughly) square grid; never fewer than one column
    mols_per_row = max(1, int(math.sqrt(n_drawn)))

    # name of PNG file to create
    png_filename = os.path.splitext(smi_file)[0] + ".png"
    print(png_filename)

    # draw the molecules (creates a PIL image), labelled with a number
    img = MolsToGridImage(mols=mols_sampled,
                          molsPerRow=mols_per_row,
                          legends=[str(i) for i in range(n_drawn)])
    img.save(png_filename)
def display_selected_data(selectedData, y):
    """Draw the structures behind a plot selection and return an image data URI.

    The selected points are looked up positionally in the module-level
    ``main_df`` (columns ``smiles``, ``step``, ``batch_idx`` and ``y`` are
    read). At most ``max_structs`` molecules are drawn. Each legend has two
    lines: ``<step>_<batch_idx>`` and ``<y>: <value>``, where float values are
    formatted with two decimals.

    :param selectedData: mapping with a ``'points'`` list whose items carry a
        ``'pointIndex'``; presumably a Plotly/Dash selection payload (confirm
        against the caller). A falsy value or an empty ``'points'`` list means
        "nothing selected".
    :param y: name of the ``main_df`` column whose value is shown in the legend
    :return: a ``data:`` URI string; a placeholder GIF when nothing is
        selected, otherwise a base64-encoded PNG of the structure grid.
    """
    max_structs = 12
    structs_per_row = 3
    empty_plot = "data:image/gif;base64,R0lGODlhAQABAAAAACwAAAAAAQABAAA="

    if not selectedData or len(selectedData['points']) == 0:
        return empty_plot

    match_idx = [point['pointIndex'] for point in selectedData['points']]
    # One positional lookup, truncated once, so that molecules and legends
    # always describe the same rows and have the same length.
    shown = main_df.iloc[match_idx].iloc[:max_structs]

    mol_list = [Chem.MolFromSmiles(smi) for smi in shown['smiles']]
    batch_list = [
        f"{step}_{batch_idx}"
        for step, batch_idx in shown[['step', 'batch_idx']].values
    ]
    name_list = [
        f"{x:.02f}" if isinstance(x, float) else f"{x}"
        for x in shown[y]
    ]
    legends = [
        f"{idx}\n{y}: {name}" for idx, name in zip(batch_list, name_list)
    ]

    img = MolsToGridImage(mol_list,
                          molsPerRow=structs_per_row,
                          legends=legends,
                          subImgSize=(300, 300))
    buffered = BytesIO()
    # PNG keeps the payload consistent with the media type declared below.
    img.save(buffered, format="PNG")
    encoded_image = base64.b64encode(buffered.getvalue())
    return 'data:image/png;base64,{}'.format(encoded_image.decode())
def make_image_grid(file_label, smi_list, labels=None, out_dir=PNG_DIR, mol_img_size=(400, 300),
                    write_output=True):
    """
    Given a molecular formula (or other label) and a collection of SMILES, save a PNG showing all of them.

    Several molecules are drawn as a grid with three per row, each with its label as legend. A single
    molecule is drawn on its own, and its label is appended to the file name instead.
    https://www.rdkit.org/docs/GettingStartedInPython.html

    :param file_label: str, such as chemical formula that corresponds to all smiles in SMILES set
    :param smi_list: list or set of SMILES strings; used to generate images
    :param labels: if None (or empty), will use the SMILES as labels; otherwise an iterable of str to use
    :param out_dir: directory where the file should be saved
    :param mol_img_size: tuple of ints to determine size of individual molecules
    :param write_output: boolean to determine whether to write to screen that a file was created
    :return: N/A, save a file
    """
    # Materialize once: a set is accepted but can neither be indexed nor relied on to iterate in a
    # fixed order, and molecules and their labels must line up.
    smi_list = list(smi_list)

    mols = []
    for smi in smi_list:
        mol = Chem.MolFromSmiles(smi)
        Compute2DCoords(mol)
        mols.append(mol)

    img_labels = list(labels) if labels else smi_list

    single_mol = len(mols) == 1
    if single_mol:
        # didn't see a way for RDKit to add a label to an image with a single molecule (grid image does not
        # work for one image), so add to file name
        file_label += '_' + img_labels[0]

    fname = create_out_fname(file_label, ext='png', base_dir=out_dir)
    if single_mol:
        MolToFile(mols[0], fname, size=mol_img_size)
    else:
        img_grid = MolsToGridImage(mols, molsPerRow=3, subImgSize=mol_img_size, legends=img_labels)
        img_grid.save(fname)

    if write_output:
        print(f"Wrote file: {os.path.relpath(fname)}")
#! /usr/bin/env python
"""Build 3D ligand structures from an SD file.

Usage: pass the SD file as the first command-line argument. For every record
a 3D conformer is embedded (with explicit hydrogens), optimised with MMFF and
written without hydrogens to ``ligand_<i>.pdb``, where ``<i>`` is the record's
position in the file. An overview grid of the input molecules is saved as
``ligands.png``.
"""
import sys

from rdkit.Chem import SDMolSupplier, MolToPDBFile, AllChem, AddHs, RemoveHs
from rdkit.Chem.Draw import MolsToGridImage

if len(sys.argv) < 2:
    sys.exit("usage: %s <ligands.sdf>" % sys.argv[0])

spl = SDMolSupplier(sys.argv[1])
mols = [m for m in spl]

for i, m in enumerate(mols):
    # The supplier yields None for records it cannot parse.
    if m is None:
        sys.stderr.write("skipping record %d: could not be read\n" % i)
        continue
    # Explicit hydrogens are needed for a sensible embedding/optimisation.
    m = AddHs(m)
    # EmbedMolecule returns -1 when no conformer could be generated; there is
    # then nothing to optimise or write.
    if AllChem.EmbedMolecule(m, useBasicKnowledge=True, maxAttempts=100) < 0:
        sys.stderr.write("skipping record %d: 3D embedding failed\n" % i)
        continue
    AllChem.MMFFOptimizeMolecule(m)
    # RemoveHs returns a new molecule rather than modifying its argument.
    m = RemoveHs(m)
    MolToPDBFile(m, 'ligand_%d.pdb' % i)

# Keep the legends tied to the original record numbers while leaving
# unreadable records out of the overview image.
drawable = [(i, m) for i, m in enumerate(mols) if m is not None]
img = MolsToGridImage([m for _, m in drawable],
                      legends=["ligand_%d" % i for i, _ in drawable])
img.save('ligands.png')