def display_selected_data(y, selection=None):
    """Render the box-selected rows of ``main_df`` as a molecule grid image.

    The selected rows are echoed with ``st.write`` and at most 24 of them are
    drawn with RDKit, four structures per row. Each legend has the row's
    ``step`` and ``batch_idx`` joined by an underscore on the first line and
    ``<y>: <value>`` on the second.

    :param y: name of the ``main_df`` column whose value is shown in the legend
    :param selection: selection payload; the selected rows are read from
        ``selection['BOX_SELECT']['data']``. ``None`` means nothing is selected.
    :return: a base64 ``data:image/png`` URI for the grid image, or ``""``
        when there is no selection or the selection is empty
    """
    max_structs = 24
    structs_per_row = 4
    empty_plot = ""
    if selection is None:
        return empty_plot

    match_idx = selection['BOX_SELECT']['data']
    # NOTE(review): the entries are treated as row *positions* everywhere
    # below. The original mixed .iloc and .loc on the same list, which only
    # agrees when main_df has a default RangeIndex -- confirm with the caller.
    selected = main_df.iloc[match_idx]
    st.write(selected)
    if len(match_idx) == 0:
        # Nothing to draw; an empty grid is not a meaningful image.
        return empty_plot

    # Only the rows that fit in the grid need to be parsed and labelled.
    shown = selected.iloc[:max_structs]
    mols = [Chem.MolFromSmiles(smi) for smi in shown['smiles'].tolist()]
    value_labels = [
        f"{value:.02f}" if isinstance(value, float) else f"{value}"
        for value in shown[y]
    ]
    batch_labels = [
        f"{step}_{batch_idx}"
        for step, batch_idx in shown[['step', 'batch_idx']].values
    ]
    legends = [
        f"{batch}\n{y}: {value}"
        for batch, value in zip(batch_labels, value_labels)
    ]

    img = MolsToGridImage(mols,
                          molsPerRow=structs_per_row,
                          legends=legends,
                          subImgSize=(300, 300))
    buffered = BytesIO()
    # Encode as PNG so the payload matches the MIME type declared below
    # (the original stored JPEG bytes under an image/png URI).
    img.save(buffered, format="PNG")
    encoded_image = base64.b64encode(buffered.getvalue())
    return 'data:image/png;base64,{}'.format(encoded_image.decode())
def display_selected_data(selectedData):
    """Render the points selected on a plot as a molecule grid image.

    At most 12 of the selected rows of ``df`` are drawn with RDKit, six
    structures per row, each labelled with its ``Name`` followed by its
    ``is_active`` value.

    :param selectedData: selection payload with a ``'points'`` list whose
        items carry a ``'pointIndex'`` (row position in ``df``); may be
        ``None`` or empty when nothing is selected
    :return: a base64 ``data:image/png`` URI for the grid image, or ``""``
        when nothing is selected
    """
    max_structs = 12
    structs_per_row = 6
    empty_plot = ""
    if not selectedData or not selectedData['points']:
        return empty_plot

    match_idx = [point['pointIndex'] for point in selectedData['points']]
    # Restrict to what fits in the grid so molecules and legends always have
    # the same length (the original passed an untruncated legend list).
    match_df = df.iloc[match_idx[:max_structs]]

    mol_list = [Chem.MolFromSmiles(smi) for smi in match_df.SMILES]
    # Activity flags must come from the selected rows; the original read them
    # from the whole frame, pairing each name with an unrelated row's flag.
    legends = [
        f"{name} {active}"
        for name, active in zip(match_df.Name, match_df.is_active)
    ]

    img = MolsToGridImage(mol_list,
                          molsPerRow=structs_per_row,
                          legends=legends)
    buffered = BytesIO()
    # Encode as PNG so the payload matches the MIME type declared below.
    img.save(buffered, format="PNG")
    encoded_image = base64.b64encode(buffered.getvalue())
    return 'data:image/png;base64,{}'.format(encoded_image.decode())
def visualize(smi_file, n_samples=100):
    """Draw a random sample of molecules from a SMILES file into a PNG grid.

    The image is written next to the input, using the input path with its
    extension replaced by ``.png``; the output path is also printed.

    :param smi_file: path to the SMILES file to read
    :param n_samples: maximum number of molecules to draw (default 100). When
        the file holds fewer parsable molecules, all of them are drawn.
    :raises ValueError: if no molecule could be read from ``smi_file``
    """
    import math
    import os
    import random

    from rdkit.Chem.Draw import MolsToGridImage
    from rdkit.Chem.rdmolfiles import SmilesMolSupplier

    # load molecules from file, dropping entries RDKit could not parse
    supplier = SmilesMolSupplier(smi_file, sanitize=True, nameColumn=-1)
    mols_list = [mol for mol in supplier if mol is not None]
    if not mols_list:
        raise ValueError("No valid molecules found in %s" % smi_file)

    # random.sample raises if asked for more items than are available
    n_samples = min(n_samples, len(mols_list))
    mols_sampled = random.sample(mols_list, n_samples)

    # roughly square grid; never fewer than one column
    mols_per_row = max(1, int(math.sqrt(n_samples)))

    # swap the extension (whatever its length) for .png
    png_filename = os.path.splitext(smi_file)[0] + ".png"
    print(png_filename)

    # label structures with a running number
    labels = [str(i) for i in range(n_samples)]

    # draw the molecules (creates a PIL image)
    img = MolsToGridImage(mols=mols_sampled,
                          molsPerRow=mols_per_row,
                          legends=labels)
    img.save(png_filename)
def display_selected_data(selectedData, y):
    """Render the points selected on a plot as a molecule grid image.

    At most 12 of the selected rows of ``main_df`` are drawn with RDKit,
    three structures per row. Each legend has the row's ``step`` and
    ``batch_idx`` joined by an underscore on the first line and
    ``<y>: <value>`` on the second.

    :param selectedData: selection payload with a ``'points'`` list whose
        items carry a ``'pointIndex'`` (row position in ``main_df``); may be
        ``None`` or empty when nothing is selected
    :param y: name of the ``main_df`` column whose value is shown in the legend
    :return: a base64 ``data:image/png`` URI for the grid image, or ``""``
        when nothing is selected
    """
    max_structs = 12
    structs_per_row = 3
    empty_plot = ""
    if not selectedData or not selectedData['points']:
        return empty_plot

    match_idx = [point['pointIndex'] for point in selectedData['points']]
    # 'pointIndex' is used as a row position, so every lookup goes through
    # .iloc (the original fetched step/batch_idx with .loc, which only agrees
    # when main_df has a default RangeIndex). Truncating here keeps molecules
    # and legends the same length.
    shown = main_df.iloc[match_idx[:max_structs]]

    mols = [Chem.MolFromSmiles(smi) for smi in shown.smiles]
    value_labels = [
        f"{value:.02f}" if isinstance(value, float) else f"{value}"
        for value in shown[y]
    ]
    batch_labels = [
        f"{step}_{batch_idx}"
        for step, batch_idx in shown[['step', 'batch_idx']].values
    ]
    legends = [
        f"{batch}\n{y}: {value}"
        for batch, value in zip(batch_labels, value_labels)
    ]

    img = MolsToGridImage(mols,
                          molsPerRow=structs_per_row,
                          legends=legends,
                          subImgSize=(300, 300))
    buffered = BytesIO()
    # Encode as PNG so the payload matches the MIME type declared below.
    img.save(buffered, format="PNG")
    encoded_image = base64.b64encode(buffered.getvalue())
    return 'data:image/png;base64,{}'.format(encoded_image.decode())
def make_image_grid(file_label, smi_list, labels=None, out_dir=PNG_DIR, mol_img_size=(400, 300), write_output=True):
    """
    Given a molecular formula (or other label) and a collection of SMILES, save an image of all the structures.
    A single structure is written with MolToFile; two or more are written as a grid with three molecules per row.
    https://www.rdkit.org/docs/GettingStartedInPython.html

    :param file_label: str, such as the chemical formula that corresponds to all SMILES in the set; used to build
        the output file name
    :param smi_list: list or set of SMILES strings; used to generate images
    :param labels: if None (or empty), the SMILES strings are used as labels; otherwise a sequence of labels to use
    :param out_dir: directory where the file should be saved
    :param mol_img_size: tuple of ints to determine size of individual molecules
    :param write_output: boolean to determine whether to write to screen that a file was created
    :return: N/A, save a file
    :raises ValueError: if a SMILES string cannot be parsed by RDKit
    """
    # Materialize once: a set cannot be indexed, and the molecules and their default labels must share one order.
    smi_list = list(smi_list)

    mols = []
    for smi in smi_list:
        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            # MolFromSmiles signals a parse failure with None; fail here with the offending string instead of
            # letting Compute2DCoords reject the None argument.
            raise ValueError(f"Could not parse SMILES: {smi!r}")
        Compute2DCoords(mol)
        mols.append(mol)

    img_labels = list(labels) if labels else smi_list

    single_mol = len(mols) == 1
    if single_mol:
        # didn't see a way for RDKit to add a label to an image with a single molecule (grid image does not work
        # for one image), so add to file name
        file_label += '_' + img_labels[0]

    fname = create_out_fname(file_label, ext='png', base_dir=out_dir)
    if single_mol:
        MolToFile(mols[0], fname, size=mol_img_size)
    else:
        img_grid = MolsToGridImage(mols, molsPerRow=3, subImgSize=mol_img_size, legends=img_labels)
        img_grid.save(fname)
    if write_output:
        print(f"Wrote file: {os.path.relpath(fname)}")
def draw_mol_labels(labels_dict, actions_history_smi_pop, actions_history_smi_removed, actions_history_scores_pop,
                    actions_history_scores_removed, legend_scores_keys_strat=None, problem_type="max", mols_per_row=4,
                    draw_n_mols=None):
    """
    Draw the labelled molecules as a grid image, ordered by their first score.

    :param labels_dict: maps an action-history key to a label; entries whose label is "" are skipped
    :param actions_history_smi_pop: maps action-history key to SMILES; looked up first
    :param actions_history_smi_removed: maps action-history key to SMILES; used for keys absent from
        actions_history_smi_pop
    :param actions_history_scores_pop: scores passed to compute_mol_legend for keys found in
        actions_history_smi_pop
    :param actions_history_scores_removed: scores passed to compute_mol_legend for the remaining keys
    :param legend_scores_keys_strat: forwarded unchanged to compute_mol_legend
    :param problem_type: "max" draws the highest first score first; any other value draws the lowest first
    :param mols_per_row: number of molecules per row of the grid
    :param draw_n_mols: if not None, only the first draw_n_mols molecules of the sorted order are drawn
    :return: the image returned by MolsToGridImage
    """
    # label -> (smiles, legend, scores). A label that appears twice keeps its first position and takes the last
    # values, as with the three parallel dicts this replaces.
    to_draw = {}
    for action_history_k, label in labels_dict.items():
        if label == "":
            continue

        if action_history_k in actions_history_smi_pop:
            smi_source, scores_source = actions_history_smi_pop, actions_history_scores_pop
        else:
            smi_source, scores_source = actions_history_smi_removed, actions_history_scores_removed

        smi = smi_source[action_history_k]
        legend, scores = compute_mol_legend(action_history_k, smi, scores_source, legend_scores_keys_strat)
        to_draw[label] = (smi, legend, scores)

    entries = list(to_draw.values())

    # Sorting molecules on the first returned score
    sorted_order = np.argsort([scores[0] for _, _, scores in entries])
    if problem_type == "max":
        sorted_order = sorted_order[::-1]

    # Filtering molecules if necessary. The permutation is truncated (not the unsorted data), so the best
    # draw_n_mols molecules are kept; truncating the data first made the full-length permutation index out of range.
    if draw_n_mols is not None:
        sorted_order = sorted_order[:draw_n_mols]

    mols = [MolFromSmiles(entries[i][0]) for i in sorted_order]
    legends = [entries[i][1] for i in sorted_order]

    img = MolsToGridImage(mols, legends=legends, molsPerRow=mols_per_row, subImgSize=(200, 200))

    return img
def plot_top_n(smiles, ref_smiles, n=1, fp='FCFP4', sim='tanimoto', filename=None):
    """Plot each reference molecule next to its ``n`` most similar molecules.

    Every grid row holds one reference (legend ``1.0000``) followed by the
    hits returned by ``get_most_similar`` for it, labelled with their
    similarity.

    :param smiles: candidate SMILES, passed to ``get_most_similar``
    :param ref_smiles: iterable of reference SMILES, one grid row each
    :param n: number of most similar molecules to show per reference
    :param fp: descriptor name, passed as ``desc`` to ``get_most_similar``
    :param sim: similarity name, passed as ``similarity`` to
        ``get_most_similar``
    :param filename: if given, the image is saved there and the
        ``SMILES,similarity`` pairs are written to a ``.csv`` file with the
        same base name; otherwise the image is shown
    """
    import os

    mols = []
    sims = []
    for r in ref_smiles:
        m, s = get_most_similar(smiles, referencemol=r, n=n, similarity=sim, desc=fp)
        # the reference itself leads its row with similarity 1
        mols.extend([r] + m.tolist())
        sims.extend([1.] + s.tolist())

    img = MolsToGridImage([MolFromSmiles(mol) for mol in mols],
                          molsPerRow=n + 1,
                          subImgSize=(300, 300),
                          legends=["%.4f" % s for s in sims])
    if filename:
        img.save(filename)
        # splitext copes with any extension length; slicing off four
        # characters only worked for three-letter extensions.
        csv_filename = os.path.splitext(filename)[0] + '.csv'
        with open(csv_filename, 'w') as f:
            f.writelines("%s,%.4f\n" % (m, s) for m, s in zip(mols, sims))
    else:
        img.show()
def depictMultipleMols(mols_list, filename=None, ipython=False, legends=None, highlightAtoms=None, mols_perrow=3):
    """
    Depict several molecules in a grid, optionally saving and/or returning the SVG.

    Parameters
    ----------
    mols_list: list
        The list of the rdkit molecules to depict
    filename: str
        The filename of the image. If it has no extension, '.svg' is appended
    ipython: bool
        If True, the SVG rendering for Jupyter notebooks is returned
    legends: list
        List of subfigure titles, one for each molecule
    highlightAtoms: list
        List of lists of atom indexes to highlight. If the entries are
        themselves lists of lists, each inner subset gets its own color
    mols_perrow: int
        The number of subfigures per row

    Returns
    -------
    svg: SVG
        If ipython is set to True, the SVG rendering is returned; otherwise None
    """
    from os.path import splitext

    from rdkit.Chem.Draw import MolsToGridImage

    sel_atoms = []
    sel_colors = []
    if highlightAtoms is not None and len(highlightAtoms) > 0:
        # Decide between the flat and the nested layout from the first
        # non-empty entry, so an empty leading list does not raise IndexError.
        first_entry = next((entry for entry in highlightAtoms if len(entry) > 0), None)
        nested = first_entry is not None and isinstance(first_entry[0], list)
        if nested:
            sel_atoms = [list(subset) for mol_set in highlightAtoms for subset in mol_set]
            # cycle through the palette, one color per subset within a molecule set
            sel_colors = [{aIdx: _highlight_colors[n % len(_highlight_colors)] for aIdx in subset}
                          for mol_set in highlightAtoms
                          for n, subset in enumerate(mol_set)]
        else:
            sel_atoms = highlightAtoms
            sel_colors = [{aIdx: _highlight_colors[0] for aIdx in subset} for subset in highlightAtoms]

    svg = MolsToGridImage(mols_list,
                          highlightAtomLists=sel_atoms,
                          highlightBondLists=[],
                          highlightAtomColors=sel_colors,
                          legends=legends,
                          molsPerRow=mols_perrow,
                          useSVG=True)

    if filename:
        ext = splitext(filename)[-1]
        filename = filename if ext != '' else filename + '.svg'
        # context manager closes the file even if the write fails
        with open(filename, 'w') as f:
            f.write(svg)

    if ipython:
        # imported lazily so IPython is only required when it is actually used
        from IPython.display import SVG
        return SVG(svg)
    return None
)

# Plot the "AUC" and "val_AUC" series stored in history.history on one figure.
plt.figure(figsize=(10, 6))
plt.plot(history.history["AUC"], label="train AUC")
plt.plot(history.history["val_AUC"], label="valid AUC")
plt.xlabel("Epochs", fontsize=16)
plt.ylabel("AUC", fontsize=16)
plt.legend(fontsize=16)

""" ### Predicting """

# Build one molecule per test index from the `smiles` column of df.
molecules = [
    molecule_from_smiles(df.smiles.values[index]) for index in test_index
]
# Reference values come from the `p_np` column at the same test indices.
y_true = [df.p_np.values[index] for index in test_index]
# Predict on the test dataset and drop axis 1 of the result.
y_pred = tf.squeeze(mpnn.predict(test_dataset), axis=1)

# One legend per molecule: true value / prediction with two decimals.
legends = [
    f"y_true/y_pred = {y_true[i]}/{y_pred[i]:.2f}" for i in range(len(y_true))
]
# Bare expression: the grid image is created but not assigned or saved here.
MolsToGridImage(molecules, molsPerRow=4, legends=legends)

""" ## Conclusions In this tutorial, we demonstarted a message passing neural network (MPNN) to predict blood-brain barrier permeability (BBBP) for a number of different molecules. We first had to construct graphs from SMILES, and then build a Keras model that could operate on these graphs. """
# Join each prediction into a SMILES string and keep the ones RDKit can parse.
# (A stray `morning` token after the join made this loop a syntax error.)
for p in predictions:
    smiles = ''.join(p)
    if Chem.MolFromSmiles(smiles) is not None:
        molecules.append(smiles)

for m in molecules:
    print(m)

# NOTE: despite its name, smiles_list holds RDKit Mol objects, not strings.
smiles_list = [Chem.MolFromSmiles(x) for x in molecules]
print(sorted([x.GetNumAtoms() for x in smiles_list]))

# keep molecules with more than 10 and fewer than 50 atoms
good_mol_list = [x for x in smiles_list if 10 < x.GetNumAtoms() < 50]
print(len(good_mol_list))

# obtain QED (drug-likeness) - drop all molecules with QED of 0.5 or below
from rdkit.Chem import QED

qed_list = [QED.qed(x) for x in good_mol_list]
final_mol_list = [(a, b) for a, b in zip(good_mol_list, qed_list) if b > 0.5]
for i in final_mol_list:
    print(i)

from rdkit.Chem.Draw import MolsToGridImage

# printing out the drawings of generated molecules, each labelled with its QED
MolsToGridImage([x[0] for x in final_mol_list],
                molsPerRow=3,
                useSVG=True,
                subImgSize=(250, 250),
                legends=[f"{x[1]:.2f}" for x in final_mol_list])
#! /usr/bin/env python
"""Generate a 3D PDB file per ligand of an SD file, plus a 2D overview image.

Usage: pass the SD file as the first command-line argument. Each molecule is
embedded and MMFF-optimized with explicit hydrogens, then written to
``ligand_<i>.pdb``; ``ligands.png`` shows all input molecules in a grid.
"""
import sys

from rdkit.Chem import SDMolSupplier, MolToPDBFile, AllChem, AddHs, RemoveHs
from rdkit.Chem.Draw import MolsToGridImage

if len(sys.argv) < 2:
    sys.exit("usage: %s <ligands.sdf>" % sys.argv[0])

spl = SDMolSupplier(sys.argv[1])
mols = [m for m in spl]

for i, m in enumerate(mols):
    if m is None:
        # the supplier yields None for records it cannot parse
        sys.stderr.write("ligand_%d: could not be read, skipped\n" % i)
        continue
    # work on a hydrogen-complete copy; `mols` keeps the input for the image
    m = AddHs(m)
    # EmbedMolecule returns -1 when no conformer could be generated, and
    # optimizing a molecule without a conformer would raise
    if AllChem.EmbedMolecule(m, useBasicKnowledge=True, maxAttempts=100) < 0:
        sys.stderr.write("ligand_%d: 3D embedding failed, skipped\n" % i)
        continue
    AllChem.MMFFOptimizeMolecule(m)
    # RemoveHs returns a new molecule; the original discarded the result, so
    # the hydrogens were never actually removed before writing
    m = RemoveHs(m)
    MolToPDBFile(m, 'ligand_%d.pdb' % i)

img = MolsToGridImage(mols, legends=["ligand_%d" % i for i in range(len(mols))])
img.save('ligands.png')