Ejemplo n.º 1
0
def display_selected_data(y, selection=None):
    """Render the box-selected molecules as a base64 ``data:`` URI grid image.

    :param y: name of the ``main_df`` column whose value is printed in each
        molecule's legend
    :param selection: selection payload; ``selection['BOX_SELECT']['data']``
        is read as the list of selected row indices. ``None`` means that
        nothing has been selected yet.
    :return: a ``data:`` URI string: the placeholder GIF when there is no
        selection (or it is empty), otherwise a PNG grid of at most
        ``max_structs`` structures
    """
    max_structs = 24
    structs_per_row = 4
    empty_plot = "data:image/gif;base64,R0lGODlhAQABAAAAACwAAAAAAQABAAA="
    if selection is None:
        return empty_plot

    match_idx = selection['BOX_SELECT']['data']
    st.write(main_df.iloc[match_idx])
    # An empty selection has nothing to draw; return the placeholder just
    # like the sibling callbacks do instead of handing an empty list to RDKit.
    if len(match_idx) == 0:
        return empty_plot

    # NOTE(review): rows are fetched with both .iloc (positional) and .loc
    # (label based). These agree only when main_df has a default RangeIndex;
    # confirm against the code that builds main_df.
    smis = main_df.loc[match_idx, 'smiles'].tolist()
    mols = [Chem.MolFromSmiles(smi) for smi in smis]
    batch_list = [
        f"{step}_{batch_idx}"
        for step, batch_idx in main_df.loc[match_idx,
                                           ['step', 'batch_idx']].values
    ]
    name_list = [
        f"{x:.02f}" if isinstance(x, float) else f"{x}"
        for x in list(main_df.iloc[match_idx][y])
    ]
    legends = [
        f"{idx}\n{y}: {name}" for idx, name in zip(batch_list, name_list)
    ]
    img = MolsToGridImage(mols[0:max_structs],
                          molsPerRow=structs_per_row,
                          legends=legends[0:max_structs],
                          subImgSize=(300, 300))
    buffered = BytesIO()
    # Encode as PNG so the bytes match the MIME type declared in the URI
    # (the payload used to be JPEG behind an image/png header).
    img.save(buffered, format="PNG")
    encoded_image = base64.b64encode(buffered.getvalue())
    return 'data:image/png;base64,{}'.format(encoded_image.decode())
 def display_selected_data(selectedData):
     """Render the selected scatter points as a base64 ``data:`` URI grid image.

     :param selectedData: selection payload; ``selectedData['points']`` is read
         as a list of dicts, each carrying a ``'pointIndex'`` row position
         into ``df``. A falsy value means that nothing is selected.
     :return: a ``data:`` URI string: the placeholder GIF when nothing is
         selected, otherwise a PNG grid of at most ``max_structs`` structures
         labelled with "<Name> <is_active>"
     """
     max_structs = 12
     structs_per_row = 6
     empty_plot = "data:image/gif;base64,R0lGODlhAQABAAAAACwAAAAAAQABAAA="
     if not selectedData or len(selectedData['points']) == 0:
         return empty_plot

     match_idx = [x['pointIndex'] for x in selectedData['points']]
     match_df = df.iloc[match_idx]
     mol_list = [Chem.MolFromSmiles(x) for x in match_df.SMILES]
     # Take the activity flag from the *selected* rows; reading it from the
     # whole frame paired every name with the flag of an unrelated row.
     name_list = [
         f"{name} {active}"
         for name, active in zip(match_df.Name, match_df.is_active)
     ]
     # Truncate legends together with the molecules so both lists have the
     # same length when handed to RDKit.
     img = MolsToGridImage(mol_list[0:max_structs],
                           molsPerRow=structs_per_row,
                           legends=name_list[0:max_structs])
     buffered = BytesIO()
     # Encode as PNG so the bytes match the MIME type declared in the URI.
     img.save(buffered, format="PNG")
     encoded_image = base64.b64encode(buffered.getvalue())
     return 'data:image/png;base64,{}'.format(encoded_image.decode())
Ejemplo n.º 3
0
def visualize(smi_file, n_samples=100):
    """Draw a random sample of the molecules in a SMILES file to a PNG grid.

    The image is written next to ``smi_file`` with the extension replaced by
    ``.png``; the path is also printed. Each structure is labelled with its
    position in the sample.

    :param smi_file: path to the SMILES file to read
    :param n_samples: maximum number of molecules to draw; fewer are drawn
        when the file holds fewer parseable molecules
    :raises ValueError: if ``n_samples`` is below 1 or no molecule in the file
        could be parsed
    """
    import math
    import os
    import random
    from rdkit.Chem.Draw import MolsToGridImage
    from rdkit.Chem.rdmolfiles import SmilesMolSupplier

    if n_samples < 1:
        raise ValueError("n_samples must be at least 1")

    # load molecules from file; the supplier yields None for entries it
    # cannot parse, and those cannot be drawn
    supplier = SmilesMolSupplier(smi_file, sanitize=True, nameColumn=-1)
    mols_list = [mol for mol in supplier if mol is not None]
    if not mols_list:
        raise ValueError(f"no valid molecules found in {smi_file!r}")

    # random.sample raises when asked for more items than are available, so
    # cap the request at the population size
    n_drawn = min(n_samples, len(mols_list))
    mols_sampled = random.sample(mols_list, n_drawn)

    # make a (roughly) square grid, never with zero columns
    mols_per_row = max(1, int(math.sqrt(n_drawn)))

    # swap the extension whatever its length, rather than chopping 3 characters
    png_filename = os.path.splitext(smi_file)[0] + ".png"
    print(png_filename)

    # draw the molecules (creates a PIL image), labelling each with a number
    img = MolsToGridImage(mols=mols_sampled,
                          molsPerRow=mols_per_row,
                          legends=[str(i) for i in range(n_drawn)])

    img.save(png_filename)
Ejemplo n.º 4
0
def display_selected_data(selectedData, y):
    """Render the selected scatter points as a base64 ``data:`` URI grid image.

    :param selectedData: selection payload; ``selectedData['points']`` is read
        as a list of dicts, each carrying a ``'pointIndex'`` row index into
        ``main_df``. A falsy value means that nothing is selected.
    :param y: name of the ``main_df`` column whose value is printed in each
        molecule's legend
    :return: a ``data:`` URI string: the placeholder GIF when nothing is
        selected, otherwise a PNG grid of at most ``max_structs`` structures
    """
    max_structs = 12
    structs_per_row = 3
    empty_plot = "data:image/gif;base64,R0lGODlhAQABAAAAACwAAAAAAQABAAA="
    if not selectedData or len(selectedData['points']) == 0:
        return empty_plot

    match_idx = [x['pointIndex'] for x in selectedData['points']]
    # NOTE(review): rows are fetched with both .iloc (positional) and .loc
    # (label based). These agree only when main_df has a default RangeIndex;
    # confirm against the code that builds main_df.
    mol_list = [
        Chem.MolFromSmiles(x) for x in list(main_df.iloc[match_idx].smiles)
    ]
    batch_list = [
        f"{step}_{batch_idx}"
        for step, batch_idx in main_df.loc[match_idx,
                                           ['step', 'batch_idx']].values
    ]
    name_list = [
        f"{x:.02f}" if isinstance(x, float) else f"{x}"
        for x in list(main_df.iloc[match_idx][y])
    ]
    legends = [
        f"{idx}\n{y}: {name}" for idx, name in zip(batch_list, name_list)
    ]
    # Truncate legends together with the molecules so both lists have the
    # same length when handed to RDKit.
    img = MolsToGridImage(mol_list[0:max_structs],
                          molsPerRow=structs_per_row,
                          legends=legends[0:max_structs],
                          subImgSize=(300, 300))
    buffered = BytesIO()
    # Encode as PNG so the bytes match the MIME type declared in the URI.
    img.save(buffered, format="PNG")
    encoded_image = base64.b64encode(buffered.getvalue())
    return 'data:image/png;base64,{}'.format(encoded_image.decode())
Ejemplo n.º 5
0
def make_image_grid(file_label,
                    smi_list,
                    labels=None,
                    out_dir=PNG_DIR,
                    mol_img_size=(400, 300),
                    write_output=True):
    """
    Given a molecular formula (or other label) and a collection of SMILES, save an image of all the molecules.
    A single molecule is written as a plain image with its label appended to the file name; several molecules
    are written as a labelled grid.
    https://www.rdkit.org/docs/GettingStartedInPython.html
    :param file_label: str, such as chemical formula that corresponds to all smiles in SMILES set
    :param smi_list: list or set of SMILES strings; used to generate images
    :param labels: if None (or empty), will use the SMILES as labels; otherwise a sequence of labels to use,
        in the same order as the iteration order of smi_list
    :param out_dir: directory where the file should be saved
    :param mol_img_size: tuple of ints to determine size of individual molecules
    :param write_output: boolean to determine whether to write to screen that a file was created
    :return: N/A, save a file
    :raises ValueError: if one of the SMILES strings cannot be parsed
    """
    # Materialize once: a set cannot be indexed, and the labels below must line up with the molecule order
    smi_seq = list(smi_list)

    mols = []
    for smi in smi_seq:
        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            # MolFromSmiles signals a parse failure with None; fail with the offending input named
            raise ValueError(f"Could not parse SMILES: {smi!r}")
        Compute2DCoords(mol)
        mols.append(mol)

    img_labels = list(labels) if labels else smi_seq

    single_mol = len(mols) == 1
    if single_mol:
        # didn't see a way for RDKit to add a label to an image with a single molecule (grid image does not work
        # for one image), so add to file name
        file_label += '_' + img_labels[0]
    fname = create_out_fname(file_label, ext='png', base_dir=out_dir)
    if single_mol:
        MolToFile(mols[0], fname, size=mol_img_size)
    else:
        img_grid = MolsToGridImage(mols,
                                   molsPerRow=3,
                                   subImgSize=mol_img_size,
                                   legends=img_labels)
        img_grid.save(fname)
    if write_output:
        print(f"Wrote file: {os.path.relpath(fname)}")
Ejemplo n.º 6
0
def draw_mol_labels(labels_dict, actions_history_smi_pop, actions_history_smi_removed,
                    actions_history_scores_pop, actions_history_scores_removed, legend_scores_keys_strat=None,
                    problem_type="max", mols_per_row=4, draw_n_mols=None):
    """Draw the labelled molecules as a grid image, ordered by their first score.

    :param labels_dict: maps an action-history key to the label to draw; entries whose label is "" are skipped
    :param actions_history_smi_pop: maps action-history key to SMILES; looked up first
    :param actions_history_smi_removed: maps action-history key to SMILES; used for keys absent from
        actions_history_smi_pop
    :param actions_history_scores_pop: scores passed to compute_mol_legend for keys found in
        actions_history_smi_pop
    :param actions_history_scores_removed: scores passed to compute_mol_legend for the other keys
    :param legend_scores_keys_strat: forwarded unchanged to compute_mol_legend
    :param problem_type: "max" orders molecules by decreasing score, any other value by increasing score
    :param mols_per_row: number of molecules per grid row
    :param draw_n_mols: if not None, only the first draw_n_mols molecules of the sorted order are drawn
    :return: the image returned by MolsToGridImage
    """
    smi_to_draw = {}
    legends_to_draw = {}
    scores_float = {}

    for action_history_k, label in labels_dict.items():
        if label == "":
            continue

        # The SMILES and the scores come from the same side ("pop" or "removed") of the history
        if action_history_k in actions_history_smi_pop:
            smi_source, scores_source = actions_history_smi_pop, actions_history_scores_pop
        else:
            smi_source, scores_source = actions_history_smi_removed, actions_history_scores_removed

        smi = smi_source[action_history_k]
        legend, scores = compute_mol_legend(action_history_k, smi, scores_source, legend_scores_keys_strat)
        smi_to_draw[label] = smi
        legends_to_draw[label] = legend
        scores_float[label] = scores

    drawn_labels = list(smi_to_draw)

    # Sorting molecules on the first value of their scores
    sorted_order = np.argsort([scores_float[label][0] for label in drawn_labels])
    if problem_type == "max":
        sorted_order = sorted_order[::-1]

    # Filtering molecules if necessary. This must happen on the sorted order: truncating the unsorted
    # lists first kept arbitrary molecules and made the full-length permutation index out of bounds.
    if draw_n_mols is not None:
        sorted_order = sorted_order[:draw_n_mols]

    mols = [MolFromSmiles(smi_to_draw[drawn_labels[i]]) for i in sorted_order]
    legends = [legends_to_draw[drawn_labels[i]] for i in sorted_order]

    img = MolsToGridImage(mols, legends=legends, molsPerRow=mols_per_row, subImgSize=(200, 200))
    return img
Ejemplo n.º 7
0
def plot_top_n(smiles,
               ref_smiles,
               n=1,
               fp='FCFP4',
               sim='tanimoto',
               filename=None):
    """Plot every reference molecule next to its ``n`` most similar molecules.

    Each grid row holds one reference (legend ``1.0000``) followed by the
    matches returned by ``get_most_similar``, labelled with their similarity.

    :param smiles: candidate SMILES, forwarded to ``get_most_similar``
    :param ref_smiles: iterable of reference SMILES, one grid row each
    :param n: number of neighbours requested per reference
    :param fp: descriptor name, forwarded as ``desc``
    :param sim: similarity name, forwarded as ``similarity``
    :param filename: if given, the image is saved there and a CSV of
        ``smiles,similarity`` rows is written next to it (same path with the
        extension replaced by ``.csv``); otherwise the image is shown
    """
    mols = list()
    sims = list()
    for r in ref_smiles:
        m, s = get_most_similar(smiles,
                                referencemol=r,
                                n=n,
                                similarity=sim,
                                desc=fp)
        mols.extend([r] + m.tolist())
        sims.extend([1.] + s.tolist())
    img = MolsToGridImage([MolFromSmiles(mol) for mol in mols],
                          molsPerRow=n + 1,
                          subImgSize=(300, 300),
                          legends=["%.4f" % s for s in sims])
    if filename:
        import os

        img.save(filename)
        # Replace the real extension instead of assuming it is 3 characters
        # long ("x.jpeg" used to yield "x..csv").
        csv_path = os.path.splitext(filename)[0] + '.csv'
        with open(csv_path, 'w') as f:
            f.writelines("%s,%.4f\n" % (m, s) for m, s in zip(mols, sims))
    else:
        img.show()
Ejemplo n.º 8
0
Archivo: util.py Proyecto: prokia/htmd
def depictMultipleMols(mols_list,
                       filename=None,
                       ipython=False,
                       legends=None,
                       highlightAtoms=None,
                       mols_perrow=3):
    """
    Depicts several molecules as an SVG grid; optionally writes it to a file
    and/or returns the ipython rendering.

    Parameters
    ----------
    mols_list: list
        The list of the rdkit molecules to depict
    filename: str
        The filename of the image. If it has no extension, '.svg' is appended
    ipython: bool
        If True, the SVG rendering for jupyter-notebook is returned
    legends: list
        List of subfigure titles, one for each molecule
    highlightAtoms: list
        List of lists of atom indexes to highlight. If the elements of the
        first inner list are themselves lists, the input is read as one more
        level of nesting and every innermost list gets its own color.
    mols_perrow: int
        The number of subfigures per row

    Returns
    -------
    svg: SVG
        If ipython is set as True, the SVG rendering is returned; otherwise
        None
    """
    from rdkit.Chem.Draw import MolsToGridImage
    from os.path import splitext

    sel_atoms = []
    sel_colors = []
    if highlightAtoms is not None:
        if isinstance(highlightAtoms[0][0], list):
            # Nested form: flatten the subsets; the color cycles with the
            # position of the subset inside its group.
            sel_atoms = [[a for a in subset] for mol_set in highlightAtoms
                         for subset in mol_set]
            sel_colors = [{
                aIdx: _highlight_colors[n % len(_highlight_colors)]
                for aIdx in subset
            } for mol_set in highlightAtoms
                          for n, subset in enumerate(mol_set)]
        else:
            sel_atoms = highlightAtoms
            sel_colors = [{aIdx: _highlight_colors[0]
                           for aIdx in subset} for subset in highlightAtoms]

    svg = MolsToGridImage(mols_list,
                          highlightAtomLists=sel_atoms,
                          highlightBondLists=[],
                          highlightAtomColors=sel_colors,
                          legends=legends,
                          molsPerRow=mols_perrow,
                          useSVG=True)

    if filename:
        ext = splitext(filename)[-1]
        filename = filename if ext != '' else filename + '.svg'
        # The context manager closes the handle even if the write fails
        with open(filename, 'w') as f:
            f.write(svg)

    if ipython:
        # Imported only when needed so that IPython stays optional for
        # callers that just want the file written
        from IPython.display import SVG
        return SVG(svg)
    return None
)

# Plot the training and validation AUC curves stored in `history.history`
# (presumably the History object returned by the model's fit call; verify).
plt.figure(figsize=(10, 6))
for metric_key, curve_label in (("AUC", "train AUC"), ("val_AUC", "valid AUC")):
    plt.plot(history.history[metric_key], label=curve_label)
plt.xlabel("Epochs", fontsize=16)
plt.ylabel("AUC", fontsize=16)
plt.legend(fontsize=16)
"""
### Predicting
"""

# Rebuild the molecules of the test rows and collect their true labels.
molecules = [molecule_from_smiles(df.smiles.values[index]) for index in test_index]
y_true = [df.p_np.values[index] for index in test_index]

# Model output is squeezed on axis 1 so each prediction is a scalar.
y_pred = tf.squeeze(mpnn.predict(test_dataset), axis=1)

# One legend per test molecule: true label next to the predicted value.
legends = [
    f"y_true/y_pred = {truth}/{y_pred[i]:.2f}" for i, truth in enumerate(y_true)
]
MolsToGridImage(molecules, molsPerRow=4, legends=legends)
"""
## Conclusions

In this tutorial, we demonstrated a message passing neural network (MPNN) to
predict blood-brain barrier permeability (BBBP) for a number of different molecules. We
first had to construct graphs from SMILES, and then build a Keras model that could
operate on these graphs.
"""
# Keep only the generated token sequences that RDKit can parse.
# NOTE(review): `molecules` and `predictions` are expected to exist already;
# they are not initialised in this part of the file.
for p in predictions:
    smiles = ''.join(p)
    if Chem.MolFromSmiles(smiles) is not None:
        molecules.append(smiles)

for m in molecules:
    print(m)

# Despite the name, smiles_list holds the parsed RDKit molecules.
smiles_list = [Chem.MolFromSmiles(x) for x in molecules]
atom_counts = [x.GetNumAtoms() for x in smiles_list]
print(sorted(atom_counts))

# keep molecules with more than 10 and fewer than 50 atoms
good_mol_list = [
    mol for mol, n_atoms in zip(smiles_list, atom_counts) if 10 < n_atoms < 50
]
print(len(good_mol_list))

#obtain QED(drug-likeness) - drop all molecules with QED below 0.5
from rdkit.Chem import QED
qed_list = [QED.qed(x) for x in good_mol_list]
final_mol_list = [(a, b) for a, b in zip(good_mol_list, qed_list) if b > 0.5]

for i in final_mol_list:
    print(i)

from rdkit.Chem.Draw import MolsToGridImage

#printing out the drawings of generated molecules, labelled with their QED
MolsToGridImage([x[0] for x in final_mol_list], molsPerRow=3, useSVG=True,
                subImgSize=(250, 250),
                legends=[f"{x[1]:.2f}" for x in final_mol_list])
#! /usr/bin/env python

import sys
from rdkit.Chem import SDMolSupplier, MolToPDBFile, AllChem, AddHs, RemoveHs
from rdkit.Chem.Draw import MolsToGridImage

# Read every record of the SD file given as the first command-line argument.
spl = SDMolSupplier(sys.argv[1])
mols = [m for m in spl]

# Build a 3D conformer for each ligand and write it to ligand_<i>.pdb.
for i, m in enumerate(mols):
    if m is None:
        # the supplier yields None for records it cannot parse
        sys.stderr.write('ligand_%d: unreadable record, skipped\n' % i)
        continue
    m = AddHs(m)
    # EmbedMolecule returns -1 when no conformer could be generated; the
    # optimiser has nothing to work on in that case
    if AllChem.EmbedMolecule(m, useBasicKnowledge=True, maxAttempts=100) < 0:
        sys.stderr.write('ligand_%d: 3D embedding failed, skipped\n' % i)
        continue
    AllChem.MMFFOptimizeMolecule(m)
    # RemoveHs returns a new molecule instead of editing in place; keep the
    # result, otherwise the hydrogens are still written to the PDB file
    m = RemoveHs(m)
    MolToPDBFile(m, 'ligand_%d.pdb' % i)

# Overview image of the molecules as read from the file (2D depiction).
img = MolsToGridImage(mols,
                      legends=["ligand_%d" % i for i in range(len(mols))])
img.save('ligands.png')