def display_selected_data(selectedData):
    """Render the selected molecules as a base64 image data URI.

    Looks the selected points up in the module-level ``df`` (columns
    ``SMILES``, ``Name`` and ``is_active`` are read) and draws at most
    ``max_structs`` of them in a grid, each labelled "<Name> <is_active>".

    :param selectedData: mapping with a ``'points'`` list whose items carry
        a ``'pointIndex'`` (presumably a Plotly/Dash selection payload --
        confirm against the caller); may be ``None`` or empty.
    :return: ``""`` when nothing is selected, otherwise a
        ``data:image/png;base64,...`` string.
    """
    max_structs = 12
    structs_per_row = 6
    empty_plot = ""
    if not selectedData or len(selectedData['points']) == 0:
        return empty_plot

    # Truncate the selection up front so molecules and legends always have
    # the same length; MolsToGridImage expects one legend per molecule.
    match_idx = [x['pointIndex'] for x in selectedData['points']]
    match_df = df.iloc[match_idx[:max_structs]]

    mol_list = [Chem.MolFromSmiles(x) for x in match_df.SMILES]
    # Take the activity flag from the *selected* rows; reading it from the
    # whole frame would pair names with the flags of unrelated rows.
    legends = [
        x + " " + str(y) for (x, y) in zip(match_df.Name, match_df.is_active)
    ]
    img = MolsToGridImage(mol_list,
                          molsPerRow=structs_per_row,
                          legends=legends)

    # Encode as PNG so the bytes agree with the MIME type declared below.
    buffered = BytesIO()
    img.save(buffered, format="PNG")
    encoded_image = base64.b64encode(buffered.getvalue())
    return 'data:image/png;base64,{}'.format(encoded_image.decode())
# Example #2
# 0
def plot_top_n(smiles,
               ref_smiles,
               n=1,
               fp='FCFP4',
               sim='tanimoto',
               filename=None):
    """Draw each reference molecule next to its most similar molecules.

    For every entry of ``ref_smiles`` the helper ``get_most_similar`` is
    queried against ``smiles``; the reference (similarity 1.0) followed by
    the returned hits are drawn in a grid with ``n + 1`` molecules per row
    (one row per reference, assuming the helper returns exactly ``n`` hits
    -- confirm). Each cell is labelled with its similarity to four decimals.

    :param smiles: candidate SMILES, passed through to ``get_most_similar``
    :param ref_smiles: iterable of reference SMILES strings
    :param n: number of hits requested per reference
    :param fp: descriptor name forwarded as ``desc``
    :param sim: similarity name forwarded as ``similarity``
    :param filename: if given, the image is saved there and a CSV of
        "smiles,similarity" lines is written next to it (same base name,
        ``.csv`` extension); otherwise the image is shown on screen
    :return: None
    """
    import os

    mols = []
    sims = []
    for r in ref_smiles:
        m, s = get_most_similar(smiles,
                                referencemol=r,
                                n=n,
                                similarity=sim,
                                desc=fp)
        mols.extend([r] + m.tolist())
        sims.extend([1.] + s.tolist())
    img = MolsToGridImage([MolFromSmiles(mol) for mol in mols],
                          molsPerRow=n + 1,
                          subImgSize=(300, 300),
                          legends=["%.4f" % s for s in sims])
    if filename:
        img.save(filename)
        # splitext handles extensions of any length (".png", ".jpeg", ...),
        # unlike slicing off a fixed four characters.
        csv_name = os.path.splitext(filename)[0] + '.csv'
        with open(csv_name, 'w') as f:
            f.writelines("%s,%.4f\n" % (m, s) for m, s in zip(mols, sims))
    else:
        img.show()
# Example #3
# 0
def display_selected_data(y, selection=None):
    """Show the selected rows and render their structures as a data URI.

    The selected rows of the module-level ``main_df`` are written out with
    ``st.write``; at most ``max_structs`` of them are then drawn in a grid,
    each labelled "<step>_<batch_idx>" on one line and "<y>: <value>" on
    the next.

    :param y: name of the ``main_df`` column whose value is shown in the
        legend (floats are formatted to two decimals)
    :param selection: ``None`` or a mapping whose
        ``['BOX_SELECT']['data']`` entry holds the selected row indices
    :return: ``""`` when there is no selection, otherwise a
        ``data:image/png;base64,...`` string
    """
    max_structs = 24
    structs_per_row = 4
    empty_plot = ""
    if selection is None:
        return empty_plot

    match_idx = selection['BOX_SELECT']['data']
    st.write(main_df.iloc[match_idx])
    # Nothing to draw for an empty selection.
    if len(match_idx) == 0:
        return empty_plot

    # Only the first max_structs rows are drawn, so only those are parsed.
    shown_idx = match_idx[:max_structs]
    # NOTE(review): positional (.iloc) and label-based (.loc) lookups are
    # mixed below; they agree only if main_df has a default integer index.
    smis = main_df.loc[shown_idx, 'smiles'].tolist()
    mols = [Chem.MolFromSmiles(smi) for smi in smis]
    name_list = [
        f"{x:.02f}" if isinstance(x, float) else f"{x}"
        for x in main_df.iloc[shown_idx][y]
    ]
    batch_list = [
        f"{step}_{batch_idx}"
        for step, batch_idx in main_df.loc[shown_idx,
                                           ['step', 'batch_idx']].values
    ]
    legends = [
        f"{idx}\n{y}: {name}" for idx, name in zip(batch_list, name_list)
    ]
    img = MolsToGridImage(mols,
                          molsPerRow=structs_per_row,
                          legends=legends,
                          subImgSize=(300, 300))

    # Encode as PNG so the bytes agree with the MIME type declared below.
    buffered = BytesIO()
    img.save(buffered, format="PNG")
    encoded_image = base64.b64encode(buffered.getvalue())
    return 'data:image/png;base64,{}'.format(encoded_image.decode())
def visualize(smi_file, n_samples=100):
  """Draw a random sample of molecules from a SMILES file to a PNG grid.

  The image is written next to the input file, with the input's extension
  replaced by ``.png``; the output path is printed. Each structure is
  labelled with its position in the sample.

  :param smi_file: path to the SMILES file to read
  :param n_samples: maximum number of molecules to draw; capped at the
      number of molecules in the file
  :raises ValueError: if ``n_samples`` is less than 1 or the file yields
      no molecules
  """
  import math
  import os
  import random
  from rdkit.Chem.Draw import MolsToGridImage
  from rdkit.Chem.rdmolfiles import SmilesMolSupplier

  if n_samples < 1:
    raise ValueError("n_samples must be at least 1")

  # load molecules from file
  mols = SmilesMolSupplier(smi_file, sanitize=True, nameColumn=-1)
  mols_list = [mol for mol in mols]
  if not mols_list:
    raise ValueError("no molecules found in %s" % smi_file)

  # random.sample raises if asked for more items than exist, so cap the
  # sample size for files smaller than the requested sample.
  n_samples = min(n_samples, len(mols_list))
  mols_sampled = random.sample(mols_list, n_samples)

  # aim for a square grid; never fewer than one molecule per row
  mols_per_row = max(1, int(math.sqrt(n_samples)))

  # swap whatever extension the input has for ".png"
  png_filename = os.path.splitext(smi_file)[0] + ".png"
  print(png_filename)

  # draw the molecules (creates a PIL image), numbering each structure
  img = MolsToGridImage(mols=mols_sampled,
                        molsPerRow=mols_per_row,
                        legends=[str(i) for i in range(n_samples)])

  img.save(png_filename)
def display_selected_data(selectedData, y):
    """Render the selected molecules as a base64 image data URI.

    Looks the selected points up in the module-level ``main_df`` and draws
    at most ``max_structs`` of them in a grid, each labelled
    "<step>_<batch_idx>" on one line and "<y>: <value>" on the next.

    :param selectedData: mapping with a ``'points'`` list whose items carry
        a ``'pointIndex'`` (presumably a Plotly/Dash selection payload --
        confirm against the caller); may be ``None`` or empty
    :param y: name of the ``main_df`` column whose value is shown in the
        legend (floats are formatted to two decimals)
    :return: ``""`` when nothing is selected, otherwise a
        ``data:image/png;base64,...`` string
    """
    max_structs = 12
    structs_per_row = 3
    empty_plot = ""
    if not selectedData or len(selectedData['points']) == 0:
        return empty_plot

    # Truncate the selection up front so molecules and legends always have
    # the same length; MolsToGridImage expects one legend per molecule.
    match_idx = [x['pointIndex'] for x in selectedData['points']]
    match_idx = match_idx[:max_structs]

    # NOTE(review): positional (.iloc) and label-based (.loc) lookups are
    # mixed below; they agree only if main_df has a default integer index.
    selected = main_df.iloc[match_idx]
    mol_list = [Chem.MolFromSmiles(x) for x in selected.smiles]
    name_list = [
        f"{x:.02f}" if isinstance(x, float) else f"{x}" for x in selected[y]
    ]
    batch_list = [
        f"{step}_{batch_idx}"
        for step, batch_idx in main_df.loc[match_idx,
                                           ['step', 'batch_idx']].values
    ]
    legends = [
        f"{idx}\n{y}: {name}" for idx, name in zip(batch_list, name_list)
    ]
    img = MolsToGridImage(mol_list,
                          molsPerRow=structs_per_row,
                          legends=legends,
                          subImgSize=(300, 300))

    # Encode as PNG so the bytes agree with the MIME type declared below.
    buffered = BytesIO()
    img.save(buffered, format="PNG")
    encoded_image = base64.b64encode(buffered.getvalue())
    return 'data:image/png;base64,{}'.format(encoded_image.decode())
# Example #6
# 0
def make_image_grid(file_label,
                    smi_list,
                    labels=None,
                    out_dir=PNG_DIR,
                    mol_img_size=(400, 300),
                    write_output=True):
    """
    Given a molecular formula (or other label) and a set of SMILES, save a PNG with an image of every molecule.
    https://www.rdkit.org/docs/GettingStartedInPython.html

    A single molecule is written with MolToFile and its label is appended to the file name; two or more
    molecules are drawn as a labelled grid with three molecules per row.

    :param file_label: str, such as chemical formula that corresponds to all smiles in SMILES set
    :param smi_list: list or set of SMILES strings; used to generate images
    :param labels: if None (or empty), will use the smi_list as labels; otherwise a list to use
    :param out_dir: directory where the file should be saved
    :param mol_img_size: tuple of ints to determine size of individual molecules
    :param write_output: boolean to determine whether to write to screen that a file was created
    :return: N/A, save a file
    """
    # Materialize the input once: a set cannot be indexed (needed for the single-molecule file name),
    # and a list guarantees the labels stay in the same order as the molecules built from it.
    smi_list = list(smi_list)

    mols = []
    for smi in smi_list:
        mol = Chem.MolFromSmiles(smi)
        Compute2DCoords(mol)
        mols.append(mol)

    img_labels = list(labels) if labels else smi_list

    single_mol = len(mols) == 1
    if single_mol:
        # didn't see a way for RDKit to add a label to an image with a single molecule (grid image does not work
        # for one image), so add to file name; the f-string also tolerates non-string labels
        file_label = f"{file_label}_{img_labels[0]}"
    fname = create_out_fname(file_label, ext='png', base_dir=out_dir)

    if single_mol:
        MolToFile(mols[0], fname, size=mol_img_size)
    else:
        img_grid = MolsToGridImage(mols,
                                   molsPerRow=3,
                                   subImgSize=mol_img_size,
                                   legends=img_labels)
        img_grid.save(fname)
    if write_output:
        print(f"Wrote file: {os.path.relpath(fname)}")
#! /usr/bin/env python

import sys
from rdkit.Chem import SDMolSupplier, MolToPDBFile, AllChem, AddHs, RemoveHs
from rdkit.Chem.Draw import MolsToGridImage

# Script: read ligands from the SD file given as the first argument, write a
# 3D-embedded, MMFF-optimised PDB file per ligand (ligand_<i>.pdb), and save
# an overview grid image of all ligands (ligands.png).
if len(sys.argv) < 2:
    sys.exit("usage: %s <ligands.sdf>" % sys.argv[0])

spl = SDMolSupplier(sys.argv[1])
# The supplier yields None for records it cannot parse; drop those so the
# calls below never receive None.
mols = [m for m in spl if m is not None]

for i, m in enumerate(mols):
    mol_h = AddHs(m)
    # EmbedMolecule returns -1 when no conformer could be generated; there
    # is nothing to optimise or write in that case.
    conf_id = AllChem.EmbedMolecule(mol_h,
                                    useBasicKnowledge=True,
                                    maxAttempts=100)
    if conf_id < 0:
        sys.stderr.write("ligand_%d: 3D embedding failed, skipped\n" % i)
        continue
    AllChem.MMFFOptimizeMolecule(mol_h)
    # RemoveHs returns a new molecule instead of modifying its argument, so
    # its result must be the one that is written out.
    MolToPDBFile(RemoveHs(mol_h), 'ligand_%d.pdb' % i)

img = MolsToGridImage(mols,
                      legends=["ligand_%d" % i for i in range(len(mols))])
img.save('ligands.png')