Ejemplo n.º 1
0
def generate_molecule_image(path, limit=25):
    """Render Murcko-scaffold PNGs for the first ``limit`` lines of a hits file.

    Each line is expected to hold a SMILES string followed by a molecule
    identifier, separated by whitespace.  For every usable line the Murcko
    scaffold of the molecule is drawn, the identifier is written near the
    top-left corner, and the picture is saved as
    ``GUI/images/molecules/<smiles>.png``.

    Lines with fewer than two fields and SMILES that RDKit cannot parse are
    skipped instead of aborting the whole run.

    Args:
        path: Location of the hits file.  Nothing happens if it does not exist.
        limit: Maximum number of lines read from the top of the file.

    Returns:
        None.
    """
    if not os.path.exists(path):
        return

    # Read the hits file: (smiles, identifier) pairs from the first lines.
    entries = []
    with open(path, 'r') as top_hits:
        for line_number, line in enumerate(top_hits):
            if line_number >= limit:
                break
            # Whitespace split also drops the trailing newline, which would
            # otherwise end up inside the identifier drawn on the image.
            fields = line.split()
            if len(fields) < 2:
                continue
            entries.append((fields[0], fields[1]))

    if not entries:
        return

    # Heavy third-party imports are deferred until there is work to do.
    from rdkit.Chem.Scaffolds import MurckoScaffold
    from rdkit.Chem import MolFromSmiles
    from rdkit.Chem.Draw import MolToImage
    from PIL import ImageDraw

    # Generate scaffold
    for smile, mid in entries:
        parsed = MolFromSmiles(smile)
        if parsed is None:
            # RDKit signals an unparsable SMILES with None.
            continue
        mol = MurckoScaffold.GetScaffoldForMol(parsed)
        image = MolToImage(mol)

        # Add text to the image
        draw = ImageDraw.Draw(image)
        draw.text((5, 5), mid, fill="black", align="right")
        # NOTE(review): SMILES may contain '/' or '\' (stereo bonds), which
        # are path separators -- confirm these file names are safe.
        image.save("GUI/images/molecules/{}.png".format(smile))
Ejemplo n.º 2
0
def draw(mol, quality=1, ax=None):
    """Draw a molecule on a matplotlib axis.

    Args:
        mol (skchem.Mol):
            The molecule to be drawn.
        quality (int):
            The level of quality.  Higher quality takes more time, but will
            look better (so long as matplotlib's savefig.dpi is high enough).
            It scales the drawing options and the rendered image size.
        ax (plt.Axes or None):
            An existing axis on which to draw the molecule.  Defaults to the
            current axis.

    Returns:
        plt.AxesImage:
            A matplotlib AxesImage object with the molecule drawn.
    """

    if ax is None:
        ax = plt.gca()
    # Pass a real boolean: newer matplotlib releases no longer interpret the
    # string 'off' as False, so grid('off') would switch the grid on.
    ax.grid(False)
    ax.axis('off')

    opts = DrawingOptions()
    opts.dotsPerAngstrom *= quality
    opts.atomLabelFontSize *= quality
    opts.bondLineWidth *= quality

    size = 300 * quality

    # Only the image is used here; the canvas is flushed before display.
    img, canvas, _drawer = MolToImage(mol,
                                      size=(size, size),
                                      options=opts,
                                      returnCanvas=True)
    canvas.flush()
    return ax.imshow(img, extent=(0, 1, 0, 1))
Ejemplo n.º 3
0
def compute_mol_attributes(graph,
                           labels_dict,
                           actions_history_smi_pop,
                           actions_history_smi_removed,
                           actions_history_scores_pop,
                           actions_history_scores_removed,
                           legend_scores_keys_strat=None):
    """Attach a molecule picture and a score legend to graph nodes.

    For every key of ``labels_dict`` the SMILES is looked up in
    ``actions_history_smi_pop`` when the key is present there, otherwise in
    ``actions_history_smi_removed``; the matching scores mapping is chosen
    the same way.  The molecule is rendered at 800x800, cropped with
    ``crop_image_with_transparency``, and a legend is obtained from
    ``compute_mol_legend``.  Results are stored on ``graph`` as the node
    attributes ``"image"`` and ``"score_label"``.

    Returns:
        None.  ``graph`` is updated in place through
        ``nx.set_node_attributes``.
    """
    drawing_options = DrawingOptions()
    drawing_options.coordScale = 0.9
    drawing_options.dotsPerAngstrom = 30

    node_images = {}
    node_legends = {}

    for node_key in labels_dict:
        # Pick the SMILES / scores pair the key belongs to.
        if node_key in actions_history_smi_pop:
            smiles_source = actions_history_smi_pop
            scores_source = actions_history_scores_pop
        else:
            smiles_source = actions_history_smi_removed
            scores_source = actions_history_scores_removed

        smi = smiles_source[node_key]
        picture = MolToImage(MolFromSmiles(smi),
                             size=(800, 800),
                             options=drawing_options)
        node_images[node_key] = crop_image_with_transparency(picture)

        # Only the first item returned by compute_mol_legend is kept.
        legend, _ = compute_mol_legend(node_key, smi, scores_source,
                                       legend_scores_keys_strat)
        node_legends[node_key] = legend

    nx.set_node_attributes(graph, node_images, "image")
    nx.set_node_attributes(graph, node_legends, "score_label")
Ejemplo n.º 4
0
def mol_to_pil(mol, size=(400, 200)):
    """
    Render a molecule's structure with ``MolToImage`` and return the image.

    Args:
        mol (rdkit.Chem.Mol): Object representing molecule.

        size (tuple): Width and height of the image's bounding box; only the
            first two items are used.

    Returns:
        The image object produced by ``MolToImage`` (a PIL image).

    """
    width, height = size[0], size[1]
    return MolToImage(mol, size=(width, height))
Ejemplo n.º 5
0
def getModeMurckoScaffoldImage(SMILES_list):
    """
    Return the most common Murcko scaffold of a list of SMILES as an image.

    Scaffolds are grouped by their canonical SMILES.  RDKit ``Mol`` objects
    compare and hash by identity, so counting the ``Mol`` objects themselves
    would treat every scaffold as unique and pick an arbitrary one.

    Args:
        SMILES_list: Iterable of SMILES strings.  Entries RDKit cannot parse
            are ignored.

    Returns:
        The 700x700 image produced by ``MolToImage`` for the most frequent
        scaffold.  On ties the scaffold seen first wins.

    Raises:
        ValueError: If no scaffold could be computed (empty input or no
            parsable SMILES).
    """
    from collections import Counter

    scaffold_counts = Counter()
    scaffold_by_smiles = {}

    # Looping through and getting the scaffolds for each smile
    for smile in SMILES_list:
        m1 = Chem.MolFromSmiles(smile)
        if m1 is None:
            # Unparsable SMILES: nothing to derive a scaffold from.
            continue
        core = MurckoScaffold.GetScaffoldForMol(m1)
        key = Chem.MolToSmiles(core)
        scaffold_counts[key] += 1
        # Keep the first Mol seen for each distinct scaffold for drawing.
        scaffold_by_smiles.setdefault(key, core)

    if not scaffold_counts:
        raise ValueError("no valid SMILES to compute a Murcko scaffold from")

    # Finding the mode Scaffold:
    mode_key, _ = scaffold_counts.most_common(1)[0]
    return MolToImage(scaffold_by_smiles[mode_key], size=(700, 700))
Ejemplo n.º 6
0
def plot_weights(mol, weights, quality=1, l=0.4, step=50, levels=20,
                 contour_opacity=0.5, cmap='RdBu', ax=None, **kwargs):
    """ Plot weights as a sum of gaussians across a structure image.

    Args:
        mol (skchem.Mol):
            Molecule to visualize weights for.
        weights (iterable<float>):
            Array of weights in atom index order.
        quality (int):
            Scale factor applied to the drawing options and to the rendered
            image size (300 * quality pixels per edge).
        l (float):
            Lengthscale of gaussians to visualize as a multiple of bond length.
        step (int):
            Size of grid edge to calculate the gaussians.
            NOTE(review): currently unused -- the grid is fixed at 500 points
            per edge; confirm whether this parameter should drive it.
        levels (int):
            Number of contours to plot.  Halved when no value of the summed
            field is negative.
        contour_opacity (float):
            Alpha applied to the contour layer.
        ax (plt.axis):
            Axis to apply the plot to. Defaults to current axis.
        cmap (plt.cm):
            Colormap to use for the contour.
        **kwargs:
            Passed to contourf function.

    Returns:
        matplotlib.AxesSubplot: The plot.
    """

    if ax is None:
        ax = plt.gca()
    # Pass a real boolean: newer matplotlib releases no longer interpret the
    # string 'off' as False, so grid('off') would switch the grid on.
    ax.grid(False)
    ax.axis('off')

    opts = DrawingOptions()
    opts.dotsPerAngstrom *= quality
    opts.atomLabelFontSize *= quality
    opts.bondLineWidth *= quality

    size = 300 * quality

    img, canvas, drawer = MolToImage(mol, size=(size, size), options=opts,
                                     returnCanvas=True)
    canvas.flush()
    # Pixel positions -> axis fractions, with the vertical axis flipped.
    # NOTE(review): relies on the first atomPs entry iterating in atom index
    # order -- confirm.
    atom_positions = list(drawer.atomPs.values())[0]
    coords = np.array([[i / size, 1 - j / size]
                       for i, j in atom_positions.values()])

    # The first bond's drawn length serves as the reference bond length.
    b = mol.bonds[0]
    begin, end = b.GetBeginAtom().GetIdx(), b.GetEndAtom().GetIdx()
    length = np.linalg.norm(coords[end] - coords[begin])

    x = np.linspace(0, 1, 500)
    y = np.linspace(0, 1, 500)
    x, y = np.meshgrid(x, y)

    def gaussian(x, y, mu, sigma):
        # Axis-aligned 2D gaussian; the diagonal entries of sigma are used as
        # the per-axis standard deviations.
        return (1 / (2 * np.pi * sigma[0, 0] * sigma[1, 1]) *
                np.exp(-((x - mu[0]) ** 2 / (2 * sigma[0, 0] ** 2) +
                         (y - mu[1]) ** 2 / (2 * sigma[1, 1] ** 2))))

    if not np.max(weights) == np.min(weights) == 0:
        z = sum([w * gaussian(x, y, mu, sigma=l * length * np.identity(2))
                 for mu, w in zip(coords, weights)])
        # Symmetric colour limits so zero sits at the colormap centre.
        v = np.max((np.abs(z.min()), np.abs(z.max())))
    else:
        # All weights are zero: draw a flat field.
        z = np.zeros(x.shape)
        v = 1

    if z.min() >= 0:
        levels = int(levels/2)
    ax.contourf(x, y, z, levels, alpha=contour_opacity,
                extent=(0, 1, 0, 1), vmin=-v, vmax=v, cmap=cmap, **kwargs)

    ax.imshow(img, extent=(0, 1, 0, 1))
    return ax
Ejemplo n.º 7
0
def smileToMurckoScaffoldImage(smile):
    """Return a 700x700 image of the Murcko scaffold of the given SMILES."""
    scaffold = MurckoScaffold.GetScaffoldForMol(Chem.MolFromSmiles(smile))
    image = MolToImage(scaffold, size=(700, 700))
    return image
Ejemplo n.º 8
0
# https://github.com/iwatobipen/chemo_info/blob/master/rdkit_notebook/drawmol_with_idx.ipynb
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.Draw import MolToImage

# Example molecules to render; the commented-out entries are alternative sets.
smile_list = [
    # 'CC(C)(C)Cl', 'CC(C)(C)O',
    # 'CC(C)Br', 'CC(C)C'
    # 'CCc1ccccc1C', 'CCc1ccccc1O'
    'Cc1ccccc1Cl', 'Cc1ccccc1N', 'Cc1ccccc1O'
    # 'CCCC#C', 'CCCC=C'
    # 'CCCC(=C)C', 'CCCC(=O)C', 'CCCC(=O)O'
    ]

# Render each molecule and open it in the default image viewer.
for smile in smile_list:
    mol = Chem.MolFromSmiles(smile)
    # Named "image" rather than "plt" so the conventional matplotlib.pyplot
    # alias is not shadowed by a PIL image.
    image = MolToImage(mol)
    image.show()
Ejemplo n.º 9
0
def molimg(mol, size=(320, 320), **mol_to_image_kwargs):
    """Render ``mol`` with ``MolToImage`` and hand the result to ``display``.

    Args:
        mol: Molecule passed straight to ``MolToImage``.
        size: Passed positionally as the second ``MolToImage`` argument.
        **mol_to_image_kwargs: Forwarded unchanged to ``MolToImage``.
    """
    image = MolToImage(mol, size, **mol_to_image_kwargs)
    display(image)
Ejemplo n.º 10
0
    except:
        smi = "-"
    return smi


# Standardize every SMILES and keep the result next to the raw value.
stnd_smis = df["SMILES"].apply(stnd_func)
df["STND_SMILES"] = stnd_smis

# Save a picture of the raw and of the standardized molecule for each row,
# one sub-directory per row.
base_dir = "pic_raw_vs_stnd"
for i in range(len(df)):
    smi, stnd_smi = df.loc[i][["SMILES", "STND_SMILES"]]
    current_dir = os.path.join(base_dir, str(i))
    # The keyword is ``exist_ok``; ``exist=True`` raises TypeError.
    os.makedirs(current_dir, exist_ok=True)

    for smiles_text, file_name in ((smi, "smi.jpeg"),
                                   (stnd_smi, "stnd_smi.jpeg")):
        # Best effort: a molecule that cannot be drawn is reported and the
        # loop moves on.
        try:
            MolToImage(MolFromSmiles(smiles_text)).save(
                os.path.join(current_dir, file_name))
        except Exception as e:
            print(i, e)

# Standardization must not collapse two different rows onto the same SMILES.
assert len(set(
    df["STND_SMILES"].values)) == len(df), "Standardize Smiles reduced"

# Positions of the rows whose standardized SMILES is the "-" marker.
non_cases = np.squeeze(np.argwhere((df["STND_SMILES"] == "-").values))

df = df.drop(non_cases.tolist())
def mol_to_pil(mol, size=(400, 200)):
    """
    Draw the given molecule with ``MolToImage`` and return the resulting image.

    Only the first two items of ``size`` (width, height) are used.
    """
    bounding_box = (size[0], size[1])
    return MolToImage(mol, size=bounding_box)
Ejemplo n.º 12
0
def kmeans_for_atom(atom_rep_data, target_index_list, select_atom='C'):
    """Cluster atom representations of one element and inspect target molecules.

    The atoms whose (shifted) label falls in the range of ``select_atom`` are
    clustered with KMeans, a confusion matrix between labels and cluster ids
    is printed and plotted, and for every entry of ``target_index_list`` the
    molecule is displayed together with its true and predicted labels.

    Args:
        atom_rep_data: Indexable with three items: the feature rows, the
            labels (each is shifted by -1 before use) and an index array
            whose column 0 is compared against the target indices.
        target_index_list: Row indices into ``../data/ESOL_cleaned.csv``
            (the first CSV row is numbered -1, presumably the header).
        select_atom: One of ``'O'``, ``'C'`` or ``'N'``.

    Raises:
        ValueError: If ``select_atom`` is not one of the supported elements.
    """
    if select_atom not in ('O', 'C', 'N'):
        raise ValueError(
            "select_atom must be 'O', 'C' or 'N', got {!r}".format(
                select_atom))

    X = np.array(atom_rep_data[0])
    y = np.array([int(ele - 1) for ele in atom_rep_data[1]])
    index = np.array(atom_rep_data[2])

    if select_atom == 'O':
        # NOTE(review): 'C' and 'N' end up with zero-based ranges after the
        # -1 shift above, whereas this range stays 1..4 -- confirm whether
        # it should be 0..3.
        numbers = list(range(1, 5))
        classes = ['oh', 'oa', 'o', 'unO']
    elif select_atom == 'C':
        numbers = np.array(list(range(5, 11))) - 5
        classes = ['c3', 'c1', 'ca', 'c', 'c2', 'unC']
        y = y - 5
    else:  # 'N'
        numbers = np.array(list(range(11, 18))) - 11
        classes = ['n1', 'n3', 'na', 'n', 'no', 'nh', 'unN']
        y = y - 11

    # Keep only the atoms whose label belongs to the selected element.
    row = np.isin(y, numbers)
    X_select = X[row]
    y_select = y[row]
    index_select = index[row]

    kmeans = KMeans(n_clusters=len(numbers), random_state=0).fit(X_select)
    y_pred = kmeans.labels_
    print(confusion_matrix(y_select, y_pred))

    plot_confusion_matrix(y_select, np.array(y_pred), classes, normalize=True)

    # Map each requested index to its SMILES.  A positional list would
    # misalign whenever target_index_list is unsorted, contains duplicates,
    # or names an index that is missing from the CSV.
    wanted = set(target_index_list)
    smiles_by_index = {}
    with open('{}{}'.format('../data/', 'ESOL_cleaned.csv'), 'r') as data_fid:
        reader = csv.reader(data_fid, delimiter=',', quotechar='"')
        # Counting starts at -1 so the first CSV row is never selected.
        for i, record in enumerate(reader, start=-1):
            if i in wanted:
                smiles_by_index[i] = record[9]

    for idx, target in enumerate(target_index_list):
        row2 = index_select[:, 0] == target
        if row2.sum() == 0:
            print('{}th index data not in dumped data.'.format(target))
            continue
        if target not in smiles_by_index:
            print('{}th index data not in ESOL_cleaned.csv.'.format(target))
            continue
        y_idx = y_select[row2]
        y_pred_idx = y_pred[row2]

        smile = smiles_by_index[target]
        print('The {}th intereted smile is {}.'.format(idx, smile))
        mol = mol_with_atom_index(smile)
        # Named "image" rather than "plt": this is a PIL image, not pyplot.
        image = MolToImage(mol)
        image.show()
        print('The {} atoms includes {}, the kmeans predicted labels are {}'.
              format(select_atom, y_idx, y_pred_idx))
        print('\n')