Ejemplo n.º 1
0
def PrintAsBase64PNGString(x, renderer=None):
  """Return an HTML-embeddable rendering of molecule ``x``.

  When the module-level ``molRepresentation`` setting equals ``'svg'``
  (case-insensitive) the SVG data is returned; otherwise an ``<img>`` tag
  with a ``data:image/png;base64`` source built from ``_get_image`` is
  returned. Atoms stored on ``x.__sssAtoms`` are highlighted only when the
  module-level ``highlightSubstructures`` flag is truthy.

  ``renderer`` is accepted but not used.
  """
  wants_highlight = highlightSubstructures and hasattr(x, '__sssAtoms')
  highlightAtoms = x.__sssAtoms if wants_highlight else []

  if molRepresentation.lower() != 'svg':
    png_data = Draw._moltoimg(x, molSize, highlightAtoms, "", returnPNG=True, kekulize=True)
    return '<img src="data:image/png;base64,%s" alt="Mol"/>' % _get_image(png_data)

  # IPython is imported lazily so the PNG path works without it.
  from IPython.display import SVG
  svg_markup = Draw._moltoSVG(x, molSize, highlightAtoms, "", True)
  return SVG(svg_markup).data
Ejemplo n.º 2
0
def _toPNG(mol):
    """Return the PNG rendering of ``mol`` produced by ``Draw._moltoimg``.

    Atoms listed in ``mol.__sssAtoms`` (when that attribute exists) are passed
    as highlight atoms; the size and kekulization settings come from the
    module-level ``molSize`` and ``kekulizeStructures``.
    """
    highlightAtoms = mol.__sssAtoms if hasattr(mol, '__sssAtoms') else []
    return Draw._moltoimg(
        mol,
        molSize,
        highlightAtoms,
        "",
        returnPNG=True,
        kekulize=kekulizeStructures,
    )
Ejemplo n.º 3
0
def make_similarity_maps(mol, weights, colorMap=cm.PiYG, scale=-1, size=(250, 250), sigma=None,coordScale=1.5, step=0.01, colors='k', contourLines=10, alpha=0.5, **kwargs):
    """Draw a similarity map for ``mol`` from per-atom ``weights``.

    Heavily based on the similarity map function in the RDKit, with a few
    changes to exception handling and rendering.

    Parameters:
      mol -- the molecule to draw (must have at least two atoms)
      weights -- atom-based weights passed to ``Draw.calcAtomGaussians``
      colorMap -- matplotlib colour map used for the filled map
      scale -- ``<= 0``: the largest absolute map value is used as the
               colour-scale limit; otherwise this value is the limit
      size, coordScale -- forwarded to ``Draw.MolToMPL``
      sigma -- Gaussian width; derived from atom positions when ``None``
      step -- grid step for ``Draw.calcAtomGaussians``
      colors, contourLines, alpha -- contour line colour, count/levels, alpha
      kwargs -- forwarded to both ``Draw.MolToMPL`` and ``contour``

    Returns the figure object created by ``Draw.MolToMPL``.
    Raises ValueError when the molecule has fewer than two atoms.
    """
    if mol.GetNumAtoms() < 2:
        raise ValueError("too few atoms")

    fig = Draw.MolToMPL(mol, coordScale=coordScale, size=size, **kwargs)

    if sigma is None:
        # Default width: 0.3 x the 2D distance between the ends of bond 0,
        # or between atoms 0 and 1 when the molecule has no bonds.
        if mol.GetNumBonds() > 0:
            first_bond = mol.GetBondWithIdx(0)
            begin, end = first_bond.GetBeginAtomIdx(), first_bond.GetEndAtomIdx()
        else:
            begin, end = 0, 1
        pos_a, pos_b = mol._atomPs[begin], mol._atomPs[end]
        squared = [(pos_a[k] - pos_b[k]) ** 2 for k in range(2)]
        sigma = round(0.3 * math.sqrt(sum(squared)), 2)

    x, y, z = Draw.calcAtomGaussians(mol, sigma, weights=weights, step=step)

    # Symmetric colour scale around zero.
    maxScale = max(math.fabs(numpy.min(z)), math.fabs(numpy.max(z))) if scale <= 0.0 else scale

    ax = fig.axes[0]
    cax = ax.imshow(
        z,
        cmap=colorMap,
        interpolation='bilinear',
        origin='lower',
        extent=(0,1,0,1),
        vmin=-maxScale,
        vmax=maxScale,
    )
    # Colour bar with ticks at the limits and zero, but with blank labels.
    cbar = fig.colorbar(cax, shrink=.75, pad=.02,ticks=[-maxScale, 0, maxScale], orientation='vertical')
    cbar.ax.set_yticklabels(['', '', ''])

    # Contour lines on top of the filled map.
    ax.contour(x, y, z, contourLines, colors=colors, alpha=alpha, **kwargs)
    return fig
Ejemplo n.º 4
0
def GetSimilarityMapFromWeights(
    mol,
    weights,
    colorMap=cm.PiYG,
    scale=-1,
    size=(250, 250),
    sigma=None,  # @UndefinedVariable  #pylint: disable=E1101
    coordScale=1.5,
    step=0.01,
    colors="k",
    contourLines=10,
    alpha=0.5,
    **kwargs
):
    """
    Generates the similarity map for a molecule given the atomic weights.

    Parameters:
      mol -- the molecule of interest (at least two atoms)
      weights -- the per-atom weights handed to calcAtomGaussians
      colorMap -- the matplotlib color map scheme
      scale -- scale <= 0: the absolute maximum of the map is used as the
               maximum scale; otherwise this value is the maximum scale
      size -- the size of the figure
      sigma -- the sigma for the Gaussians; when None it is derived from
               the distance between two atom positions
      coordScale -- scaling factor for the coordinates
      step -- the step for calcAtomGaussians
      colors -- color of the contour lines
      contourLines -- if integer number N: N contour lines are drawn
                      if list(numbers): contour lines at these numbers
      alpha -- the alpha blending value for the contour lines
      kwargs -- additional arguments, passed to both MolToMPL and contour

    Returns the figure created by Draw.MolToMPL.
    Raises ValueError if the molecule has fewer than two atoms.
    """
    if mol.GetNumAtoms() < 2:
        raise ValueError("too few atoms")

    fig = Draw.MolToMPL(mol, coordScale=coordScale, size=size, **kwargs)

    if sigma is None:
        # Use the first bond's end points when there is one, else atoms 0 and 1.
        if mol.GetNumBonds() > 0:
            first_bond = mol.GetBondWithIdx(0)
            begin, end = first_bond.GetBeginAtomIdx(), first_bond.GetEndAtomIdx()
        else:
            begin, end = 0, 1
        pos_a, pos_b = mol._atomPs[begin], mol._atomPs[end]
        squared = [(pos_a[k] - pos_b[k]) ** 2 for k in range(2)]
        sigma = round(0.3 * math.sqrt(sum(squared)), 2)

    x, y, z = Draw.calcAtomGaussians(mol, sigma, weights=weights, step=step)

    # Symmetric colour scale around zero.
    maxScale = max(math.fabs(numpy.min(z)), math.fabs(numpy.max(z))) if scale <= 0.0 else scale

    ax = fig.axes[0]
    ax.imshow(
        z,
        cmap=colorMap,
        interpolation="bilinear",
        origin="lower",
        extent=(0, 1, 0, 1),
        vmin=-maxScale,
        vmax=maxScale,
    )

    # Contour lines are only drawn when at least one weight is non-zero.
    if any(w != 0.0 for w in weights):
        ax.contour(x, y, z, contourLines, colors=colors, alpha=alpha, **kwargs)
    return fig
Ejemplo n.º 5
0
def _toSVG(mol):
  """Return the SVG rendering of ``mol``, or ``None`` when SVG output is off.

  SVG output is controlled by the module-level ``ipython_useSVG`` flag.
  Atoms in ``mol.__sssAtoms`` (when present) are passed as highlight atoms.
  """
  if ipython_useSVG:
    highlightAtoms = mol.__sssAtoms if hasattr(mol, '__sssAtoms') else []
    return Draw._moltoSVG(mol, molSize, highlightAtoms, "", kekulize)
  return None
Ejemplo n.º 6
0
def GetSimilarityMapFromWeights(mol, weights, colorMap=None, scale=-1, size=(250, 250),
                                sigma=None, coordScale=1.5, step=0.01, colors='k', contourLines=10,
                                alpha=0.5, **kwargs):
  """
  Generates the similarity map for a molecule given the atomic weights.

  Parameters:
    mol -- the molecule of interest (at least two atoms)
    weights -- the per-atom weights handed to calcAtomGaussians
    colorMap -- the matplotlib color map scheme; when None a custom 'PiWG'
                map (PiYG end colours with white in the middle) is built
    scale -- scale <= 0: the absolute maximum of the map is used as the
             maximum scale; otherwise this value is the maximum scale
    size -- the size of the figure
    sigma -- the sigma for the Gaussians; when None it is derived from the
             distance between two atom positions
    coordScale -- scaling factor for the coordinates
    step -- the step for calcAtomGaussians
    colors -- color of the contour lines
    contourLines -- if integer number N: N contour lines are drawn
                    if list(numbers): contour lines at these numbers
    alpha -- the alpha blending value for the contour lines
    kwargs -- additional arguments, passed to both MolToMPL and contour

  Returns the figure created by Draw.MolToMPL, with its first axes turned off.
  Raises ValueError if the molecule has fewer than two atoms.
  """
  if mol.GetNumAtoms() < 2:
    raise ValueError("too few atoms")

  fig = Draw.MolToMPL(mol, coordScale=coordScale, size=size, **kwargs)

  if sigma is None:
    # Use the first bond's end points when there is one, else atoms 0 and 1.
    if mol.GetNumBonds() > 0:
      first_bond = mol.GetBondWithIdx(0)
      begin, end = first_bond.GetBeginAtomIdx(), first_bond.GetEndAtomIdx()
    else:
      begin, end = 0, 1
    pos_a, pos_b = mol._atomPs[begin], mol._atomPs[end]
    squared = [(pos_a[k] - pos_b[k])**2 for k in range(2)]
    sigma = round(0.3 * math.sqrt(sum(squared)), 2)

  x, y, z = Draw.calcAtomGaussians(mol, sigma, weights=weights, step=step)

  # Symmetric colour scale around zero.
  maxScale = max(math.fabs(numpy.min(z)), math.fabs(numpy.max(z))) if scale <= 0.0 else scale

  if colorMap is None:
    # NOTE(review): cm.get_cmap is deprecated in recent matplotlib releases --
    # confirm the matplotlib version this file targets.
    two_tone = cm.get_cmap('PiYG',2)
    anchors = [two_tone(0), (1.0, 1.0, 1.0), two_tone(1)]
    colorMap = LinearSegmentedColormap.from_list('PiWG', anchors, N=255)

  ax = fig.axes[0]
  ax.imshow(z, cmap=colorMap, interpolation='bilinear', origin='lower',
            extent=(0, 1, 0, 1), vmin=-maxScale, vmax=maxScale)

  # Contour lines are only drawn when at least one weight is non-zero.
  if any(w != 0.0 for w in weights):
    contour_set = ax.contour(x, y, z, contourLines, colors=colors, alpha=alpha, **kwargs)
    # NOTE(review): ContourSet.collections is deprecated in recent matplotlib
    # releases -- confirm the matplotlib version this file targets.
    for pos, line_coll in enumerate(contour_set.collections):
      level = contour_set.levels[pos]
      if level == 0.0:
        # hide the zero-level line
        line_coll.set_linewidth(0.0)
      elif level < 0:
        # negative levels are drawn dashed
        line_coll.set_dashes([(0, (3.0, 3.0))])

  ax.set_axis_off()
  return fig
Ejemplo n.º 7
0
def MolToImage(mol, max_size=(1000, 1000), kekulize=True, options=None,
               canvas=None, **kwargs):
    '''Wrapper for RDKit's MolToImage that can also draw reaction arrows.

    - ``mol == '->'``: a 160x160 image with a single-shaft arrow is returned.
    - ``mol is None``: a 160x160 image with a double-shaft arrow is returned
      (the "retro arrow or error" case).
    - anything else: the call is forwarded to ``Draw.MolToImage`` with
      ``size=max_size`` and the remaining arguments.

    When ``options`` is falsy, ``defaultDrawOptions()`` is used. For the two
    arrow cases the ``canvas`` argument is ignored and a fresh canvas from
    ``Draw._createCanvas`` is used.
    '''
    if not options:
        options = defaultDrawOptions()

    subImgSize = (160, 160)
    width, height = subImgSize
    mid_y = height // 2
    tip = (width - 10, mid_y)

    if mol == '->':
        # shaft plus the two strokes of the arrow head
        segments = [
            ((10, mid_y), tip),
            ((width - 20, mid_y - 10), tip),
            ((width - 20, mid_y + 10), tip),
        ]
    elif mol is not None:
        return Draw.MolToImage(mol, size=max_size, kekulize=kekulize, options=options,
                               canvas=canvas, **kwargs)
    else:  # retro arrow or error
        # two parallel shafts plus the two strokes of the arrow head
        segments = [
            ((10, mid_y - 7), (width - 17, mid_y - 7)),
            ((10, mid_y + 7), (width - 17, mid_y + 7)),
            ((width - 24, mid_y - 14), tip),
            ((width - 24, mid_y + 14), tip),
        ]

    img, canvas = Draw._createCanvas(subImgSize)
    for start, end in segments:
        canvas.addCanvasLine(start, end, lineWidth=2, color=(0, 0, 0))

    # Canvas backends expose either flush() or save() to finish drawing.
    if hasattr(canvas, 'flush'):
        canvas.flush()
    else:
        canvas.save()
    return img
Ejemplo n.º 8
0
def MycreateCanvas(size, color='white'):
    """Create an RGBA image of ``size`` filled with ``color`` plus a canvas on it.

    Returns ``(img, canvas)`` when ``Draw._getCanvas()`` reports an AGG or
    Cairo backend, and ``(None, None)`` otherwise.
    """
    # noinspection PyProtectedMember
    useAGG, useCairo, Canvas = Draw._getCanvas()
    if not (useAGG or useCairo):
        return None, None

    # Prefer a top-level ``Image`` module, falling back to PIL's.
    try:
        import Image
    except ImportError:
        from PIL import Image

    img = Image.new("RGBA", size, color)
    return img, Canvas(img)
Ejemplo n.º 9
0
def generateSimilarityMaps(mols, weights, fp):
    """Draw and save one similarity map per molecule.

    For the molecule at position ``i`` the weights ``weights[i]`` are used and
    the figure is written to ``pics/mol<i+1>_<fp>.png``.
    """
    # colormap to use
    mycm = cm.PiYG

    for idx, mol in enumerate(mols):
        fig = Draw.MolToMPL(mol, coordScale=1.5, size=(250,250))
        # the values 0.02 and 0.01 can be adjusted for the size of the molecule
        x, y, z = Draw.calcAtomGaussians(mol, 0.02, step=0.01, weights=weights[idx])

        # the largest absolute peak defines a symmetric colour scale
        maxscale = max(math.fabs(numpy.min(z)), math.fabs(numpy.max(z)))

        ax = fig.axes[0]
        # filled colour map
        ax.imshow(
            z,
            cmap=mycm,
            interpolation='bilinear',
            origin='lower',
            extent=(0,1,0,1),
            vmin=-maxscale,
            vmax=maxscale,
        )
        # 10 contour lines; explicit z values could be given instead
        ax.contour(x, y, z, 10, colors='k', alpha=0.5)

        # write the figure to a file
        out_name = 'pics/mol' + str(idx + 1) + '_' + fp + '.png'
        fig.savefig(out_name, bbox_inches='tight')
Ejemplo n.º 10
0
    def getMapping(moli, molj, hydrogens=False, fname=None, time_out=150):
        """
        Compute the MCS between two passed molecules

        Parameters
        ----------

        moli : RDKit molecule object
            the first molecule used to perform the MCS calculation
        molj : RDKit molecule object
            the second molecule used to perform the MCS calculation
        hydrogens : bool
            include or not the hydrogens in the MCS calculation

        fname : string
            the filename used to output a png file depicting the MCS mapping

        time_out: int
            the max time in seconds used to compute the MCS

        Returns:
        --------
        map_moli_molj: python list of tuple [...(i,j)...]
            the list of tuples which contains the atom mapping indexes between
            the two molecules. The indexes (i,j) are respectively related to
            the first (moli) and the second (molj) passed molecules

        Raises
        ------
        ValueError
            if the MCS search times out, finds no atoms, cannot be sanitized,
            or cannot be matched back onto either molecule

        """

        # Molecule copies
        moli_c = Chem.Mol(moli)
        molj_c = Chem.Mol(molj)

        if not hydrogens:
            moli_c = AllChem.RemoveHs(moli_c)
            molj_c = AllChem.RemoveHs(molj_c)

        # MCS calculation. In RDKit the MCS is a SMARTS string. Ring atoms are
        # always mapped to ring atoms.
        mcs = rdFMCS.FindMCS([moli_c, molj_c],
                             timeout=time_out,
                             atomCompare=rdFMCS.AtomCompare.CompareAny,
                             bondCompare=rdFMCS.BondCompare.CompareAny,
                             matchValences=False,
                             ringMatchesRingOnly=True,
                             completeRingsOnly=False,
                             matchChiralTag=False)

        # Checking
        if mcs.canceled:
            raise ValueError('Timeout! No MCS found between passed molecules')

        if mcs.numAtoms == 0:
            raise ValueError('No MCS was found between the molecules')

        # The found MCS pattern (SMARTS string) is converted to a RDKit molecule
        mcs_mol = Chem.MolFromSmarts(mcs.smartsString)

        try:
            Chem.SanitizeMol(mcs_mol)
        except Exception:
            # Full sanitization failed: try to recover at least the atom
            # aromaticity, which is important for the ring counter
            sanitFail = Chem.SanitizeMol(
                mcs_mol,
                sanitizeOps=Chem.SanitizeFlags.SANITIZE_SETAROMATICITY,
                catchErrors=True)
            if sanitFail:  # if not the MCS is skipped
                raise ValueError('Sanitization Failed...')

        # GetSubstructMatch returns an empty tuple when there is no match, so
        # one search per molecule is enough to both test and fetch the match.

        # mcs indexes mapped back to the first molecule moli
        moli_sub = moli_c.GetSubstructMatch(mcs_mol)
        if not moli_sub:
            raise ValueError('RDkit MCS Subgraph first molecule search failed')

        # mcs indexes mapped back to the second molecule molj
        molj_sub = molj_c.GetSubstructMatch(mcs_mol)
        if not molj_sub:
            raise ValueError(
                'RDkit MCS Subgraph second molecule search failed')

        # mcs indexes mapped onto the MCS molecule itself (used for drawing)
        mcs_sub = mcs_mol.GetSubstructMatch(mcs_mol)
        if not mcs_sub:
            raise ValueError('RDkit MCS Subgraph search failed')

        # Map between the two molecules. Materialised as a list so the result
        # matches the documented return type on Python 3 as well, where zip()
        # yields a one-shot iterator.
        map_moli_to_molj = list(zip(moli_sub, molj_sub))

        # depict the mapping by using a .png file
        if fname:
            AllChem.Compute2DCoords(moli_c)
            AllChem.Compute2DCoords(molj_c)
            AllChem.Compute2DCoords(mcs_mol)

            # includeAtomNumbers is a process-wide drawing option: remember
            # the caller's setting and restore it even if drawing/saving fails.
            previous_setting = DrawingOptions.includeAtomNumbers
            DrawingOptions.includeAtomNumbers = True
            try:
                img = Draw.MolsToGridImage(
                    [moli_c, molj_c, mcs_mol],
                    molsPerRow=3,
                    subImgSize=(400, 400),
                    legends=['Moli', 'Molj', 'Mcs'],
                    highlightAtomLists=[moli_sub, molj_sub, mcs_sub])

                img.save(fname)
            finally:
                DrawingOptions.includeAtomNumbers = previous_setting

        return map_moli_to_molj
Ejemplo n.º 11
0
def visualize_mol(path, new_mol):
    """Compute 2D coordinates for ``new_mol`` and draw it to the file ``path``.

    The target path is printed before the molecule is drawn.
    """
    # kk: output/draw the dividetree
    AllChem.Compute2DCoords(new_mol)
    print(path)
    Draw.MolToFile(new_mol, path)
Ejemplo n.º 12
0
def find_minimum_subgraph(smiles, selected_atoms, vis_dir=None):
    """Return the SMILES of a reduced subgraph that keeps ``selected_atoms``.

    The molecule is split into clusters with ``find_clusters``. Clusters
    holding the selected atoms are marked as selected, then unselected
    clusters are pruned from the outside in, and the atoms of the surviving
    clusters are handed to ``extract_subgraph``.

    Parameters:
      smiles -- SMILES string of the full molecule
      selected_atoms -- atom indices that must be covered by a selected cluster
      vis_dir -- optional directory; when given, three PNG files showing the
                 selected atoms, the kept atoms and the extracted subgraph
                 are written there

    Returns the SMILES string produced by ``extract_subgraph``.
    """
    mol = Chem.MolFromSmiles(smiles)
    clusters, atom_cls = find_clusters(mol)

    # Pass 1: a cluster is selected when a selected atom belongs to it alone,
    # or when it has collected votes from two multi-cluster selected atoms.
    selected_clusters = set()
    cluster_votes = {}
    for atom in selected_atoms:
        memberships = atom_cls[atom]
        assert len(memberships) > 0
        if len(memberships) == 1:
            selected_clusters.add(memberships[0])
            continue
        for cls in memberships:
            cluster_votes[cls] = cluster_votes.get(cls, 0) + 1
            if cluster_votes[cls] >= 2:
                selected_clusters.add(cls)

    # Pass 2: any selected atom still uncovered falls back to its first cluster.
    for atom in selected_atoms:
        if not any(cls in selected_clusters for cls in atom_cls[atom]):
            selected_clusters.add(atom_cls[atom][0])

    # Two clusters are neighbours when they share at least one atom.
    cluster_neighbor = {}
    for idx, members in enumerate(clusters):
        adjacent = set()
        for atom in members:
            adjacent.update(atom_cls[atom])
        adjacent.remove(idx)
        cluster_neighbor[idx] = adjacent

    # Iteratively prune unselected clusters: one is removable when it has at
    # most one remaining neighbour, or when all of its remaining neighbours
    # are neighbours of each other. Repeat until a full sweep changes nothing.
    leaf_clusters = set()
    changed = True
    while changed:
        changed = False
        for idx in range(len(clusters)):
            if idx in selected_clusters or idx in leaf_clusters:
                continue
            nbrs = cluster_neighbor[idx]
            if len(nbrs) > 1 and not all(
                    a in cluster_neighbor[b] and b in cluster_neighbor[a]
                    for a in nbrs for b in nbrs if a < b):
                continue

            leaf_clusters.add(idx)
            for other in nbrs:
                cluster_neighbor[other].remove(idx)
            changed = True

    # Atoms of every cluster that survived pruning.
    minimum_atoms = {
        atom
        for idx, members in enumerate(clusters)
        if idx not in leaf_clusters
        for atom in members
    }

    minimum_smiles, _ = extract_subgraph(smiles, minimum_atoms)

    if vis_dir is not None:
        selected_png = os.path.join(vis_dir, f'atoms_selected{len(selected_atoms)}.png')
        Draw.MolToFile(mol, filename=selected_png, highlightAtoms=selected_atoms)

        minimum_png = os.path.join(vis_dir, f'atoms_minimum{len(selected_atoms)}.png')
        Draw.MolToFile(mol, filename=minimum_png, highlightAtoms=minimum_atoms)

        extracted_png = os.path.join(vis_dir, f'atoms_minimum_extracted{len(selected_atoms)}.png')
        Draw.MolToFile(Chem.MolFromSmiles(minimum_smiles), filename=extracted_png)

    return minimum_smiles
Ejemplo n.º 13
0
def PrintAsBase64PNGString(x, renderer=None):
  """Return an HTML ``<img>`` tag embedding molecule ``x`` as a base64 PNG.

  The molecule is drawn with ``Draw.MolToImage`` and the image is passed to
  ``_get_image`` to obtain the payload. ``renderer`` is accepted but not used.
  """
  image = Draw.MolToImage(x)
  payload = _get_image(image)
  return '<img src="data:image/png;base64,%s" alt="Mol"/>' % payload
Ejemplo n.º 14
0
# Print summary information about the dataset and its first sample.
print("Dataset target: ", dataset.num_classes)
# NOTE(review): `dataset.len` is read as an attribute here; if it is a method
# on this dataset type, this prints the bound method rather than a count --
# confirm against the dataset class.
print("Dataset length: ", dataset.len)
print("Dataset sample: ", dataset[0])
print("Sample  nodes: ", dataset[0].num_nodes)
print("Sample  edges: ", dataset[0].num_edges)
print("Edges indexes: ", dataset[0].edge_index.t())

# The `x` attribute of the first sample.
print(dataset[0].x)

# Same transposed edge index as printed above, without the label.
print(dataset[0].edge_index.t())

from rdkit import Chem
from rdkit.Chem import Draw
# Build an RDKit molecule from the SMILES string stored on the first sample.
molecule = Chem.MolFromSmiles(dataset[0]["smiles"])

# Render the molecule as a 360x360 image.
fig = Draw.MolToImage(molecule, size=(360, 360))

# The output location is a hard-coded absolute path.
fig.save('/home/anaconda3/work//molecule_first.png')

#data.num_classes

#data.num_edges

#data.num_node_features

#data.contains_isolated_nodes()

#data.contains_self_loops()

#data.is_directed()
    # Start the list with the reference SMILES for this latent index.
    s_list = [s_origin[i_latent]]

    ### Generated Molecules by VAE
    # Sample around the chosen latent vector: unit Gaussian noise scaled by
    # `perturb` and shifted by `latent_vector[i_latent]`.
    s_z = np.random.normal(
        0.0, 1.0, [batch_size, latent_dim]) * perturb + latent_vector[i_latent]
    mol_gen = model.generate_molecule(s_z)
    for generated in mol_gen:
        # Up to 100 stochastic decodings per generated sample; keep the first
        # one that parses to a molecule and is not already in the list.
        for _ in range(100):
            s = stochastic_convert_to_smiles(generated, char)
            try:
                m = Chem.MolFromSmiles(s)
            except Exception:
                # Best effort: an unparsable decoding is simply retried.
                continue
            if m and s not in s_list:
                s_list.append(s)
                break

    print(len(s_list))
    # Print each collected SMILES string (the previous loop indexed the list
    # with its own string elements, which raises TypeError).
    for smiles_str in s_list:
        print(smiles_str)

    mol_list = [Chem.MolFromSmiles(s) for s in s_list]
    img = Draw.MolsToGridImage(mol_list, molsPerRow=5)
    #del mol_list[0]
    img.save('./figures/convVAE_' + str(i_latent) + '_' + str(perturb) +
             '.png')
Ejemplo n.º 16
0
def main(argv):
    sqlitefile = ''
    # Process command line options
    try:
        opts, args = getopt.getopt(argv, 't:s:d:',
                                   ['targetID=', 'sqlitefile=', 'panddadir='])
    except getopt.GetoptError as err:
        print err
        print 'process.py -t <TargetID> -s <SQLiteFile> -d <PANDDA dir>'
        sys.exit(2)
    if len(opts) < 3:
        print 'Missing arguments:'
        print 'process.py -t <TargetID> -s <SQLiteFile> -d <PANDDA dir>'
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print 'process.py -t <TargetID> -s <SQLiteFile>'
            sys.exit()
        elif opt in ("-t", "--targetID"):
            targetID = arg
        elif opt in ("-s", "--sqlitefile"):
            sqlitefile = arg
        elif opt in ("-d", "--panddadir"):
            panddadir = arg

    # Create directory structure
    if not os.path.exists(panddadir + "/compoundImages"):
        os.makedirs(panddadir + "/compoundImages")
    if not os.path.exists(panddadir + "/icbs"):
        os.makedirs(panddadir + "/icbs")
    if not os.path.exists(panddadir + "/pdbs"):
        os.makedirs(panddadir + "/pdbs")
    if not os.path.exists(panddadir + "/maps"):
        os.makedirs(panddadir + "/maps")
    if not os.path.exists(panddadir + "/residueplots"):
        os.makedirs(panddadir + "/residueplots")
    if not os.path.exists(panddadir + "/mapImages"):
        os.makedirs(panddadir + "/mapImages")

    # Create HTML file and write header
    htmlfile = open(panddadir + "/index.html", "w")
    htmlfile.write("<html>\n")
    htmlfile.write("<head>\n")
    htmlfile.write(
        '<link rel="stylesheet" type="text/css" href="css/jquery.dataTables.min.css">\n'
    )
    htmlfile.write(
        '<meta http-equiv="Content-type" content="text/html; charset=utf-8">\n'
    )
    htmlfile.write(
        '<meta name="viewport" content="width=device-width,initial-scale=1">\n'
    )
    htmlfile.write('<title>' + targetID + ' Fragment Hits</title>\n')
    htmlfile.write(
        '<script type="text/javascript" language="javascript" src="js/jquery-1.12.3.min.js">\n'
    )
    htmlfile.write('</script>\n')
    htmlfile.write(
        '<script type="text/javascript" language="javascript" src="js/jquery.dataTables.min.js">\n'
    )
    htmlfile.write('</script>\n')
    htmlfile.write('<script type="text/javascript" class="init">\n')
    htmlfile.write('$(document).ready(function() {\n')
    htmlfile.write("$('#example').DataTable( {\n")
    htmlfile.write("'bautoWidth': false,\n")
    htmlfile.write("'columns': [\n")
    htmlfile.write("{ 'width': '6%' },\n")
    htmlfile.write("{ 'width': '6%' },\n")
    htmlfile.write("{ 'width': '7%' },\n")
    htmlfile.write("{ 'width': '8%' },\n")
    htmlfile.write("{ 'width': '6%' },\n")
    htmlfile.write("{ 'width': '6%' },\n")
    htmlfile.write("{ 'width': '9%' },\n")
    htmlfile.write("{ 'width': '9%' },\n")
    htmlfile.write("{ 'width': '12%' },\n")
    htmlfile.write("{ 'width': '5%' },\n")
    htmlfile.write("{ 'width': '4%' },\n")
    htmlfile.write("{ 'width': '6%' },\n")
    htmlfile.write("{ 'width': '6%' },\n")
    htmlfile.write("{ 'width': '3%' },\n")
    htmlfile.write("{ 'width': '4%' },\n")
    htmlfile.write("{ 'width': '3%' }\n")
    htmlfile.write("]\n")
    htmlfile.write('} )\n')
    htmlfile.write('} );\n')
    htmlfile.write('</script>\n')
    htmlfile.write("</head>\n")
    htmlfile.write("<body>\n")
    htmlfile.write("<H3>Ligand-bound models for " + targetID + "</h3>")
    htmlfile.write("""<h4>Interpreting 'Ligand confidence'</h4>
<p><u>4 - High Confidence:</u>  The expected ligand was easily interpretable from clear density, and subsequent refinement was well-behaved.  This ligand can be trusted.
<br><u>3 - Clear density, unexpected ligand:</u>  Density very clearly showed a well-defined ligand, but that ligand was unexpected in that crystal/dataset.  The observed ligand was modelled anyway, because its presence could be explained in some way.
<br><u>2 - Correct ligand, weak density:</u>  Though density was weak, it was possible to model the expected ligand, possibly including other circumstantial evidence (e.g. similar ligand in another model).
<br><u>1 - Low Confidence:</u>  The ligand model is to be treated with scepticism, because the evidence (density, identity, pose) were not convincing.
<h4>Interpreting 'Model status':</h4>
<p><u>6 - Deposited:</u>  The model has been deposited in the PDB.
<br><u>5 - Deposition ready:</u>  The model is fully error-free, in every residue, and is ready for deposition.
<br><u>4 - CompChem ready:</u>  The model is complete and correct in the region of the bound ligand.  There may be remaining small errors elsewhere in the structure, but they are far away and unlikely to be relevant to any computational analysis or compound design.
<h4>Interpreting 'Ligand validation' spider plots:</h4>  Each axis represents one of the values described below; small is better, and large values on any axis implies that further investigation is warranted.
<p><u>Quality (RSCC)</u> reflects the fit of the atoms to the experimental density, and should typically be greater than 0.7.
<br><u>Accuracy (RSZD)</u> measures the amount of difference density that is found around these atoms, and should be below 3.
<br><u>B-factor ratio</u> measures the consistency of the model with surrounding protein, and is calculated from the B factors of respectively the changed atoms and all side-chain atoms within 4&#8491;.  Large values (>3) reflect poor evidence for the model, and intermediate values (1.5+) indicate errors in refinement or modelling; for weakly-binding ligands, systematically large ratios may be justifiable.
<br><u>RMSD</u> compares the positions of all atoms built into event density, with their positions after final refinement, and should be below 1&#8491;.
<br><u>Precision (RSZO/OCC)</u> measures how clear the density is after refinement.  (This is not a quality indicator, but is related to strength of binding but not in a straightforward way.)
<p></p>\n""")
    htmlfile.write("<h4>Download data</h4>\n")
    htmlfile.write("<ul>\n")
    htmlfile.write(
        "<li><a href='pdbs/allPDBs.zip'>Download all PDB model files<a></li>\n"
    )
    htmlfile.write(
        "<li><a href='maps/allEventMaps.zip'>Download all Event Map files<a></li>\n"
    )
    htmlfile.write("</ul>")
    htmlfile.write('<table id="example" class="display" cellspacing="0">\n')
    htmlfile.write("<thead>\n")
    htmlfile.write("<tr>\n")
    htmlfile.write("<th>Model Name</th>\n")
    htmlfile.write("<th>Compound SMILES</th>\n")
    htmlfile.write("<th>Compound Structure</th>\n")
    htmlfile.write("<th>Site Name</th>\n")
    htmlfile.write("<th>Ligand Confidence</th>\n")
    htmlfile.write("<th>Model Status</th>\n")
    htmlfile.write("<th>Ligand Validation</th>\n")
    htmlfile.write("<th>Event Map 3D</th>\n")
    htmlfile.write("<th>Comment</th>\n")
    htmlfile.write("<th>PDB Identifier</th>\n")
    htmlfile.write("<th>Resol</th>\n")
    htmlfile.write("<th>Spacegroup</th>\n")
    htmlfile.write("<th>Cell</th>\n")
    htmlfile.write("<th>PDB</th>\n")
    htmlfile.write("<th>MTZ</th>\n")
    htmlfile.write("<th>Event Map</th>\n")
    htmlfile.write("</tr>\n")
    htmlfile.write("</thead>\n")
    htmlfile.write("<tfoot>\n")
    htmlfile.write("<tr>\n")
    htmlfile.write("<th>Model Name</th>\n")
    htmlfile.write("<th>Compound SMILES</th>\n")
    htmlfile.write("<th>Compound Structure</th>\n")
    htmlfile.write("<th>Site Name</th>\n")
    htmlfile.write("<th>Ligand Confidence</th>\n")
    htmlfile.write("<th>Model Status</th>\n")
    htmlfile.write("<th>Ligand Validation</th>\n")
    htmlfile.write("<th>Event Map 3D</th>\n")
    htmlfile.write("<th>Comment</th>\n")
    htmlfile.write("<th>PDB Identifier</th>\n")
    htmlfile.write("<th>Resol</th>\n")
    htmlfile.write("<th>Spacegroup</th>\n")
    htmlfile.write("<th>Cell</th>\n")
    htmlfile.write("<th>PDB</th>\n")
    htmlfile.write("<th>MTZ</th>\n")
    htmlfile.write("<th>Event Map</th>\n")
    htmlfile.write("</tr>\n")
    htmlfile.write("</tfoot>\n")
    htmlfile.write("<tbody>\n")

    # Now walk through the input data
    with open('foricm.csv', 'wb') as f:
        with sqlite3.connect(sqlitefile) as c:
            c.row_factory = sqlite3.Row
            cur = c.cursor()

            #      sql = ( "select p.ID,p.CrystalName,p.PANDDA_site_event_index,p.CrystalName || '_event'|| p.PANDDA_site_event_index "
            #              " as ModelName,m.CompoundCode,m.CompoundSMILES,p.PANDDA_site_name,p.PANDDA_site_confidence "
            #              " as LigandConfidence,p.RefinementOutcome "
            #              " as ModelStatus,p.PANDDA_site_comment,p.PANDDA_site_x,p.PANDDA_site_y,p.PANDDA_site_z, "
            #              "                p.PANDDA_site_spider_plot,m.DataProcessingResolutionHigh,m.DataProcessingSpaceGroup,"
            #              "                m.DataProcessingUnitCell,m.RefinementPDB_latest,m.RefinementMTZ_latest,p.PANDDA_site_event_map "
            #              " from panddaTable as p, mainTable as m "
            #              " where p.CrystalName=m.CrystalName and p.PANDDA_site_ligand_placed='True' and "
            #              "       (LigandConfidence like '1%' or LigandConfidence like '2%' or LigandConfidence like '3%' or LigandConfidence like '4%') "
            #              " order by p.CrystalName,ModelStatus desc,PANDDA_site_event_index"
            #      )

            # query below is without the LigandConfidence being constrained; this is because some older DBs don't have a starting digit
            # here we constrain RefinementOutcome of site
            #      cur.execute("select p.ID,p.CrystalName,p.PANDDA_site_event_index,p.CrystalName || '_event'|| p.PANDDA_site_event_index as ModelName,m.CompoundCode,m.CompoundSMILES,m.Deposition_PDB_ID,p.PANDDA_site_name,p.PANDDA_site_confidence as LigandConfidence,p.RefinementOutcome as ModelStatus,p.PANDDA_site_comment,p.PANDDA_site_x,p.PANDDA_site_y,p.PANDDA_site_z, p.PANDDA_site_spider_plot,m.DataProcessingResolutionHigh,m.DataProcessingSpaceGroup,m.DataProcessingUnitCell,m.RefinementBoundConformation,m.RefinementMTZ_latest,p.PANDDA_site_event_map from panddaTable as p, mainTable as m where p.CrystalName=m.CrystalName and p.PANDDA_site_ligand_placed='True' and (p.RefinementOutcome like '4%' or p.RefinementOutcome like '5%' or p.RefinementOutcome like '6%')  order by p.CrystalName,ModelStatus desc,PANDDA_site_event_index")

            sql = (
                "select p.ID,p.CrystalName,p.PANDDA_site_event_index,p.CrystalName || '_event'|| p.PANDDA_site_event_index "
                " as ModelName,m.CompoundCode,m.CompoundSMILES,m.Deposition_PDB_ID,p.PANDDA_site_name,"
                " p.PANDDA_site_confidence as LigandConfidence,"
                " p.RefinementOutcome as ModelStatus,"
                " p.PANDDA_site_comment,p.PANDDA_site_x,p.PANDDA_site_y,p.PANDDA_site_z, p.PANDDA_site_spider_plot,"
                " m.DataProcessingResolutionHigh,m.DataProcessingSpaceGroup,m.DataProcessingUnitCell,"
                " m.RefinementBoundConformation,m.RefinementMTZ_latest,"
                " p.PANDDA_site_event_map from panddaTable as p, "
                " mainTable as m where p.CrystalName=m.CrystalName and p.PANDDA_site_ligand_placed='True' "
                " and (p.RefinementOutcome like '4%' or p.RefinementOutcome like '5%' or p.RefinementOutcome like '6%')  "
                " and (LigandConfidence like '1%' or LigandConfidence like '2%' or LigandConfidence like '3%' or LigandConfidence like '4%')"
                " order by p.CrystalName,ModelStatus desc,PANDDA_site_event_index"
            )

            sql = (
                "select p.ID,p.CrystalName,p.PANDDA_site_event_index,p.CrystalName || '_event'|| p.PANDDA_site_event_index "
                " as ModelName,m.CompoundCode,m.CompoundSMILES,m.Deposition_PDB_ID,p.PANDDA_site_name,"
                " p.PANDDA_site_confidence as LigandConfidence,"
                " p.RefinementOutcome as ModelStatus,"
                " p.PANDDA_site_comment,p.PANDDA_site_x,p.PANDDA_site_y,p.PANDDA_site_z, p.PANDDA_site_spider_plot,"
                " m.DataProcessingResolutionHigh,m.DataProcessingSpaceGroup,m.DataProcessingUnitCell,"
                " m.RefinementBoundConformation,m.RefinementMTZ_latest,"
                " p.PANDDA_site_event_map from panddaTable as p, "
                " mainTable as m where p.CrystalName=m.CrystalName and p.PANDDA_site_ligand_placed='True' "
                " and (m.RefinementOutcome like '4%' or m.RefinementOutcome like '5%' or m.RefinementOutcome like '6%')  "
                " and (LigandConfidence like '1%' or LigandConfidence like '2%' or LigandConfidence like '3%' or LigandConfidence like '4%')"
                " order by p.CrystalName,ModelStatus desc,PANDDA_site_event_index"
            )

            cur.execute(sql)

            rows = cur.fetchall()
            if not rows:
                print '==> WARNING: none of your samples seems to be at least CompChem ready (4)'
                return None
            writer = csv.DictWriter(f, fieldnames=rows[1].keys())
            writer.writeheader()
            for row in rows:
                # Make compound structure
                print row['ModelName'], row['PANDDA_site_spider_plot']
                compound = Chem.MolFromSmiles(
                    row['CompoundSMILES'].encode("ascii"))
                Draw.MolToFile(
                    compound, panddadir + '/compoundImages/' +
                    row['CompoundCode'] + '.png', (150, 150))
                # Write out table information for event
                eventID = row['ModelName'] + "_" + row['CompoundCode']
                actID = (row['ModelName'] + row['CompoundCode']).replace(
                    targetID + '-', '')
                writeTableRow(row, htmlfile)
                writeICBPage(row, panddadir)
                try:
                    shutil.copy(
                        row['RefinementBoundConformation'],
                        panddadir + "/pdbs/" + row['ModelName'] + ".pdb")
                    shutil.copy(
                        row['RefinementMTZ_latest'],
                        panddadir + "/maps/" + row['ModelName'] + ".mtz")
                    shutil.copy(
                        row['PANDDA_site_event_map'],
                        panddadir + "/maps/" + row['ModelName'] + ".ccp4")
                    if row['PANDDA_site_spider_plot'] is not None:
                        shutil.copy(
                            row['PANDDA_site_spider_plot'], panddadir +
                            "/residueplots/" + row['ModelName'] + ".png")
                except (IOError, TypeError):
                    print '*** WARNING: cannot find PDB and/or MTZ of ' + row[
                        'ModelName'] + ' ***'
                    print 'PDB bound  :', row['RefinementBoundConformation']
                    print 'MTZ        :', row['RefinementMTZ_latest']
                    print 'event map  :', row['PANDDA_site_event_map']
                    print 'spider plot:', row['PANDDA_site_spider_plot']
                    pass


#        shutil.copy(row['RefinementPDB_latest'],panddadir+"/pdbs/"+row['ModelName']+".pdb")
#        if row['PANDDA_site_spider_plot'] is not None:
#          shutil.copy(row['PANDDA_site_spider_plot'],panddadir+"/residueplots/"+row['ModelName']+".png")
# Write row to CSV for ICM
                writer.writerow(dict(row))

    # Conclude HTML
    htmlfile.write("</tbody>\n")
    htmlfile.write("</table>\n")
    htmlfile.write("</body>\n")
    htmlfile.write("</html>\n")
    htmlfile.close()

    # Copy JS & CSS files
    if not os.path.exists(panddadir + "/js"):
        os.makedirs(panddadir + "/js")
    if not os.path.exists(panddadir + "/css"):
        os.makedirs(panddadir + "/css")
    shutil.copy(
        os.path.join(os.getenv('XChemExplorer_DIR'),
                     "web/jscss/css/jquery.dataTables.min.css"),
        panddadir + "/css/jquery.dataTables.min.css")
    shutil.copy(
        os.path.join(os.getenv('XChemExplorer_DIR'),
                     "web/jscss/js/jquery-1.12.3.min.js"),
        panddadir + "/js/jquery-1.12.3.min.js")
    shutil.copy(
        os.path.join(os.getenv('XChemExplorer_DIR'),
                     "web/jscss/js/jquery.dataTables.min.js"),
        panddadir + "/js/jquery.dataTables.min.js")

    # Create zip files
    print "Creating zipfile of PDBs..."
    os.chdir(panddadir + "/pdbs")
    zf = zipfile.ZipFile("allPDBs.zip", "w")
    for pdb in glob.glob("*.pdb"):
        zf.write(pdb)
    zf.close()

    print "Creatig zipfile of event maps..."
    os.chdir("../maps")
    zf = zipfile.ZipFile("allEventMaps.zip", "w")
    for pdb in glob.glob("*.mtz"):
        zf.write(pdb)
    zf.close()

    # change folder permissions
    os.system('chmod -R 775 {0!s}'.format(panddadir))

    return
Ejemplo n.º 17
0
def draw_dummy_core_ixns(mol, core, bonds, dummy_group, color_blind=False):
    """
    Draw a grid of molecules with interactions between atoms in dummy_group
    and the core highlighted.

    One grid cell is produced per entry of ``bonds``. Every atom is
    highlighted: core atoms, dummy-group atoms and all remaining atoms get
    distinct "inactive" colours, and the atoms taking part in the current
    interaction are recoloured and labelled with their atom index.

    Parameters
    ----------
    mol
        Molecule to draw; a fresh ``Chem.Mol`` copy is made for each cell.
    core
        Collection of atom indices forming the core. Must not overlap
        ``dummy_group``.
    bonds
        List of atom-index sequences: length 2 is labelled "bond", 3 "angle",
        4 "proper" (or "improper" when two consecutive atoms are not bonded).
        NOTE: the list is sorted in place by length.
    dummy_group
        Collection of atom indices forming the dummy group.
    color_blind
        When true, use the alternative palette built with ``rgb_to_decimal``.

    Returns
    -------
    The value returned by ``Draw.MolsToGridImage(..., useSVG=True)``.

    Raises
    ------
    AssertionError
        If ``core`` and ``dummy_group`` overlap, if two consecutive atoms of
        a non-4-membered term are not bonded, or if a term has a length other
        than 2, 3 or 4.
    """

    if color_blind:
        COLOR_DUMMY_IXN = rgb_to_decimal(230, 159, 0)
        COLOR_DUMMY_ACTIVE = rgb_to_decimal(240, 228, 66)
        COLOR_DUMMY_INACTIVE = rgb_to_decimal(0, 158, 115)
        COLOR_CORE_ACTIVE = rgb_to_decimal(213, 94, 0)
        COLOR_CORE_INACTIVE = rgb_to_decimal(204, 121, 167)
    else:
        COLOR_DUMMY_IXN = (0, 0.7, 0)
        COLOR_DUMMY_ACTIVE = (0.6, 1, 0.6)
        COLOR_DUMMY_INACTIVE = (0.188, 0.835, 0.784)
        COLOR_CORE_ACTIVE = (0.9, 0.5, 0.5)
        COLOR_CORE_INACTIVE = (1, 0.8, 0.8)
    # The bond highlight colour is identical in both palettes.
    COLOR_BOND = (0.96, 0.74, 0)

    # Raised explicitly (rather than via `assert`) so the check survives -O.
    if set(core).intersection(dummy_group):
        raise AssertionError("core and dummy_group must not share atoms")

    highlightAtomLists = []
    highlightBondLists = []
    highlightAtomColorsLists = []
    highlightBondColorsLists = []
    all_mols = []
    legends = []

    # Shortest terms first: bonds, then angles, then torsions.
    bonds.sort(key=len)

    for atom_idxs in bonds:

        mol_copy = Chem.Mol(mol)
        highlightAtomColors = {}
        highlightBondColors = {}
        highlightBonds = set()

        # default colors
        for a in range(mol_copy.GetNumAtoms()):
            if a in core:
                highlightAtomColors[a] = COLOR_CORE_INACTIVE
            elif a in dummy_group:
                highlightAtomColors[a] = COLOR_DUMMY_ACTIVE
            else:
                highlightAtomColors[a] = COLOR_DUMMY_INACTIVE

        # interacting atoms: label with the atom index and override the colour
        for a in atom_idxs:
            atom = mol_copy.GetAtomWithIdx(int(a))
            atom.SetProp("molAtomMapNumber", str(atom.GetIdx()))
            if a in core:
                highlightAtomColors[a] = COLOR_CORE_ACTIVE
            else:
                highlightAtomColors[a] = COLOR_DUMMY_IXN

        # Highlight the bond between each pair of consecutive atoms.
        is_improper = False
        for idx in range(len(atom_idxs) - 1):
            bond = mol_copy.GetBondBetweenAtoms(int(atom_idxs[idx]), int(atom_idxs[idx + 1]))

            # this may be none if we have an improper torsion
            if bond is None:
                if len(atom_idxs) != 4:
                    raise AssertionError("Bad idxs")
                is_improper = True
            else:
                highlightBonds.add(bond.GetIdx())
                highlightBondColors[bond.GetIdx()] = COLOR_BOND

        highlightAtomLists.append(list(range(mol_copy.GetNumAtoms())))
        highlightBondLists.append(list(highlightBonds))
        highlightAtomColorsLists.append(highlightAtomColors)
        highlightBondColorsLists.append(highlightBondColors)

        if is_improper:
            label = "improper"
        elif len(atom_idxs) == 2:
            label = "bond"
        elif len(atom_idxs) == 3:
            label = "angle"
        elif len(atom_idxs) == 4:
            label = "proper"
        else:
            raise AssertionError(
                "unsupported number of atoms in term: %d" % len(atom_idxs))

        all_mols.append(mol_copy)
        legends.append(label + " " + repr([int(x) for x in atom_idxs]))

    return Draw.MolsToGridImage(
        all_mols,
        molsPerRow=4,
        highlightAtomLists=highlightAtomLists,
        highlightAtomColors=highlightAtomColorsLists,
        highlightBondLists=highlightBondLists,
        highlightBondColors=highlightBondColorsLists,
        subImgSize=(250, 250),
        legends=legends,
        useSVG=True,
    )
Ejemplo n.º 18
0
#!/usr/bin/env python
import rdkit.Chem.Draw as Draw
import rdkit.Chem.Crippen as Crippen
import rdkit.Chem as Chem
import matplotlib.cm as cm

if __name__ == "__main__":
    # Draw propane with matplotlib, overlay a per-atom Gaussian weight map
    # (filled image plus contour lines) and save the figure as a PNG.
    propane = Chem.MolFromSmiles("CCC")
    figure = Draw.MolToMPL(propane)
    grid_x, grid_y, density = Draw.calcAtomGaussians(
        propane, 0.03, step=0.01, weights=(40, 1, 3))
    axes = figure.axes[0]
    axes.imshow(density,
                cmap=cm.Oranges,
                interpolation='bilinear',
                origin='lower',
                extent=(0, 1, 0, 1))
    axes.contour(grid_x, grid_y, density, 20, colors='k', alpha=0.5)
    figure.savefig('coumlogps.colored.png', bbox_inches='tight')
Ejemplo n.º 19
0
 def testGithubIssue54(self):
     # Select the 'sping' canvas, then check that a molecule containing a
     # radical oxygen still yields a truthy image object.
     os.environ['RDKIT_CANVAS'] = 'sping'
     radical = Chem.MolFromSmiles('c1([O])ccc(O)cc1')
     self.assertTrue(Draw.MolToImage(radical))
Ejemplo n.º 20
0
 def testGithub1829(self):
   # Drawing an empty tuple of molecules must run to completion without raising.
   drawer = Draw.MolDraw2DSVG(300, 300, 100, 100)
   drawer.DrawMolecules(())
   drawer.FinishDrawing()
   drawer.GetDrawingText()
Ejemplo n.º 21
0
def main():
    """Run the Streamlit app: filter molecules by MW / cLogP and draw a sample.

    Loads the molecule table with ``load_df()`` (columns used here: ``mw``,
    ``clogp``, ``smiles``, ``zinc_id``), renders explanatory text and four
    range sliders, reports how many molecules satisfy the filters and, when
    the button is pressed, draws up to 24 randomly sampled molecules.
    """
    #first things first, load the dataframe of molecules and their properties:
    df = load_df()

    #handy values: pad the observed range by one unit on BOTH sides so the
    #slider bounds always enclose every molecule in the table.
    mwmin = float(df['mw'].min()-1)
    mwmax = float(df['mw'].max()+1)
    clogpmin = float(df['clogp'].min()-1)
    clogpmax = float(df['clogp'].max()+1)

    #print out some explanation stuff in the sidebar:
    st.sidebar.title("WDMPLL?")
    st.sidebar.write("If you want to see your favourite molecular property included, drop a line at [@lewischewis](https://twitter.com/lewischewis) or ljmartin at hey dot com, or open a github issue")
    st.sidebar.write("""If you ask 'but why?' or 'but how?', see the readme at the [github page](https://github.com/ljmartin/what_do_mol_prop_look_like)""")
    st.sidebar.write('Click the ✖️ to close this bar and widen the view')

    #and some intro text in the main frame:
    st.title('What do molecular properties look like?')
    st.write("""The [Lipinski Ro5](https://en.wikipedia.org/wiki/Lipinski%27s_rule_of_five) helps people focus their drug discovery efforts on the molecules most likely to make good therapeutic drugs.""")
    st.write("""But, [increasingly](https://doi.org/10.1021/acs.jmedchem.8b00686), drug-like molecules break the Ro5, so it's helpful to push the boundaries of molecular properties when considering a molecule library. One way to get a feel for how far they can be pushed is to just stare at molecules in a certain property-space and decide if they look reasonable or not.""")
    st.write("""### Instructions""")
    st.write('There are sliders below that set the minimum or maximum Molecular Weight (MW) or calculated logP (cLogP). First, set a desired range. Then, click the "**Show Sample**" button. A small sample of 24 molecules satisfying the filters will be chosen and visualized. Just click it again to get a new batch.')

    st.write("""### Histograms """)
    st.write("If you set an unrealistic range, there won't be any molecules left. There are 500k molecules in the set, but the distribution isn't uniform. Here's a guide to help:")
    st.image('density.svg')

    st.write("""### Filters:""")
    ###now the app:

    #property sliders; defaults sit at 5% / 95% of each padded range:
    mw_min = st.slider('Molecular weight (MW) min:',
                       min_value = mwmin,
                       max_value = mwmax,
                       value = (mwmax-mwmin)*0.05 + mwmin,
                       step=0.05
                       )
    mw_max = st.slider('Molecular weight (MW) max:',
                       min_value = mwmin,
                       max_value = mwmax,
                       value = (mwmax-mwmin)*0.95 + mwmin,
                       )

    clogp_min = st.slider('cLogP min',
                          min_value = clogpmin,
                          max_value = clogpmax,
                          value = (clogpmax-clogpmin)*0.05 + clogpmin,
                         )
    clogp_max = st.slider('cLogP max',
                          min_value = clogpmin,
                          max_value = clogpmax,
                          value = (clogpmax-clogpmin)*0.95 + clogpmin
                         )

    #rows that satisfy all four slider bounds (computed once, reused below):
    mask = (df['mw'] <= mw_max) & (df['mw'] >= mw_min) \
            & (df['clogp'] <= clogp_max) & (df['clogp'] >= clogp_min)
    n_left = mask.sum()

    st.write("""### Molecules:""")
    st.write('Number of molecules left: ', n_left)

    #this is the main event. Based on the filters/sliders above:
    #1. select a random sample of N ligands that meet the selected filter.
    #2. turn them into molecules,
    #3. and draw!
    N = 24
    if st.button('Show sample'):

        ##quick error check: nothing to sample from
        if not n_left > 0:
            st.write('Set the property filters again - there are no molecules that fit those parameters')

        ##good to go.
        else:
            ##1:
            sample = df[mask].sample(min([N, n_left]))
            ##2:
            mols = [Chem.MolFromSmiles(i) for i in sample['smiles']]

            ##3:
            st.image(Draw.MolsToGridImage(mols, molsPerRow=6, legends=list(sample['zinc_id'])))
Ejemplo n.º 22
0
  def testRepeatedPrepareForDrawingCalls(self):
    # Parse a 2D mol block, run Draw.PrepareMolForDrawing on it, then run it
    # again on the result. After each call the bond between atoms 2 and 1 must
    # be SINGLE with no direction, and the bond between atoms 2 and 7 must be
    # SINGLE with direction BEGINWEDGE, i.e. the second call must not change
    # what the first call produced for these two bonds.
    m = Chem.MolFromMolBlock("""
          11280715312D 1   1.00000     0.00000     0

 33 36  0     1  0            999 V2000
    7.6125   -5.7917    0.0000 C   0  0  0  0  0  0           0  0  0
    7.0917   -6.0917    0.0000 C   0  0  1  0  0  0           0  0  0
    6.4792   -6.8917    0.0000 C   0  0  2  0  0  0           0  0  0
    8.1292   -6.0792    0.0000 N   0  0  0  0  0  0           0  0  0
    5.5042   -6.8917    0.0000 C   0  0  3  0  0  0           0  0  0
   11.2375   -4.8542    0.0000 N   0  0  0  0  0  0           0  0  0
    9.6792   -5.1667    0.0000 N   0  0  3  0  0  0           0  0  0
    5.9917   -6.5417    0.0000 C   0  0  0  0  0  0           0  0  0
    7.6042   -5.1917    0.0000 O   0  0  0  0  0  0           0  0  0
   10.7167   -5.1625    0.0000 C   0  0  0  0  0  0           0  0  0
    6.2917   -7.4667    0.0000 C   0  0  0  0  0  0           0  0  0
    6.5750   -5.7917    0.0000 C   0  0  0  0  0  0           0  0  0
   10.2000   -4.8667    0.0000 C   0  0  0  0  0  0           0  0  0
    8.6500   -5.7792    0.0000 C   0  0  3  0  0  0           0  0  0
    8.6417   -5.1792    0.0000 C   0  0  0  0  0  0           0  0  0
    9.1667   -6.0750    0.0000 C   0  0  0  0  0  0           0  0  0
    9.6875   -5.7667    0.0000 C   0  0  0  0  0  0           0  0  0
    9.1542   -4.8750    0.0000 C   0  0  0  0  0  0           0  0  0
    5.6917   -7.4667    0.0000 C   0  0  0  0  0  0           0  0  0
    5.2042   -7.4042    0.0000 F   0  0  0  0  0  0           0  0  0
    4.9875   -6.5917    0.0000 F   0  0  0  0  0  0           0  0  0
    7.5167   -6.5167    0.0000 O   0  0  0  0  0  0           0  0  0
   11.7542   -5.1500    0.0000 C   0  0  0  0  0  0           0  0  0
   11.2417   -6.0542    0.0000 C   0  0  0  0  0  0           0  0  0
   10.7250   -5.7625    0.0000 C   0  0  0  0  0  0           0  0  0
    6.5750   -5.1917    0.0000 C   0  0  0  0  0  0           0  0  0
    6.0542   -6.0917    0.0000 C   0  0  0  0  0  0           0  0  0
   11.7667   -5.7542    0.0000 C   0  0  0  0  0  0           0  0  0
   12.2750   -4.8417    0.0000 C   0  0  0  0  0  0           0  0  0
    6.0542   -4.8917    0.0000 C   0  0  0  0  0  0           0  0  0
    5.5375   -5.7917    0.0000 C   0  0  0  0  0  0           0  0  0
    5.5375   -5.1917    0.0000 C   0  0  0  0  0  0           0  0  0
    6.3167   -6.3042    0.0000 H   0  0  0  0  0  0           0  0  0
  2  1  1  0     0  0
  3  2  1  0     0  0
  4  1  1  0     0  0
  5  8  1  0     0  0
  6 10  1  0     0  0
  7 17  1  0     0  0
  8  3  1  0     0  0
  9  1  2  0     0  0
 10 13  1  0     0  0
 11  3  1  0     0  0
 12  2  1  0     0  0
 13  7  1  0     0  0
 14  4  1  0     0  0
 15 14  1  0     0  0
 16 14  1  0     0  0
 17 16  1  0     0  0
 18 15  1  0     0  0
 19 11  1  0     0  0
 20  5  1  0     0  0
 21  5  1  0     0  0
  2 22  1  6     0  0
 23  6  2  0     0  0
 24 25  1  0     0  0
 25 10  2  0     0  0
 26 12  1  0     0  0
 27 12  2  0     0  0
 28 24  2  0     0  0
 29 23  1  0     0  0
 30 26  2  0     0  0
 31 27  1  0     0  0
 32 31  2  0     0  0
  3 33  1  6     0  0
  7 18  1  0     0  0
 19  5  1  0     0  0
 32 30  1  0     0  0
 28 23  1  0     0  0
M  END""")
    # First preparation pass.
    nm = Draw.PrepareMolForDrawing(m)
    self.assertEqual(nm.GetBondBetweenAtoms(2, 1).GetBondType(), Chem.BondType.SINGLE)
    self.assertEqual(nm.GetBondBetweenAtoms(2, 1).GetBondDir(), Chem.BondDir.NONE)
    self.assertEqual(nm.GetBondBetweenAtoms(2, 7).GetBondType(), Chem.BondType.SINGLE)
    self.assertEqual(nm.GetBondBetweenAtoms(2, 7).GetBondDir(), Chem.BondDir.BEGINWEDGE)
    # Second pass on the already-prepared molecule: same expectations.
    nm = Draw.PrepareMolForDrawing(nm)
    self.assertEqual(nm.GetBondBetweenAtoms(2, 1).GetBondType(), Chem.BondType.SINGLE)
    self.assertEqual(nm.GetBondBetweenAtoms(2, 1).GetBondDir(), Chem.BondDir.NONE)
    self.assertEqual(nm.GetBondBetweenAtoms(2, 7).GetBondType(), Chem.BondType.SINGLE)
    self.assertEqual(nm.GetBondBetweenAtoms(2, 7).GetBondDir(), Chem.BondDir.BEGINWEDGE)
Ejemplo n.º 23
0
# Load the docking results for this run/step and order them by ascending
# normalised score, so the best-scoring rows end up at the tail.
samples = pd.read_csv(
    f'../cbas/slurm/results/{name}/docking_results/{step}.csv')
samples = samples.sort_values('norm_score')

smiles = samples.smile
scores = samples.norm_score
mols = [Chem.MolFromSmiles(s) for s in smiles]

# Keep only the last N_top entries of the sorted table.
mols = mols[-N_top:]
scores = scores[-N_top:]

save_smiles['smile'] += list(samples.smile[-N_top:])

# NOTE(review): `qeds` is defined outside this excerpt; zip() pairs it
# positionally with the N_top retained scores, so it must already be aligned
# with (and no longer than) that selection -- confirm against its definition.
img = Draw.MolsToGridImage(mols,
                           molsPerRow=4,
                           legends=[
                               f'{sc:.2f}, {q:.2f}'
                               for sc, q in zip(scores, qeds)
                           ])

soft_mkdir('plots')
img.save(f'plots/cbas_{name}_mols_{step}.png')

df = pd.DataFrame.from_dict(save_smiles)

df.to_csv('clogp_smiles.csv')

#=======

# name = 'clogp_adam_clamp_less'
#
# norm_scores = False # set to true for clogp
Ejemplo n.º 24
0
#! /usr/bin/python
# coding: utf-8
# @Time: 2020-05-29 14:36:04
# @Author: zeoy
# RDKit: modifying molecules

# 1. Import the required libraries
from rdkit import Chem
from rdkit.Chem import Draw


# 2. Adding and removing H atoms
mol = Chem.MolFromSmiles('OC1C2C1CC2')
# Draw the molecular structure
Draw.MolToImageFile(
    mol,
    '/drug_development/studyRdkit/st_rdcit/img/mol5.jpg'
)

# 2.1 Explanation of the function that adds H atoms
#
# Adds hydrogens to the molecular graph. The signature below is reference
# documentation only: it is not valid Python, so it is kept as a comment to
# let the script parse and run.
#
# rdkit.Chem.rdmolops.AddHs(
#     (Mol)mol   # the molecule to be modified
#     [, (bool) explicitOnly=False  # (optional) if this toggle is set, only explicit Hs are added to the molecule. Default is 0 (add implicit and explicit Hs).
#      [, (bool) addCoords=False  # (optional) if this toggle is set, the Hs will have 3D coordinates set. Default is 0 (no 3D coordinates).
#       [, (AtomPairsParameters) onlyOnAtoms=None  # (optional) if this sequence is provided, only these atoms are considered for having Hs added
#        [, (bool)addResidueInfo=False  # (optional) if true, residue info is added to the hydrogen atoms (useful for PDB files).
#         ]]]]
# )

# 2.2 Adding H atoms
Ejemplo n.º 25
0
        react_atoms, react_atom_tags = get_tagged_atoms_from_mol(mol_react)
        react_atoms_index = [atom.GetIdx() for atom in react_atoms]
        react_atom_index_all = [atom.GetIdx() for atom in mol_react.GetAtoms()]
        atoms_to_remove = [
            idx for idx in react_atom_index_all if idx not in react_atoms_index
        ]
        atoms_to_remove.sort(reverse=True)

        emol = Chem.EditableMol(mol_react)
        for atom in atoms_to_remove:
            emol.RemoveAtom(atom)

        mol_new = emol.GetMol()
        Chem.SanitizeMol(mol_new)
        smi_react = Chem.MolToSmiles(mol_new)
        print('smi_react:', smi_react)

        react_mols.append(mol_new)
        react_smis.append(smi_react)

    img = Draw.MolsToGridImage(react_mols,
                               molsPerRow=1,
                               subImgSize=(200, 200),
                               legends=react_smis)

    plt.imshow(img)
    plt.tight_layout()
    plt.axis('off')
    plt.show()
Ejemplo n.º 26
0
# In[4]:

# Look up the list of compound ids for the given label string.
Cnlist = f.get_Cnlist_from_label2("C(C)-C(C)-C(C-C-C-C-C-C)-C-C-C-C-C-C-C-C-C")

# In[5]:

print(Cnlist)

# In[6]:

# Read each compound's mol file, compute 2D coordinates and save a PNG named
# "<f.gene>/<Cn>back.png". The trailing `break` stops after the first entry.
for Cn in Cnlist:
    with open('KNApSAck_mol/%s.mol' % (Cn)) as fi:
        mol = Chem.MolFromMolBlock(fi.read())
        rdDepictor.Compute2DCoords(mol)
    filename = f.gene + "/" + Cn + "back.png"
    Draw.MolToFile(mol, filename)
    break

# Code for saving the figures of every Cn in Cnlist

# For now, pull out only the ones that are needed

# In[9]:

# Same as above for a fixed set of compound ids, drawn at 500x500.
for Cn in ["C00017726", "C00026595", "C00026596", "C00015229", "C00015228"]:
    with open('KNApSAck_mol/%s.mol' % (Cn)) as fi:
        mol = Chem.MolFromMolBlock(fi.read())
        rdDepictor.Compute2DCoords(mol)
    filename = f.gene + "/" + Cn + "back.png"
    Draw.MolToFile(mol, filename, size=(500, 500))
Ejemplo n.º 27
0
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
import sys
# Command-line usage: <script> SMILES OUTPUT_FILE WIDTH HEIGHT
# e.g. SMILES = "CN1C=NC2=C1C(=O)N(C(=O)N2C)C"
#
# Each print() below takes a single pre-formatted string so the output is
# the same under Python 2 and Python 3.
print('Number of arguments: %d arguments.' % len(sys.argv))
print('Argument List: %s' % str(sys.argv))

# Fail with a usage message instead of a bare IndexError on missing arguments.
if len(sys.argv) < 5:
    sys.exit('usage: %s SMILES OUTPUT_FILE WIDTH HEIGHT' % sys.argv[0])

print(sys.argv[1])
smiles = sys.argv[1]
out_path = sys.argv[2]  # not named `file`, which would shadow the Python 2 builtin
x = int(sys.argv[3])
y = int(sys.argv[4])
print(smiles)

mol = AllChem.MolFromSmiles(smiles)
# MolFromSmiles returns None for an unparsable SMILES; report that clearly.
if mol is None:
    sys.exit('could not parse SMILES: %s' % smiles)
Draw.MolToFile(mol, out_path, size=(x, y))
Ejemplo n.º 28
0
def _save_figure_as_pdf(fig, pdf_path):
    """Write a single matplotlib figure to *pdf_path* as a one-page PDF."""
    # The context manager closes the file even if savefig raises.
    with PdfPages(pdf_path) as pdf:
        pdf.savefig(fig)


def diversity_plots(dset_key, datastore=True, bucket='gsk_ml', title_prefix=None, ecfp_radius=4, out_dir=None,
                    id_col='compound_id', smiles_col='rdkit_smiles', max_for_mcs=300):
    """
    Plot visualizations of diversity for an arbitrary table of compounds. At minimum, the file should contain
    columns for a compound ID and a SMILES string.

    Compounds whose base molecule cannot be built are reported and left out of
    all subsequent calculations, so that molecules, fingerprints and compound
    IDs stay aligned.

    Args:
        dset_key: Dataset key (when ``datastore`` is true) or CSV path.
        datastore: If true, load via ``dsf.retrieve_dataset_by_datasetkey``;
            otherwise read ``dset_key`` with ``pd.read_csv``.
        bucket: Bucket passed to the datastore lookup.
        title_prefix: Plot title prefix; defaults to the dataset file name
            with underscores replaced by spaces.
        ecfp_radius: Radius for the Morgan fingerprints.
        out_dir: If not None, directory where tables, PNGs and PDFs are saved.
            Figures are still created when it is None, just not saved.
        id_col: Name of the compound ID column.
        smiles_col: Name of the SMILES column.
        max_for_mcs: The MCS-based plots are only produced when the number of
            usable compounds does not exceed this value.
    """
    # Load table of compound names, IDs and SMILES strings
    if datastore:
        cmpd_df = dsf.retrieve_dataset_by_datasetkey(dset_key, bucket)
    else:
        cmpd_df = pd.read_csv(dset_key, index_col=False)
    file_prefix = os.path.splitext(os.path.basename(dset_key))[0]
    if title_prefix is None:
        title_prefix = file_prefix.replace('_', ' ')
    compound_ids = cmpd_df[id_col].values
    smiles_strs = cmpd_df[smiles_col].values
    print(len(smiles_strs))

    # Strip salts, canonicalize SMILES strings and create RDKit Mol objects
    print("Canonicalizing molecules...")
    base_mols = [struct_utils.base_mol_from_smiles(smiles) for smiles in smiles_strs]
    valid_idx = []
    for i, mol in enumerate(base_mols):
        if mol is None:
            print('Unable to get base molecule for compound %d = %s' % (i, compound_ids[i]))
        else:
            valid_idx.append(i)
    # Drop failed compounds from both the molecules and their IDs.
    base_mols = [base_mols[i] for i in valid_idx]
    compound_ids = compound_ids[valid_idx]
    ncmpds = len(base_mols)
    print("Done")

    # Generate ECFP fingerprints
    print("Computing fingerprints...")
    fps = [AllChem.GetMorganFingerprintAsBitVect(mol, ecfp_radius, 1024) for mol in base_mols]
    print("Done")

    if ncmpds <= max_for_mcs:
        # Get MCS distance matrix and draw a heatmap
        print("Computing MCS distance matrix...")
        mcs_dist = dm.mcs(base_mols)
        print("Done")

        # Long-format table with one row per unordered compound pair.
        cmpd1 = []
        cmpd2 = []
        dist = []
        ind1 = []
        ind2 = []
        for i in range(ncmpds-1):
            for j in range(i+1, ncmpds):
                cmpd1.append(compound_ids[i])
                cmpd2.append(compound_ids[j])
                dist.append(mcs_dist[i,j])
                ind1.append(i)
                ind2.append(j)
        dist_df = pd.DataFrame({'compound_1' : cmpd1, 'compound_2' : cmpd2, 'dist' : dist,
                                'i' : ind1, 'j' : ind2})
        dist_df = dist_df.sort_values(by='dist')
        print(dist_df.head(10))
        if out_dir is not None:
            dist_df.to_csv('%s/%s_mcs_dist_table.csv' % (out_dir, file_prefix), index=False)
            # Draw the (up to) 10 closest pairs; there may be fewer pairs.
            for k in range(min(10, len(dist_df))):
                idx_i = dist_df.i.values[k]
                idx_j = dist_df.j.values[k]
                img_file_i = '%s/%d_%s.png' % (out_dir, k, compound_ids[idx_i])
                img_file_j = '%s/%d_%s.png' % (out_dir, k, compound_ids[idx_j])
                Draw.MolToFile(base_mols[idx_i], img_file_i, size=(500,500), fitImage=False)
                Draw.MolToFile(base_mols[idx_j], img_file_j, size=(500,500), fitImage=False)

        # NOTE(review): if `linkage` is scipy.cluster.hierarchy.linkage, a 2-D
        # input is treated as an observation matrix rather than a distance
        # matrix; confirm whether a condensed form (squareform) was intended.
        mcs_linkage = linkage(mcs_dist, method='complete')
        mcs_df = pd.DataFrame(mcs_dist, columns=compound_ids, index=compound_ids)
        g = sns.clustermap(mcs_df, row_linkage=mcs_linkage, col_linkage=mcs_linkage, figsize=(12,12), cmap='plasma')
        if out_dir is not None:
            _save_figure_as_pdf(g.fig, '%s/%s_mcs_clustermap.pdf' % (out_dir, file_prefix))

        # Draw a UMAP projection based on MCS distance
        mapper = umap.UMAP(n_neighbors=10, n_components=2, metric='precomputed', random_state=17)
        reps = mapper.fit_transform(mcs_dist)
        rep_df = pd.DataFrame.from_records(reps, columns=['x', 'y'])
        rep_df['compound_id'] = compound_ids
        fig, ax = plt.subplots(figsize=(12,12))
        sns.scatterplot(x='x', y='y', data=rep_df, ax=ax)
        ax.set_title("%s, 2D projection based on MCS distance" % title_prefix)
        if out_dir is not None:
            _save_figure_as_pdf(fig, '%s/%s_mcs_umap_proj.pdf' % (out_dir, file_prefix))
            rep_df.to_csv('%s/%s_mcs_umap_proj.csv' % (out_dir, file_prefix), index=False)

    # Get Tanimoto distance matrix
    print("Computing Tanimoto distance matrix...")
    tani_dist = dm.tanimoto(fps)
    print("Done")

    # Draw a UMAP projection based on Tanimoto distance
    mapper = umap.UMAP(n_neighbors=10, n_components=2, metric='precomputed', random_state=17)
    reps = mapper.fit_transform(tani_dist)
    rep_df = pd.DataFrame.from_records(reps, columns=['x', 'y'])
    rep_df['compound_id'] = compound_ids
    fig, ax = plt.subplots(figsize=(12,12))
    sns.scatterplot(x='x', y='y', data=rep_df, ax=ax)
    ax.set_title("%s, 2D projection based on Tanimoto distance" % title_prefix)
    if out_dir is not None:
        _save_figure_as_pdf(fig, '%s/%s_tani_umap_proj.pdf' % (out_dir, file_prefix))

    # Draw a cluster heatmap based on Tanimoto distance
    tani_linkage = linkage(tani_dist, method='complete')
    tani_df = pd.DataFrame(tani_dist, columns=compound_ids, index=compound_ids)
    g = sns.clustermap(tani_df, row_linkage=tani_linkage, col_linkage=tani_linkage, figsize=(12,12), cmap='plasma')
    if out_dir is not None:
        _save_figure_as_pdf(g.fig, '%s/%s_tanimoto_clustermap.pdf' % (out_dir, file_prefix))
Ejemplo n.º 29
0
def compare_datasets_tsne(args: Args):
    """Embed several SMILES datasets with t-SNE and save one comparison plot.

    Every path in ``args.smiles_paths`` is loaded (and randomly subsampled to
    ``args.max_per_dataset`` entries when larger), Morgan fingerprints are
    computed for all molecules, and a 2-D t-SNE embedding with the Jaccard
    metric is fitted. The embedding is rescaled per axis and drawn either
    coloured by HDBSCAN cluster (``args.cluster``), as molecule thumbnails
    (``args.plot_molecules``) or as one scatter series per dataset, then
    written to ``args.save_path``.

    Raises:
        ValueError: if there are fewer colors or sizes than datasets.
    """
    n_datasets = len(args.smiles_paths)
    if n_datasets > len(args.colors) or n_datasets > len(args.sizes):
        raise ValueError(
            'Must have at least as many colors and sizes as datasets')

    # Fixed seed so the random subsampling below is reproducible
    np.random.seed(0)

    # Read every dataset, remembering which slice of the pooled list it owns
    print('Loading data')
    pooled_smiles = []
    dataset_slices = []
    dataset_labels = []
    for csv_path in args.smiles_paths:
        # The file name without its '.csv' part doubles as the legend label
        dataset_label = os.path.basename(csv_path).replace('.csv', '')

        dataset_smiles = get_smiles(path=csv_path,
                                    smiles_columns=args.smiles_column,
                                    flatten=True)
        print(f'{dataset_label}: {len(dataset_smiles):,}')

        # Cap oversized datasets by sampling without replacement
        if len(dataset_smiles) > args.max_per_dataset:
            print(f'Subsampling to {args.max_per_dataset:,} molecules')
            dataset_smiles = np.random.choice(dataset_smiles,
                                              size=args.max_per_dataset,
                                              replace=False).tolist()

        first = len(pooled_smiles)
        dataset_slices.append(slice(first, first + len(dataset_smiles)))
        dataset_labels.append(dataset_label)
        pooled_smiles += dataset_smiles

    # Fingerprint every pooled molecule
    print('Computing Morgan fingerprints')
    morgan_generator = get_features_generator('morgan')
    fingerprints = [
        morgan_generator(smi)
        for smi in tqdm(pooled_smiles, total=len(pooled_smiles))
    ]

    print('Running t-SNE')
    tic = time.time()
    embedder = TSNE(n_components=2, init='pca', random_state=0, metric='jaccard')
    X = embedder.fit_transform(fingerprints)
    print(f'time = {time.time() - tic:.2f} seconds')

    if args.cluster:
        import hdbscan  # pip install hdbscan
        print('Running HDBSCAN')
        tic = time.time()
        clusterer = hdbscan.HDBSCAN(min_cluster_size=5, gen_min_span_tree=True)
        cluster_ids = clusterer.fit_predict(X)
        print(f'time = {time.time() - tic:.2f} seconds')

    # Rescale each embedding axis by its own minimum and range
    print('Plotting t-SNE')
    axis_min = np.min(X, axis=0)
    axis_max = np.max(X, axis=0)
    X = (X - axis_min) / (axis_max - axis_min)

    makedirs(args.save_path, isfile=True)

    plt.clf()
    fontsize = 50 * args.scale
    fig = plt.figure(figsize=(64 * args.scale, 48 * args.scale))
    plt.title('t-SNE using Morgan fingerprint with Jaccard similarity',
              fontsize=2 * fontsize)
    ax = fig.gca()
    legend_handles = []
    legend_kwargs = {'loc': 'upper right', 'fontsize': fontsize}

    if args.cluster:
        # One scatter for everything, coloured by cluster id
        plt.scatter(X[:, 0],
                    X[:, 1],
                    s=150 * np.mean(args.sizes),
                    c=cluster_ids,
                    cmap='nipy_spectral')
    else:
        per_dataset = zip(dataset_slices, args.colors, dataset_labels,
                          args.sizes)
        for slc, color, label, size in per_dataset:
            if not args.plot_molecules:
                # Plain points, one labelled series per dataset
                plt.scatter(X[slc, 0],
                            X[slc, 1],
                            s=150 * size,
                            color=color,
                            label=label)
                continue

            # Molecule thumbnails framed in the dataset colour
            legend_handles.append(mpatches.Patch(color=color, label=label))
            for smi, (x, y) in zip(pooled_smiles[slc], X[slc]):
                thumbnail = Draw.MolsToGridImage([Chem.MolFromSmiles(smi)],
                                                 molsPerRow=1,
                                                 subImgSize=(200, 200))
                framed = offsetbox.AnnotationBbox(
                    offsetbox.OffsetImage(thumbnail), (x, y),
                    bboxprops=dict(color=color))
                ax.add_artist(framed)

        if args.plot_molecules:
            legend_kwargs['handles'] = legend_handles

    plt.legend(**legend_kwargs)
    plt.xticks([])
    plt.yticks([])

    print('Saving t-SNE')
    plt.savefig(args.save_path)
Ejemplo n.º 30
0
 def pixmap_from_smiles(self, s):
     """Parse SMILES string *s* and return the result of Draw.MolToQPixmap for it."""
     return Draw.MolToQPixmap(Chem.MolFromSmiles(s))
Ejemplo n.º 31
0
def draw_multi(smiles):
    """Draw a grid image for a list of SMILES, labelling each cell with its list index."""
    parsed = []
    index_labels = []
    for position, smi in enumerate(smiles):
        parsed.append(Chem.MolFromSmiles(smi))
        index_labels.append(str(position))
    return Draw.MolsToGridImage(
        parsed,
        molsPerRow=7,
        maxMols=75,
        subImgSize=(100, 100),
        legends=index_labels,
    )
Ejemplo n.º 32
0
def smiles_to_image_file(smiles, path):
    """Parse ``smiles`` and write a drawing of the molecule to ``path``.

    :param smiles: SMILES string handed to ``Chem.MolFromSmiles``
    :param path: output file name handed to ``Draw.MolToFile``
    """
    Draw.MolToFile(Chem.MolFromSmiles(smiles), path)
Ejemplo n.º 33
0
def mols2grid_image(mols, molsPerRow):
    """Draw ``mols`` as a grid image with ``molsPerRow`` molecules per row.

    ``None`` entries are replaced by a fresh, empty ``Chem.RWMol`` so that
    they still occupy a cell. 2D coordinates are computed for every
    molecule before drawing.
    """
    drawable = []
    for entry in mols:
        if entry is None:
            drawable.append(Chem.RWMol())
        else:
            drawable.append(entry)

    for entry in drawable:
        AllChem.Compute2DCoords(entry)

    return Draw.MolsToGridImage(
        drawable,
        molsPerRow=molsPerRow,
        subImgSize=(150, 150),
    )
Ejemplo n.º 34
0
        import rdkit.Chem.Draw as draw
        import rdkit.Chem.AllChem as ac

        # FIXME: ugly hack
        for filename in glob.glob('%s/*.mol2' % args.mol2_dir[0]):
            mol = rd.MolFromMol2File(filename, **_mol_params)

            dirname = os.path.dirname(filename)
            basename = os.path.splitext(os.path.basename(filename))[0]
            outname = os.path.join(dirname, basename + os.extsep + 'svg')

            tmp = ac.Compute2DCoords(mol)

            draw.MolToFile(mol,
                           outname,
                           wedgeBonds=False,
                           size=(150, 150),
                           fitImage=True,
                           kekulize=False)

        with open(args.graph[0], 'rb') as pfile:
            mst = pickle.load(pfile)
            mol_names = pickle.load(pfile)
            dir_names = pickle.load(pfile)

        mst_a = mst.toarray()
        draw_graph(mst, mst_a, mol_names, dir_names, args.method[0])
    else:
        # FIXME: other file types
        mol2_files = glob.glob('%s/*.mol2' % args.mol2_dir[0])

        if not mol2_files:
Ejemplo n.º 35
0
            smile = np.array(smile).astype('str')[0]
            print("Second:", smile)
            mol = MolFromSmiles(smile)
            mols.append(mol)
            second_best_score = 1e10

        if np.any(scores == third_best_score):
            smile = smiles[scores == third_best_score]
            smile = np.array(smile).astype('str')[0]
            print("Third:", smile)
            mol = MolFromSmiles(smile)
            mols.append(mol)
            third_best_score = 1e10

# Draw every collected molecule side by side (a single row, because
# molsPerRow equals the number of molecules) and request SVG output.
img = Draw.MolsToGridImage(mols,
                           molsPerRow=len(mols),
                           subImgSize=(300, 300),
                           useSVG=True)
# The result is written out as text. The "molecule_images" directory must
# already exist, since open() does not create missing directories.
with open("molecule_images/best_grammar_molecule.svg", "w") as text_file:
    text_file.write(img)

results_character = np.zeros((n_simulations, 3))
for j in range(1, n_simulations + 1):
    best_value = 1e10
    n_valid = 0
    max_value = 0
    for i in range(iteration):
        smiles = load_object(
            'simulation{}/character/results/valid_smiles{}.dat'.format(j, i))
        scores = load_object(
            'simulation{}/character/results/scores{}.dat'.format(j, i))
        n_valid += len([x for x in smiles if x is not None])
Ejemplo n.º 36
0
def calc_MST(filenames, method, do_draw=True, parallel=False):
    """Score all molecule pairs and derive a minimum spanning tree (MST).

    Each file in ``filenames`` is read as a mol2 file. Every unordered pair
    of molecules is scored with the function registered under ``method`` in
    ``valid_methods``, and the scores fill the upper triangle of an N x N
    matrix. The MST of that matrix is computed with SciPy, printed as a list
    of suggested mappings and pickled to ``MST_PICKLE_FILE`` together with
    the molecule and directory names.

    :param filenames: paths of the mol2 files to read
    :param method: key into ``valid_methods`` selecting the scoring function
    :param do_draw: if true, write an SVG depiction next to every input file
        (same directory and base name, ``svg`` extension)
    :param parallel: if true, score with a ``multiprocessing`` pool that uses
        one worker per CPU; otherwise score in this process
    :return: tuple ``(mst, mst_a, mol_names, dir_names)`` where ``mst`` is the
        sparse MST, ``mst_a`` its dense array form, and the two lists hold
        the base name and directory of each input file
    :raises KeyError: if ``method`` is not a key of ``valid_methods``
    """

    from functools import partial

    import numpy as np
    from scipy.sparse import csr_matrix
    from scipy.sparse.csgraph import minimum_spanning_tree

    import rdkit.Chem.AllChem as ac

    if do_draw:
        # Imported once here rather than on every loop iteration.
        import rdkit.Chem.Draw as draw

    score_func = valid_methods[method]

    N = len(filenames)
    simmat = np.zeros(shape=(N, N), dtype=np.float32)

    mols = []
    mol_names = []
    dir_names = []

    print('Reading input files...')

    for filename in filenames:
        mol = rd.MolFromMol2File(filename, **_mol_params)

        dirname = os.path.dirname(filename)
        basename = os.path.splitext(os.path.basename(filename))[0]

        mols.append(mol)
        mol_names.append(basename)
        dir_names.append(dirname)

        # Called for its side effect on ``mol``; the return value is unused.
        ac.Compute2DCoords(mol)

        if do_draw:
            outname = os.path.join(dirname, basename + os.extsep + 'svg')
            draw.MolToFile(mol,
                           outname,
                           wedgeBonds=False,
                           size=(150, 150),
                           fitImage=True,
                           kekulize=False)

    print('Computing similarity matrix using %s...' % method)

    pool = mp.Pool(mp.cpu_count()) if parallel else None
    map_func = pool.imap if pool is not None else map

    try:
        results = []

        # Both ``map`` and ``pool.imap`` are lazy: the rows are only set up
        # here and are evaluated when they are consumed below.
        for i in range(N - 1):
            print('%s...' % mol_names[i])

            partial_func = partial(score_func, mols[i])
            results.append(map_func(partial_func, mols[i + 1:N]))

        # Only the upper triangle (j > i) is filled.
        for i, row in enumerate(results):
            simmat[i][i + 1:N] = list(row)
    finally:
        # Release the worker processes even when scoring raises.
        if pool is not None:
            pool.close()
            pool.join()

    print('similarity score matrix:\n', simmat)

    # NOTE: this removes edges with the larger weight
    mst = minimum_spanning_tree(csr_matrix(simmat))

    mst_a = mst.toarray()

    print('\nminimal spanning tree (MST):\n', mst_a)

    print('\nsuggested mappings from MST:')

    for cnt, (i, j) in enumerate(zip(*mst.nonzero()), start=1):
        print('%6i) %s <> %s (%f)\n'
              % (cnt, mol_names[i], mol_names[j], mst_a[i][j]), end='')

    with open(MST_PICKLE_FILE, 'wb') as pfile:
        pickle.dump(mst, pfile, pickle.HIGHEST_PROTOCOL)
        pickle.dump(mol_names, pfile, pickle.HIGHEST_PROTOCOL)
        pickle.dump(dir_names, pfile, pickle.HIGHEST_PROTOCOL)

    return mst, mst_a, mol_names, dir_names
Ejemplo n.º 37
0
def rdkit2d(smiles: str, tag: str):
    """Draw the molecule given by ``smiles`` to ``GRAPH_DIR + tag + '.png'``.

    :param smiles: SMILES string parsed with ``Chem.MolFromSmiles``
    :param tag: file name stem appended to ``GRAPH_DIR``
    """
    parsed = Chem.MolFromSmiles(smiles)
    png_path = GRAPH_DIR + tag + '.png'
    Draw.MolToFile(parsed, png_path)
Ejemplo n.º 38
0
# -*- coding: utf-8 -*-
"""
@author: hkaneko
"""

from rdkit import Chem
from rdkit.Chem import Draw

molecule = Chem.MolFromSmiles(
    'CC(N)C(=O)O')  # Load the SMILES string; 'CC(N)C(=O)O' is alanine
Draw.MolToFile(molecule, 'molecule.png')

# Alternatively, typing
# Draw.MolToImage(molecule)
# into the IPython console on the right and running it also draws the molecule
Ejemplo n.º 39
0
 def testDrawReaction(self):
     """Smoke test: drawing a reaction built from SMARTS must not raise."""
     reaction_smarts = (
         "[c;H1:3]1:[c:4]:[c:5]:[c;H1:6]:[c:7]2:[nH:8]:[c:9]:[c;H1:1]:[c:2]:1:2.O=[C:10]1[#6;H2:11][#6;H2:12][N:13][#6;H2:14][#6;H2:15]1>>[#6;H2:12]3[#6;H1:11]=[C:10]([c:1]1:[c:9]:[n:8]:[c:7]2:[c:6]:[c:5]:[c:4]:[c:3]:[c:2]:1:2)[#6;H2:15][#6;H2:14][N:13]3"
     )
     reaction = AllChem.ReactionFromSmarts(reaction_smarts)
     # Only the absence of an exception matters; the image is discarded.
     Draw.ReactionToImage(reaction)
Ejemplo n.º 40
0
def draw_mol(
    smiles,
    height=200,
    width=200,
    img_type=None,
    highlightAtoms=None,
    atomcolors=None,
    highlightBonds=None,
    bondcolors=None,
    mol=None,
):
    """
    Draw a molecule from a SMILES string or from an existing molecule.

    :param smiles: the SMILES to render; only parsed when ``mol`` is None
    :param height: the height in px; a falsy value falls back to 200
    :param width: the width in px; a falsy value falls back to 200
    :param img_type: ``"png"`` selects the PNG branch, anything else SVG
    :param highlightAtoms: atoms to highlight (SVG branch only);
        None means no highlights
    :param atomcolors: colours for the highlighted atoms (SVG branch only);
        None means none given
    :param highlightBonds: bonds to highlight; None means no highlights
    :param bondcolors: colours for the highlighted bonds; None means none given
    :param mol: optional ready-made molecule used instead of parsing
        ``smiles``. It is modified in place: 2D coordinates are computed
        and it is kekulized.
    :return: the string ``"None Mol"`` if the SMILES cannot be parsed; an
        ``HttpResponse`` holding a PNG for ``img_type == "png"``; otherwise
        the SVG of the image as a string
    """
    # None replaces the former mutable defaults; the values passed on to the
    # drawing calls are the same empty containers as before.
    highlightAtoms = [] if highlightAtoms is None else highlightAtoms
    atomcolors = [] if atomcolors is None else atomcolors
    highlightBonds = [] if highlightBonds is None else highlightBonds
    bondcolors = {} if bondcolors is None else bondcolors

    if mol is None:
        mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return "None Mol"
    AllChem.Compute2DCoords(mol)
    Chem.Kekulize(mol)
    if not height:
        height = 200
    if not width:
        width = 200
    if img_type == "png":
        # NOTE(review): height, width, highlightAtoms and atomcolors are not
        # used in this branch -- confirm whether that is intended.
        img = Draw.MolToImage(
            mol,
            options=options,
            highlightBonds=highlightBonds,
            highlightBondColors=bondcolors,
        )
        img = img.convert("RGBA")
        # Turn every pure-white pixel fully transparent.
        img.putdata([
            (255, 255, 255, 0) if pixel[:3] == (255, 255, 255) else pixel
            for pixel in img.getdata()
        ])
        response = HttpResponse(content_type="image/png")
        img.save(response, "PNG")
        return response
    else:
        # MolDraw2DSVG takes (width, height); they were previously swapped.
        drawer = rdMolDraw2D.MolDraw2DSVG(width, height)
        drawopt = drawer.drawOptions()
        drawopt.clearBackground = False
        drawer.DrawMolecule(
            mol,
            highlightAtoms=highlightAtoms,
            highlightAtomColors=atomcolors,
            highlightBonds=highlightBonds,
            highlightBondColors=bondcolors,
        )
        # NOTE(review): the molecule is drawn a second time without
        # highlights; presumably to put it on top of the highlight
        # shapes -- confirm before removing.
        drawer.DrawMolecule(mol)
        drawer.FinishDrawing()
        return drawer.GetDrawingText().replace("svg:", "")