Beispiel #1
0
    def depict(self, filename=None, ipython=False):
        """Render the fragment (core atoms plus environment atoms) as an SVG.

        A scratch ``RWMol`` is filled with copies of ``self.atoms`` and
        ``self.enviroments``, connected using ``self.Bonds`` and
        ``self.bondsenvironments``, embedded with ``EmbedMolecule`` and drawn
        on a 400x200 SVG canvas.

        Parameters
        ----------
        filename: str
            If not None, the SVG text is written to this path.
        ipython: bool
            If True, an ``IPython.display.SVG`` object is returned.

        Returns
        -------
            SVG object if ``ipython`` is True, otherwise None.
        """
        # NOTE(review): IPythonConsole is not referenced below; the import is
        # kept because loading that module may register notebook rendering
        # hooks -- confirm before removing.
        from rdkit.Chem.Draw import IPythonConsole  # noqa: F401
        from rdkit.Chem.Draw import rdMolDraw2D
        from rdkit.Chem.AllChem import EmbedMolecule
        from IPython.display import SVG
        from rdkit.Chem import RWMol, MolFromSmiles, ChiralType

        # Start from a one-carbon placeholder molecule; the placeholder sits at
        # index 0 and is removed once all real atoms and bonds are in place.
        rmol = RWMol(MolFromSmiles('C'))

        # Maps an atom's index in its source molecule to its index in ``rmol``.
        # Numbering starts at 1 because index 0 is the placeholder.
        dict_old_new_idx = {}
        n = 1
        for a in self.atoms:
            old_idx = a.GetIdx()
            rmol.AddAtom(a)
            dict_old_new_idx[old_idx] = n
            n += 1

        for a in self.enviroments:
            old_idx = a.GetIdx()
            # NOTE(review): these two setters act on the objects stored in
            # self.enviroments, not on a copy -- confirm that is intended.
            a.SetChiralTag(ChiralType.CHI_UNSPECIFIED)
            a.SetIsAromatic(0)
            rmol.AddAtom(a)
            dict_old_new_idx[old_idx] = n
            n += 1

        for b in self.Bonds:
            rmol.AddBond(dict_old_new_idx[b.GetBeginAtomIdx()],
                         dict_old_new_idx[b.GetEndAtomIdx()], b.GetBondType())
        for b in self.bondsenvironments:
            rmol.AddBond(dict_old_new_idx[b.GetBeginAtomIdx()],
                         dict_old_new_idx[b.GetEndAtomIdx()], b.GetBondType())

        # Drop the placeholder carbon.
        rmol.RemoveAtom(0)

        EmbedMolecule(rmol)
        drawer = rdMolDraw2D.MolDraw2DSVG(400, 200)
        drawer.DrawMolecule(rmol)
        drawer.FinishDrawing()
        svg = drawer.GetDrawingText()

        if filename is not None:
            # The context manager closes the file even if the write fails.
            with open(filename, 'w') as f:
                f.write(svg)

        if ipython:
            # Strip the "svg:" namespace prefix before handing the text to SVG.
            return SVG(svg.replace('svg:', ''))
        return None
    def get_all_features(self, df, mol_list):
        """Compute node, edge and global features for every molecule.

        For each molecule in ``mol_list`` the results of
        ``get_node_features``, ``get_edge_features`` and
        ``get_global_features`` are stored in row ``i`` of ``df`` (columns
        ``x``, ``edge_index``, ``edge_attr``, ``u`` and, when the module-level
        ``XYZ`` flag is not 1, ``xyz``). Rows whose ``edge_index`` equals
        ``'drop'`` are removed afterwards and the index is reset.

        The feature totals and atom/bond counts returned by the helpers are
        accumulated over every molecule whose ``edge_attr`` is non-empty, and
        the resulting averages are stored in ``self.x_mean`` and
        ``self.edge_attr_mean``. Both are empty lists when no molecule
        contributed.

        Parameters
        ----------
        df: pandas.DataFrame
            One row per molecule; rows are addressed by position ``i`` through
            ``df.at[i, ...]`` and the ``refcode_csd`` column is read for
            logging.
        mol_list: list
            Molecules in the same order as the rows of ``df``.

        Returns
        -------
            The processed DataFrame (also stored in ``self.df``).
        """
        # NOW GET ALL THE FEATURES
        df = df.astype('object')
        df['x'] = ''
        df['edge_index'] = ''
        df['edge_attr'] = ''
        df['u'] = ''
        print("Length of df =", len(df), "Length of mol_list =", len(mol_list))

        # Running totals over the whole data set. They stay None / 0 until the
        # first molecule with a non-empty edge_attr is seen, so the totals are
        # never reset by later molecules.
        x_sum_all = None
        x_nbrAtoms_all = 0
        edge_attr_sum_all = None
        edge_attr_nbrBonds_all = 0

        for i, mol in enumerate(mol_list):
            # XYZ and addH are module-level flags defined outside this method.
            if XYZ == 0:
                mol = Chem.AddHs(mol)
                EmbedMolecule(mol)
            if addH == 0:
                mol = Chem.RemoveHs(mol)
            else:
                mol = Chem.AddHs(mol)
            print("Getting features for mol =", i, "refcode =", df.at[i, "refcode_csd"])
            df.at[i, "x"], x_sum, x_nbrAtoms = self.get_node_features(mol)
            if XYZ == 1:
                df.at[i, "edge_index"], df.at[i, "edge_attr"], edge_attr_sum, edge_attr_nbrBonds = self.get_edge_features(mol, i)
            else:
                # In this mode get_edge_features additionally returns the coordinates.
                df.at[i, "edge_index"], df.at[i, "edge_attr"], df.at[i, "xyz"], edge_attr_sum, edge_attr_nbrBonds = self.get_edge_features(mol, i)
            df.at[i, "u"] = self.get_global_features(mol)

            # Molecules without edge attributes do not contribute to the means.
            if len(df.at[i, "edge_attr"]) == 0:
                continue
            if x_sum_all is None:
                x_sum_all = x_sum
                edge_attr_sum_all = edge_attr_sum
            else:
                x_sum_all = [total + cur for total, cur in zip(x_sum_all, x_sum)]
                edge_attr_sum_all = [total + cur for total, cur in zip(edge_attr_sum_all, edge_attr_sum)]
            x_nbrAtoms_all = x_nbrAtoms_all + x_nbrAtoms
            edge_attr_nbrBonds_all = edge_attr_nbrBonds_all + edge_attr_nbrBonds

        if x_sum_all is None:
            # Empty mol_list, or no molecule had any edge attributes.
            self.x_mean = []
            self.edge_attr_mean = []
        else:
            self.x_mean = [total / x_nbrAtoms_all for total in x_sum_all]
            self.edge_attr_mean = [total / edge_attr_nbrBonds_all for total in edge_attr_sum_all]

        # Now drop the rows that are marked with 'drop'
        old_len = len(df)
        df = df[df.edge_index != 'drop']
        df = df.reset_index(drop=True)
        new_len = len(df)
        if old_len != new_len:
            dropped = old_len - new_len
            print(dropped, "rows were deleted because the coordinates in mol and XYZ did not have the same order")
        self.df = df
        return df
Beispiel #3
0
 def dock_mol(self, mol):
     """Prepare ``mol`` for docking and run the ``vina`` command on it.

     The hydrogen-capped copy returned by ``mol.cap_with_h()`` is embedded,
     stored on ``mol.docking_mol`` and written to ``temp.pdb``. The file is
     then converted with ``export_pdbqt('temp')`` and the vina command line is
     printed and passed to ``os_command``. Finally ``mol.get_energy()`` and
     ``mol.export_data(self.name)`` are called.
     """
     capped = mol.cap_with_h()
     EmbedMolecule(capped)
     mol.docking_mol = capped

     pdb_path = Path('temp.pdb')
     MolToPDBFile(capped, pdb_path.as_posix())

     # Output files are placed next to the converted ligand file.
     pdbqt_file = export_pdbqt('temp')
     out_dir = pdbqt_file.parent
     f_out_pdbqt = Path(out_dir, 'temp-out.pdbqt')
     f_out_log = Path(out_dir, 'temp-out.txt')

     vina_command = f"vina --config {self.dir}/{self.name}-config.txt --ligand {pdbqt_file} --out {f_out_pdbqt} " \
                    f"--log {f_out_log}"
     print(vina_command)
     os_command(vina_command)

     mol.get_energy()
     mol.export_data(self.name)
Beispiel #4
0
def createXYZ_from_SMILES(df, mol_list):
    """Generate an XYZ block for every SMILES in ``df``.

    Each value of the ``smiles`` column is parsed, hydrogens are added, the
    molecule is embedded and the resulting XYZ block is stored in a new
    ``xyz`` column. Rows for which no XYZ text could be produced (the SMILES
    could not be parsed, or the XYZ block came back empty) are reported and
    removed, together with the matching entry of ``mol_list``.

    Parameters
    ----------
    df: pandas.DataFrame
        Must contain a ``smiles`` column. Index labels are used to look up
        ``mol_list`` entries, so a default 0..n-1 index is assumed.
    mol_list: list
        Molecules aligned with the rows of ``df``.

    Returns
    -------
        Tuple ``(df, new_mol_list)``: the filtered DataFrame with a reset
        index and the molecules that were kept.
    """
    new_mol_list = []
    print("Creating XYZ coordinates from SMILES")
    df = df.astype('object')
    df['xyz'] = ''
    for i, row in df.iterrows():
        smiles = row['smiles']
        mol = Chem.MolFromSmiles(smiles)
        xyz = ''
        # An unparsable SMILES yields None; treat it like a failed embedding
        # instead of crashing inside AddHs.
        if mol is not None:
            mol = Chem.AddHs(mol)
            EmbedMolecule(mol)
            xyz = Chem.rdmolfiles.MolToXYZBlock(mol)
        # Compare by value: identity against a string literal is unreliable.
        if not xyz:
            print(i, "Unable to create XYZ coordinates for", smiles,
                  "Droping it from the dataframe")
            df.at[i, "xyz"] = 'drop'
        else:
            new_mol_list.append(mol_list[i])
            df.at[i, "xyz"] = xyz
    df = df[df["xyz"] != 'drop']
    df = df.reset_index(drop=True)
    return (df, new_mol_list)
Beispiel #5
0
def processline(t, step, line):
    """Run benchmark step ``step`` on one input line and update ``lensum``.

    ``t.incr()`` is consulted first; a truthy result makes the function
    return 1 without doing any work. Otherwise the work selected by ``step``
    is performed, its size is added to the global ``lensum`` and 0 is
    returned. Step 0 only measures the line length; every other step parses
    the line with ``MolFromSmiles`` first.

    Raises
    ------
    ValueError
        If ``step`` is not one of the implemented step numbers.
    """
    global lensum
    if t.incr():
        return 1

    if step == 0:
        # Baseline step: no parsing at all.
        lensum += len(line)
        return 0

    # All remaining steps pay for the parse, even those that ignore ``mol``.
    mol = MolFromSmiles(line)
    if step == 100:
        lensum += len(line)
    elif step == 105:
        lensum += len(sha256(line).hexdigest())
    elif step == 110 or step == 120:
        with open(tmpname, 'wb+') as tmp:
            print(line, file=tmp)
            if step == 120:
                # Step 120 additionally forces the data to disk.
                os.fsync(tmp.fileno())
        lensum += os.stat(tmpname).st_size
    elif step == 210:
        lensum += mol.GetNumAtoms()
    elif step == 220:
        lensum += mol.GetNumBonds()
    elif step == 300:
        lensum += len(MolToSmiles(mol))
    elif step == 400:
        lensum += len(MolToMolBlock(mol))
    elif step == 420:
        # Embed with explicit hydrogens, then strip them again before export.
        embedded = AddHs(mol)
        EmbedMolecule(embedded, randomSeed=2020)
        embedded = RemoveHs(embedded)
        embedded.SetProp("_Name", "test")
        lensum += len(MolToMolBlock(embedded))
    elif step == 600:
        lensum += mol2file(mol, 'svg')
    elif step == 610:
        lensum += mol2file(mol, 'png')
    else:
        raise ValueError("Not implemented step " + str(step))

    return 0
Beispiel #6
0
    def depictFGs(self, fgs, filename=None, ipython=False, optimize=False):
        """Draw the molecule with the given functional groups highlighted.

        The atoms listed in ``AtomsIdx`` and ``EnviromentsIdx`` of every entry
        of ``fgs`` are highlighted. Atoms in ``AtomsIdx`` are coloured per
        group, cycling through ``self._colors``.

        Parameters
        ----------
        fgs: list
            Objects exposing ``AtomsIdx`` and ``EnviromentsIdx``.
        filename: str
            If not None, the SVG text is written to this path.
        ipython: bool
            If True, an ``IPython.display.SVG`` object is returned.
        optimize: bool
            If True, ``EmbedMolecule`` is run on ``self._mol._mol`` (in place)
            before drawing.

        Returns
        -------
            SVG object if ``ipython`` is True, otherwise None.
        """
        # NOTE(review): IPythonConsole is not referenced below; the import is
        # kept because loading that module may register notebook rendering
        # hooks -- confirm before removing.
        from rdkit.Chem.Draw import IPythonConsole  # noqa: F401
        from rdkit.Chem.Draw import rdMolDraw2D
        from IPython.display import SVG
        from rdkit.Chem.AllChem import EmbedMolecule

        drawer = rdMolDraw2D.MolDraw2DSVG(400, 200)

        highlightAtoms = [a for fg in fgs for a in fg.AtomsIdx
                          ] + [a for fg in fgs for a in fg.EnviromentsIdx]
        # One colour per group; environment atoms get no explicit colour.
        highlightColors = {
            a: self._colors[i % len(self._colors)]
            for i, fg in enumerate(fgs) for a in fg.AtomsIdx
        }

        if optimize:
            EmbedMolecule(self._mol._mol)

        drawer.DrawMolecule(self._mol._mol,
                            highlightAtoms=highlightAtoms,
                            highlightBonds=[],
                            highlightAtomColors=highlightColors)

        drawer.FinishDrawing()
        svg = drawer.GetDrawingText()

        if filename is not None:
            # The context manager closes the file even if the write fails.
            with open(filename, 'w') as f:
                f.write(svg)

        if ipython:
            # Strip the "svg:" namespace prefix before handing the text to SVG.
            return SVG(svg.replace('svg:', ''))
        return None
Beispiel #7
0
    def depict(self, sketch=True, filename=None, ipython=False, optimize=False, optimizemode='std', removeHs=True,
               atomlabels=None, highlightAtoms=None, resolution=(400, 200)):
        """
        Depicts the molecule. It is possible to save it into an svg file and also to generate a Jupyter notebook
        rendering. The depiction is made on a deep copy of the molecule.

        Parameters
        ----------
        sketch: bool
            Set to True for 2D depiction
        filename: str
            Set the filename for the svg file
        ipython: bool
            Set to True to return the Jupyter notebook rendering
        optimize: bool
            Set to True to optimize the conformation. Works only with 3D.
        optimizemode: ['std', 'mmff']
            Set the optimization mode for 3D conformation
        removeHs: bool
            Set to True to hide hydrogens in the depiction
        atomlabels: str
            Accepts any combination of the following parameters as a single string '%a%i%c%*' a:atom name,
            i:atom index, c:sign of the atom formal charge (+/-), *:chiral (* if atom is chiral)
        highlightAtoms: list
            List of atom to highlight. It can be also a list of atom list, in this case different colors will be used
        resolution: tuple of integers
            Resolution in pixels: (X, Y)

        Returns
        -------
            ipython_svg: SVG object if ipython is set to True

        Raises
        ------
        ValueError
            If both ``sketch`` and ``optimize`` are set, if ``optimizemode`` is not 'std' or 'mmff', or if
            ``atomlabels`` contains a code other than a, i, c, *.

        Example
        -------
        >>> sm.depict(ipython=True, optimize=True, optimizemode='std')  # doctest: +SKIP
        >>> sm.depict(ipython=True, sketch=True)  # doctest: +SKIP
        >>> sm.depict(ipython=True, sketch=True, atomlabels="%a%i%c")  # doctest: +SKIP
        >>> ids = np.intersect1d(sm.get('idx', 'hybridization SP2'), sm.get('idx', 'element C'))  # doctest: +SKIP
        >>> sm.depict(ipython=True, sketch=True,highlightAtoms=ids.tolist(), removeHs=False)  # doctest: +SKIP
        """
        # Validate the arguments first so bad calls fail before any heavy import.
        if sketch and optimize:
            raise ValueError('Impossible to use optimization in  2D sketch representation')

        if optimizemode not in ['std', 'mmff']:
            raise ValueError('Optimization mode {} not understood. Can be "std" or "mmff"'.format(optimizemode))

        from rdkit import Chem
        from rdkit.Chem.AllChem import Compute2DCoords, EmbedMolecule, MMFFOptimizeMolecule, ETKDG
        from copy import deepcopy

        elements = self._element
        indexes = self._idx
        formalcharges = self._formalcharge
        chirals = self._chiral

        # Work on a copy so the stored molecule keeps its own coordinates.
        _mol = deepcopy(self._mol)

        if sketch:
            Compute2DCoords(_mol)

        if removeHs:
            _mol = Chem.RemoveHs(_mol)
            # Keep the per-atom label arrays aligned with the hydrogen-free molecule.
            elements = self.get('element', 'element H', invert=True)
            indexes = self.get('idx', 'element H', invert=True)
            formalcharges = self.get('formalcharge', 'element H', invert=True)
            chirals = self.get('chiral', 'element H', invert=True)

        # Position in this list selects the matching entry of ``values`` below.
        _labelsFunc = ['a', 'i', 'c', '*']

        if atomlabels is not None:
            labels = atomlabels.split('%')[1:]
            # Only the sign is shown; any positive charge is "+", any negative one "-".
            formalcharges = ['' if c == 0 else "+" if c > 0 else "-" for c in formalcharges]
            chirals = ['' if c == '' else '*' for c in chirals]
            values = [elements, indexes, formalcharges, chirals]

            idxs = [_labelsFunc.index(l) for l in labels]
            labels_required = [values[i] for i in idxs]
            # One concatenated label per atom, in the order the codes were requested.
            atomlabels = ["".join(str(v) for v in atom_values) for atom_values in zip(*labels_required)]

        if optimize:
            if optimizemode == 'std':
                EmbedMolecule(_mol, ETKDG())
            elif optimizemode == 'mmff':
                MMFFOptimizeMolecule(_mol)

        return _depictMol(_mol, filename=filename, ipython=ipython,  atomlabels=atomlabels,
                          highlightAtoms=highlightAtoms, resolution=resolution)
Beispiel #8
0
    canvas.addCanvasText('%s\r\nMolWt: %g\tTPSA: %g' % (s, MolWt(m), TPSA(m)),
                         pos, font)

    with open('xx' + s + '.png', 'w') as f:
        canvas.flush()
        img.save(f)


# Script entry point: call drawmol() on three example SMILES strings, then exit.
if __name__ == '__main__':
    drawmol('CN1CCC[C@H]1c2cccnc2')
    drawmol('CC(=O)OC1=CC=CC=C1C(=O)O')
    drawmol('O1C=C[C@H]([C@H]1O2)c3c2cc(OC)c4c3OC(=O)C5=C4CCC(=O)5')
    # The script ends here; everything below this call is unreachable and is
    # kept only as reference snippets.
    sys.exit(0)

    # sample code to use new drawing API (older rdkit do not have DrawString)
    # NOTE(review): `m` and `Draw` are not defined anywhere in this block.
    from rdkit.Chem.AllChem import EmbedMolecule
    assert EmbedMolecule(m) >= 0
    x = Draw.rdMolDraw2D.MolDraw2DSVG(200, 250)
    x.DrawMolecule(m)
    x.DrawString('Test String', 20, 200)
    x.FinishDrawing()
    print(x.GetDrawingText())

    # sample code to generate a legend
    # NOTE(review): `molname`, `smiles` and `mol` are not defined anywhere in
    # this block.
    legstr = ''
    if molname:
        legstr += molname + '\n'
    legstr += '%s\nWt=%g LogP=%g TPSA=%g\nHBA=%d HBD=%d RotBond=%d\n' % \
        (smiles, MolWt(mol), MolLogP(mol), TPSA(mol),
         NumHAcceptors(mol), NumHDonors(mol), NumRotatableBonds(mol))
Beispiel #9
0
    def depict(
        self,
        ids=None,
        sketch=True,
        filename=None,
        ipython=False,
        optimize=False,
        optimizemode="std",
        removeHs=True,
        legends=None,
        highlightAtoms=None,
        mols_perrow=3,
    ):
        """
        Depicts the molecules into a grid. It is possible to save it into an svg file and also to generate a
        Jupyter notebook rendering. The depiction is made on copies of the molecules, so the coordinates stored
        in the library are left untouched.

        Parameters
        ----------
        ids: list
            The index of the molecules to depict
        sketch: bool
            Set to True for 2D depiction
        filename: str
            Set the filename for the svg file
        ipython: bool
            Set to True to return the Jupyter notebook rendering
        optimize: bool
            Set to True to optimize the conformation. Works only with 3D.
        optimizemode: ['std', 'mmff']
            Set the optimization mode for 3D conformation
        removeHs: bool
            Set to True to hide hydrogens in the depiction
        legends: str
            The name to use for each molecule. Can be 'names': the "ligname" property of each molecule; or
            'items': an incremental id starting at 1
        highlightAtoms: list
            A list of atoms to highlight for each molecule. It can be also a list of atom list, in this case
            different colors will be used
        mols_perrow: int
            The number of molecules to depict per row of the grid

        Returns
        -------
            ipython_svg: SVG object if ipython is set to True

        Raises
        ------
        ValueError
            If both ``sketch`` and ``optimize`` are set, if ``legends`` is neither None, "names" nor "items",
            or if ``highlightAtoms`` does not have one entry per depicted molecule.
        """
        # Validate the arguments first so bad calls fail before any heavy import.
        if sketch and optimize:
            raise ValueError(
                "Impossible to use optmization in  2D sketch representation")

        if legends is not None and legends not in ["names", "items"]:
            raise ValueError('The "legends" should be "names" or "items"')

        from copy import deepcopy
        from rdkit.Chem.AllChem import (
            Compute2DCoords,
            EmbedMolecule,
            MMFFOptimizeMolecule,
            ETKDG,
        )
        from rdkit.Chem import RemoveHs
        from moleculekit.smallmol.util import depictMultipleMols

        _smallmols = self.getMols(ids)

        # Compute2DCoords and EmbedMolecule modify their argument in place, so
        # the depiction works on copies of the underlying molecules.
        _selected = self._mols if ids is None else _smallmols
        _mols = [deepcopy(m._mol) for m in _selected]

        if highlightAtoms is not None:
            if len(highlightAtoms) != len(_mols):
                raise ValueError(
                    "The highlightAtoms {} should have the same length of the "
                    "mols {}".format(len(highlightAtoms), len(_mols)))

        if sketch:
            for _m in _mols:
                Compute2DCoords(_m)

        if removeHs:
            _mols = [RemoveHs(_m) for _m in _mols]

        # activate 3D coords optimization
        if optimize:
            if optimizemode == "std":
                # The ETKDG parameters belong to the embedding call.
                for _m in _mols:
                    EmbedMolecule(_m, ETKDG())
            elif optimizemode == "mmff":
                for _m in _mols:
                    MMFFOptimizeMolecule(_m)

        legends_list = []
        if legends == "names":
            legends_list = [_m.getProp("ligname") for _m in _smallmols]
        elif legends == "items":
            legends_list = [str(n + 1) for n in range(len(_smallmols))]

        return depictMultipleMols(
            _mols,
            ipython=ipython,
            legends=legends_list,
            highlightAtoms=highlightAtoms,
            filename=filename,
            mols_perrow=mols_perrow,
        )
Beispiel #10
0
#!/usr/bin/python2
# Little harness for timing how long it takes to embed a molecule
# which seems extremely variable on one machine,
from __future__ import print_function, division
import sys, time, os
from rdkit.Chem import MolFromSmiles, AddHs, RemoveHs
from rdkit.Chem.AllChem import EmbedMolecule

if __name__ == "__main__":
    # Behaviour is controlled through three integer environment variables:
    #   MOLEMBED_TIME  non-zero -> prefix each echoed line with the
    #                  milliseconds elapsed since the previous echo
    #   MOLEMBED_ADDH  non-zero -> add hydrogens before embedding
    #   MOLEMBED_SEED  value passed to EmbedMolecule as randomSeed
    show_elapsed = int(os.getenv('MOLEMBED_TIME', '0'))
    add_hydrogens = int(os.getenv('MOLEMBED_ADDH', '0'))
    seed = int(os.getenv('MOLEMBED_SEED', '0'))

    last_mark = time.time()
    for raw_line in sys.stdin.readlines():
        smiles = raw_line.strip()
        if not show_elapsed:
            print(smiles)
        else:
            # The line is echoed before its own embedding starts, so the
            # printed interval covers the work done since the previous echo.
            now = time.time()
            elapsed_ms = (now - last_mark) * 1e3
            print('%.3f' % elapsed_ms, smiles)
            last_mark = now

        parsed = MolFromSmiles(smiles)
        target = AddHs(parsed) if add_hydrogens else parsed
        EmbedMolecule(target, randomSeed=seed)