Ejemplo n.º 1
0
    def test_convertToDataFrame(self):
        """Check that a SmallMolLib converts into a pandas DataFrame.

        Loads ``fda_drugs_light.sdf``, calls ``toDataFrame()`` and verifies
        the returned type, the column names (against ``SDF_FIELDS``) and the
        value in the first column of the row at position 99 (against
        ``SDF_LOC_0_99``).
        """
        sdffile = os.path.join(self.dataDir, "fda_drugs_light.sdf")

        lib = SmallMolLib(sdffile)

        df = lib.toDataFrame()

        # The trailing space matters: adjacent literals are joined verbatim.
        self.assertIsInstance(
            df,
            core.frame.DataFrame,
            msg="The SmallMolLib object was not correctly converted into pandas "
            "DataFrame",
        )

        cols = df.columns.tolist()
        ref_cols = SDF_FIELDS

        self.assertEqual(
            cols,
            ref_cols,
            msg="The fields in the SmallMolLib object was not the expected one",
        )

        # Fully positional lookup. The former `df.iloc[99][0]` depended on
        # Series.__getitem__ treating an integer key as a position, which
        # pandas deprecates when the index is not integer-based.
        ligname_99 = df.iloc[99, 0]
        ref_ligname = SDF_LOC_0_99

        self.assertEqual(ligname_99,
                         ref_ligname,
                         msg="The ligand name found is not the expected one")
Ejemplo n.º 2
0
    def test_readSmiles(self):
        """Check that the ``.smi`` and ``.smi.gz`` data files each load 100 entries."""
        for filename in ("fda_drugs_light.smi", "fda_drugs_light.smi.gz"):
            library = SmallMolLib(os.path.join(self.dataDir, filename))
            assert len(library) == 100
Ejemplo n.º 3
0
    def test_appendSmallMolLib(self):
        """Check that appending one library to another doubles the molecule count.

        Two SmallMolLib objects are built from the same
        ``fda_drugs_light.sdf`` file; after ``appendSmallLib`` the first one
        is expected to report ``SDF_N_MOLS * 2`` molecules.
        """
        sdffile = os.path.join(self.dataDir, 'fda_drugs_light.sdf')
        lib = SmallMolLib(sdffile)
        lib2 = SmallMolLib(sdffile)

        lib.appendSmallLib(lib2)

        n_mol2_merged = lib.numMols

        # Adjacent string literals are concatenated verbatim, so the space
        # separating the two sentences must live inside the first literal.
        self.assertEqual(
            n_mol2_merged,
            SDF_N_MOLS * 2,
            msg="The number of molecules in the SmallMolLib is not as expected. "
            "The two sdf were not correctly merged. ")
Ejemplo n.º 4
0
    def test_loaSdffile(self):
        """Check that the ``.sdf`` and ``.sdf.gz`` data files both load ``SDF_N_MOLS`` molecules."""
        for sdf_name in ('fda_drugs_light.sdf', 'fda_drugs_light.sdf.gz'):
            library = SmallMolLib(os.path.join(self.dataDir, sdf_name))
            loaded = library.numMols
            failure_msg = ('Molecules not correctly loaded. '
                           'Expected: {}; Now: {}'.format(SDF_N_MOLS, loaded))
            self.assertEqual(loaded, SDF_N_MOLS, failure_msg)
Ejemplo n.º 5
0
    def test_writeSmiles(self):
        """Check that ``writeSmiles`` reproduces the reference ``.smi`` file.

        The library loaded from ``fda_drugs_light.sdf`` is written out as
        SMILES and compared line by line with ``fda_drugs_light.smi``; the
        first line of each file is excluded from the comparison.
        """
        import tempfile

        sdffile = os.path.join(self.dataDir, "fda_drugs_light.sdf")

        lib = SmallMolLib(sdffile)

        # `NamedTemporaryFile().name + ".smi"` only borrowed a unique name:
        # the file actually written was a different path that nothing ever
        # removed. A temporary directory deletes the output on exit.
        with tempfile.TemporaryDirectory() as tmpdir:
            tmpfile = os.path.join(tmpdir, "output.smi")
            lib.writeSmiles(tmpfile)

            with open(tmpfile, "r") as f:
                filelines = f.readlines()[1:]

        with open(os.path.join(self.dataDir, "fda_drugs_light.smi"), "r") as f:
            reflines = f.readlines()[1:]

        self.assertEqual(filelines, reflines)
Ejemplo n.º 6
0
    def test_appendSmallMol(self):
        """Check that appending a single SmallMol grows the library by one.

        A SmallMol built from ``benzamidine.mol2`` is appended to the library
        loaded from ``fda_drugs_light.sdf``; the library is then expected to
        report ``SDF_N_MOLS + 1`` molecules.
        """
        mol2file = os.path.join(self.dataDir, 'benzamidine.mol2')
        sdffile = os.path.join(self.dataDir, 'fda_drugs_light.sdf')

        lib = SmallMolLib(sdffile)
        sm = SmallMol(mol2file)
        lib.appendSmallMol(sm)

        n_mol2_append = lib.numMols

        # Adjacent string literals are concatenated verbatim, so the space
        # separating the two sentences must live inside the first literal.
        self.assertEqual(
            n_mol2_append,
            SDF_N_MOLS + 1,
            msg="The number of molecules in the SmallMolLib is not as expected. "
            "The mol2 were not correctly append. ")
Ejemplo n.º 7
0
    def test_getCommonStructure(self):
        """Check the maximum common substructure of the molecules in the SDF library.

        Verifies that ``getMaximumCommonSubstructure`` returns an RDKit ``Mol``
        and that the atom indexes reported for the first molecule, and their
        count, match ``CMS_AIDX_MOL0`` and ``CMS_N_ATOMS``.
        """
        library = SmallMolLib(os.path.join(self.dataDir, "fda_drugs_light.sdf"))

        substructure, atom_indexes, _ = getMaximumCommonSubstructure(
            library._mols, returnAtomIdxs=True)

        self.assertIsInstance(
            substructure,
            rdkit.Chem.rdchem.Mol,
            msg="The object is not a rdkit Molecule object",
        )

        # Only the entry for the first molecule is checked.
        mol0_indexes = atom_indexes[0]
        mol0_count = len(mol0_indexes)

        self.assertEqual(
            mol0_indexes,
            CMS_AIDX_MOL0,
            msg="The atomidx of the CMS for the mol0 are not the expected ones",
        )
        self.assertEqual(
            mol0_count,
            CMS_N_ATOMS,
            msg="The number of atom in the CMS for the mol0 are not the expected ones",
        )
Ejemplo n.º 8
0
    def test_writeSdf(self):
        """Check that ``writeSdf`` produces a file that can be loaded back.

        The library loaded from ``fda_drugs_light.sdf`` is written to a
        temporary SDF file; the test verifies that the file exists and that a
        SmallMolLib can be constructed from it.
        """
        import tempfile

        sdffile = os.path.join(self.dataDir, 'fda_drugs_light.sdf')
        lib = SmallMolLib(sdffile)

        # `NamedTemporaryFile().name + '.sdf'` only borrowed a unique name and
        # left the written file behind. A temporary directory removes the
        # output when the block exits.
        with tempfile.TemporaryDirectory() as tmpdir:
            sdfname = os.path.join(tmpdir, 'output.sdf')
            lib.writeSdf(sdfname)

            sdf_exists = os.path.isfile(sdfname)

            self.assertTrue(sdf_exists, msg="The sdf written was not found")

            sdf = SmallMolLib(sdfname)

            # The trailing space matters: adjacent literals are joined verbatim.
            self.assertIsInstance(
                sdf,
                SmallMolLib,
                msg="The sdf written was not correctly loaded. Probably the previous "
                "writing went wrong")
Ejemplo n.º 9
0
    def test_loaSdffile(self):
        """Check that the ``.sdf`` and ``.sdf.gz`` data files both load ``SDF_N_MOLS`` molecules."""
        for sdf_name in ("fda_drugs_light.sdf", "fda_drugs_light.sdf.gz"):
            library = SmallMolLib(os.path.join(self.dataDir, sdf_name))
            n_mols = library.numMols
            self.assertEqual(
                n_mols,
                SDF_N_MOLS,
                f"Molecules not correctly loaded. Expected: {SDF_N_MOLS}; Now: {n_mols}",
            )
Ejemplo n.º 10
0
    def test_cluster_shape(self):
        """Check that shape-based clustering reports an integer number of clusters.

        Runs ``cluster(mols, 'shape', returnDetails=True)`` on the molecules
        of ``fda_drugs_light.sdf`` and inspects ``numClusters`` in the
        returned details.
        """
        sdffile = os.path.join(self.dataDir, 'fda_drugs_light.sdf')
        lib = SmallMolLib(sdffile)
        mols = lib._mols

        # The cluster assignment itself is not inspected by this test.
        _, det = cluster(mols, 'shape', returnDetails=True)

        n_clusters = det['numClusters']

        # np.integer is the common base class of every NumPy integer scalar
        # (np.int64 included), so the check no longer depends on the
        # platform's default integer width.
        self.assertIsInstance(n_clusters, np.integer, msg="None valid number of clusters")
Ejemplo n.º 11
0
def areLigandsOptimized(sdf_file):
    """Report which ligands of an SDF file fail the ``isLigandOptimized`` check.

    Parameters
    ----------
    sdf_file : str
        Path handed to ``SmallMolLib``.

    Returns
    -------
    all_optimized : bool
        True when no ligand failed the check.
    not_optimized : list
        The ``ligname`` of every ligand that failed the check.
    """
    from moleculekit.smallmol.smallmollib import SmallMolLib

    # Read the name before converting, then test the converted molecule.
    named_mols = ((entry.ligname, entry.toMolecule()) for entry in SmallMolLib(sdf_file))
    failed = [name for name, mol in named_mols if not isLigandOptimized(mol)]

    return not failed, failed
Ejemplo n.º 12
0
    def test_restrained_embedding(self):
        """Check that ``restrainedEmbed`` yields the stored reference coordinates.

        The restraint positions and the reference coordinates are read from
        ``.npy`` files in the data directory; after embedding the first
        molecule of ``restrain_molecule.sdf`` its coordinates must be close
        (``np.allclose``) to the reference.
        """
        from moleculekit.smallmol.tools.restrainedembed import restrainedEmbed
        import numpy as np

        def data_path(filename):
            return os.path.join(self.dataDir, filename)

        # `.item()` unwraps the single object stored in the array.
        atomPos = np.load(data_path('restrain_positions.npy'), allow_pickle=True).item()
        refcoords = np.load(data_path('restrain_final_coords.npy'), allow_pickle=True)
        first_mol = SmallMolLib(data_path('restrain_molecule.sdf'))[0]

        restrainedEmbed(first_mol._mol, atomPos)

        assert np.allclose(refcoords, first_mol._coords)
Ejemplo n.º 13
0
    def test_depict(self):
        """Check the two output modes of ``SmallMolLib.depict``.

        The depiction written to an SVG file must have the same size in bytes
        as the reference ``sdf.svg``, and the depiction requested with
        ``ipython=True`` must be an ``IPython.core.display.SVG`` object.
        """
        import tempfile
        import IPython
        refimg = os.path.join(self.dataDir, 'sdf.svg')
        sdffile = os.path.join(self.dataDir, 'fda_drugs_light.sdf')

        lib = SmallMolLib(sdffile)

        # `NamedTemporaryFile().name + '.svg'` only borrowed a unique name and
        # left the written image behind. A temporary directory removes the
        # output when the block exits, so its size is read inside the block.
        with tempfile.TemporaryDirectory() as tmpdir:
            img_name = os.path.join(tmpdir, 'depiction.svg')
            lib.depict(sketch=True, filename=img_name)
            sm_img_size = os.path.getsize(img_name)

        _img = lib.depict(sketch=True, ipython=True)

        refimg_size = os.path.getsize(refimg)

        self.assertIsInstance(
            _img,
            IPython.core.display.SVG,
            msg="The object is not an IPython image as expected")
        self.assertEqual(
            sm_img_size,
            refimg_size,
            msg="The svg image does not have the same size of the reference")
Ejemplo n.º 14
0
def areLigandsOptimized(sdf_file, max_check=None):
    """Report which ligands of an SDF file fail the ``isLigandOptimized`` check.

    Parameters
    ----------
    sdf_file : str
        Path handed to ``SmallMolLib``.
    max_check : int, optional
        When given, iteration stops as soon as the ligand index reaches this
        value. By default every ligand is checked.

    Returns
    -------
    all_optimized : bool
        True when none of the inspected ligands failed the check.
    not_optimized : list
        The ``ligname`` of every inspected ligand that failed the check.
    """
    from moleculekit.smallmol.smallmollib import SmallMolLib

    failed = []
    for index, entry in enumerate(SmallMolLib(sdf_file)):
        limit_reached = max_check is not None and index >= max_check
        if limit_reached:
            break

        # Read the name before converting, then test the converted molecule.
        name = entry.ligname
        if isLigandOptimized(entry.toMolecule()):
            continue
        failed.append(name)

    return not failed, failed
Ejemplo n.º 15
0
    def test_cluster_pathFingerprints(self):
        """Check the ``pathFingerprints`` clustering of the SDF library.

        The number of clusters and the cluster populations in the returned
        details must match ``PATHFINGERPRINTS_N_CLUSTER`` and
        ``PATHFINGERPRINTS_POPULATION_CLUSTER``.
        """
        library = SmallMolLib(os.path.join(self.dataDir, 'fda_drugs_light.sdf'))

        # The cluster assignment itself is not inspected by this test.
        _, details = cluster(library._mols, 'pathFingerprints', returnDetails=True)

        found_clusters = details['numClusters']
        found_populations = details['populations'].tolist()

        self.assertEqual(
            found_clusters,
            PATHFINGERPRINTS_N_CLUSTER,
            msg="The number of cluster identified are not as expected",
        )
        self.assertEqual(
            found_populations,
            PATHFINGERPRINTS_POPULATION_CLUSTER,
            msg="The population fo the cluster are not the expected one",
        )
Ejemplo n.º 16
0
def areLigandsDocked(prot_file, sdf_file, threshold=10, max_check=None):
    """Report which ligands of an SDF file fail the ``isLigandDocked`` check.

    Parameters
    ----------
    prot_file : str
        Path handed to ``Molecule`` to load the protein.
    sdf_file : str
        Path handed to ``SmallMolLib`` to load the ligands.
    threshold : int
        Value forwarded unchanged to ``isLigandDocked``.
    max_check : int, optional
        When given, iteration stops as soon as the ligand index reaches this
        value. By default every ligand is checked.

    Returns
    -------
    all_docked : bool
        True when none of the inspected ligands failed the check.
    not_docked : list
        The ``ligname`` of every inspected ligand that failed the check.
    """
    from moleculekit.smallmol.smallmollib import SmallMolLib
    from moleculekit.molecule import Molecule

    failed = []
    protein = Molecule(prot_file)
    for index, entry in enumerate(SmallMolLib(sdf_file)):
        limit_reached = max_check is not None and index >= max_check
        if limit_reached:
            break

        # Read the name before converting, then test the converted molecule.
        name = entry.ligname
        if isLigandDocked(protein, entry.toMolecule(), threshold):
            continue
        failed.append(name)

    return not failed, failed
Ejemplo n.º 17
0
    def test_restrained_embedding(self):
        """Check that ``restrainedEmbed`` keeps the restrained atoms near the reference.

        Only the atoms listed as keys of the loaded restraint positions are
        compared; the largest absolute coordinate difference against the
        stored reference must stay below 0.1.
        """
        from moleculekit.smallmol.tools.restrainedembed import restrainedEmbed
        import numpy as np

        def data_path(filename):
            return os.path.join(self.dataDir, filename)

        # `.item()` unwraps the single object stored in the array.
        atomPos = np.load(data_path("restrain_positions.npy"), allow_pickle=True).item()
        refcoords = np.load(data_path("restrain_final_coords.npy"), allow_pickle=True)
        first_mol = SmallMolLib(data_path("restrain_molecule.sdf"))[0]

        restrainedEmbed(first_mol._mol, atomPos)

        restrained = np.sort(list(atomPos.keys()))
        expected = refcoords[restrained].squeeze()
        obtained = first_mol._coords[restrained].squeeze()
        largest_deviation = np.abs(expected - obtained).max()
        assert largest_deviation < 0.1
Ejemplo n.º 18
0
    def test_cluster_mcs(self):
        """Check the ``mcs`` clustering of the SDF library.

        The number of clusters and the cluster populations in the returned
        details must match ``MCS_N_CLUSTER`` and ``MCS_POPULATION_CLUSTER``.
        """
        library = SmallMolLib(os.path.join(self.dataDir, "fda_drugs_light.sdf"))

        # The cluster assignment itself is not inspected by this test.
        _, details = cluster(library._mols, "mcs", returnDetails=True)

        found_clusters = details["numClusters"]
        found_populations = details["populations"].tolist()

        self.assertEqual(
            found_clusters,
            MCS_N_CLUSTER,
            msg="The number of cluster identified are not as expected",
        )
        self.assertEqual(
            found_populations,
            MCS_POPULATION_CLUSTER,
            msg="The population fo the cluster are not the expected one",
        )
Ejemplo n.º 19
0
    def test_removeMols(self):
        """Check that ``removeMols`` drops the molecules at the given indexes.

        The names found at ``SDF_IDS_DELETE`` must first match
        ``SDF_MOLNAME_DELETE``; after ``removeMols`` the names found at the
        same indexes must differ from the ones seen before the removal.
        """
        sdffile = os.path.join(self.dataDir, 'fda_drugs_light.sdf')

        lib = SmallMolLib(sdffile)

        mols_ids = SDF_IDS_DELETE
        ref_mols_name = SDF_MOLNAME_DELETE

        mols_name = [s.ligname for s in lib.getMols(mols_ids)]

        # The trailing space matters: adjacent literals are joined verbatim.
        self.assertListEqual(
            mols_name,
            ref_mols_name,
            msg="The molecules at the given indexes do not match with the "
            "expected")
        lib.removeMols(mols_ids)

        mols_name_now = [s.ligname for s in lib.getMols(mols_ids)]

        # assertNotEqual reports both operands on failure, unlike
        # assertFalse(a == b), which only reports "True is not false".
        self.assertNotEqual(mols_name_now,
                            mols_name,
                            msg="The molecules seem to not be deleted correctly")
Ejemplo n.º 20
0
def getChemblSimilarLigandsBySmile(smi, threshold=85, returnSmiles=False):
    """
    Returns a SmallMolLib object of the ligands whose similarity to a smile is at least the specified
    threshold. The molecules are retrieved from Chembl. The list of their smiles can be returned as well.

    When a retrieved smile is made of several "."-separated fragments, only the longest fragment is kept.
    Each distinct smile is added to the library only once.

    Parameters
    ----------
    smi: str
        The smile
    threshold: int
        The threshold value to apply for the similarity search
    returnSmiles: bool
        If True, the list of smiles is returned

    Returns
    -------
    lib: moleculekit.smallmol.smallmollib.SmallMolLib
        The SmallMolLib object holding one SmallMol per distinct smile

    smiles: list of str
        The list of smiles. Returned only if returnSmiles is True

    Raises
    ------
    ImportError
        If the chembl_webresource_client package cannot be imported

    Example
    -------
    >>> _, smile = getChemblLigandByDrugName('ibuprofen', returnSmile=True)  # doctest: +SKIP
    >>> lib = getChemblSimilarLigandsBySmile(smile)  # doctest: +SKIP
    >>> lib.numMols  # doctest: +SKIP
    4
    >>> lib, smiles = getChemblSimilarLigandsBySmile(smile, returnSmiles=True)  # doctest: +SKIP
    >>> len(smiles)  # doctest: +SKIP
    4
    """
    from moleculekit.smallmol.smallmol import SmallMol
    from moleculekit.smallmol.smallmollib import SmallMolLib

    try:
        from chembl_webresource_client.new_client import new_client
    except ImportError as err:
        # Chain the original error so the real import failure stays visible.
        raise ImportError(
            "You need to install the chembl_webresource package to use this function. Try using `conda install "
            "-c chembl chembl_webresource_client`.") from err

    similarity = new_client.similarity
    results = similarity.filter(smiles=smi, similarity=threshold).only(
        ["molecule_structures"])
    results = results.all()

    smi_list = []
    seen = set()  # O(1) duplicate check; smi_list keeps first-seen order
    # Index-based access to the result set is kept exactly as before.
    for idx in range(len(results)):
        canonical = results[idx]["molecule_structures"]["canonical_smiles"]
        # Keep the longest fragment (the first one in case of ties).
        fragment = max(canonical.split("."), key=len)

        if fragment not in seen:
            seen.add(fragment)
            smi_list.append(fragment)

    lib = SmallMolLib()
    # A distinct loop name avoids rebinding the `smi` parameter.
    for fragment in smi_list:
        lib.appendSmallMol(SmallMol(fragment))

    if returnSmiles:
        return lib, smi_list

    return lib
Ejemplo n.º 21
0
                rep = '{}: {}'.format(name, len(self.reps.replist))
            else:
                rep = '{}: {}'.format(name, field)
            return rep

        rep = 'SmallMol with {} atoms and {} conformers'.format(self.numAtoms, self.numFrames)
        for p in sorted(self._atom_fields):
            if p.startswith('_'):
                continue
            rep += '\n'
            rep += 'Atom field - {}'.format(p)
        for j in sorted(self.__dict__.keys() - list(SmallMol._atom_fields)):
            if j[0] == '_':
                continue
            rep += '\n'
            rep += formatstr(j, self.__dict__[j])

        return rep

if __name__ == '__main__':
    # Run the module doctests with a sample library (`lib`) and a sample
    # molecule (`sm`) injected into the doctest globals.
    import doctest
    import os
    from moleculekit.home import home
    from moleculekit.smallmol.smallmollib import SmallMolLib

    sdf_path = os.path.join(home(dataDir='test-smallmol'), 'fda_drugs_light.sdf')
    mol2_path = os.path.join(home(dataDir='test-smallmol'), 'benzamidine.mol2')

    lib = SmallMolLib(sdf_path)
    sm = SmallMol(mol2_path)

    # Copies are passed, so the doctests do not receive the objects bound above.
    doctest_globals = {'lib': lib.copy(), 'sm': sm.copy()}
    doctest.testmod(extraglobs=doctest_globals)

Ejemplo n.º 22
0
                rep = f"{name}: {len(self.reps.replist)}"
            else:
                rep = f"{name}: {field}"
            return rep

        rep = f"SmallMol with {self.numAtoms} atoms and {self.numFrames} conformers"
        for p in sorted(self._atom_fields):
            if p.startswith("_"):
                continue
            rep += "\n"
            rep += f"Atom field - {p}"
        for j in sorted(self.__dict__.keys() - list(SmallMol._atom_fields)):
            if j[0] == "_":
                continue
            rep += "\n"
            rep += formatstr(j, self.__dict__[j])

        return rep


if __name__ == "__main__":
    # Run the module doctests with a sample library (`lib`) and a sample
    # molecule (`sm`) injected into the doctest globals.
    import doctest
    from moleculekit.home import home
    from moleculekit.smallmol.smallmollib import SmallMolLib

    sdf_path = os.path.join(home(dataDir="test-smallmol"), "fda_drugs_light.sdf")
    mol2_path = os.path.join(home(dataDir="test-smallmol"), "benzamidine.mol2")

    lib = SmallMolLib(sdf_path)
    sm = SmallMol(mol2_path)

    # Copies are passed, so the doctests do not receive the objects bound above.
    doctest_globals = {"lib": lib.copy(), "sm": sm.copy()}
    doctest.testmod(extraglobs=doctest_globals)