def getChemblSimilarLigandsBySmile(smi, threshold=85, returnSmiles=False):
    """
    Returns a SmallMolLib object of the ligands having a similarity with a smile of at least the specified
    threshold. The molecules are retrieved from Chembl. It is possible to return also the list of smiles.

    For every hit the canonical smile is split on '.' and only the longest fragment (by string length) is kept.
    Each distinct fragment is stored once, in the order in which it is first found.

    Parameters
    ----------
    smi: str
        The smile
    threshold: int
        The threshold value to apply for the similarity search
    returnSmiles: bool
        If True, the list of smiles is returned

    Returns
    -------
    lib: htmd.smallmol.smallmol.SmallMolLib
        The SmallMolLib object
    smiles: list of str
        The list of smiles. Only returned when returnSmiles is True

    Raises
    ------
    ImportError
        If the chembl_webresource_client package cannot be imported

    Example
    -------
    >>> _, smile = getChemblLigandByDrugName('ibuprofen', returnSmile=True)  # doctest: +SKIP
    >>> lib = getChemblSimilarLigandsBySmile(smile)  # doctest: +SKIP
    >>> lib.numMols  # doctest: +SKIP
    4
    >>> lib, smiles = getChemblSimilarLigandsBySmile(smile, returnSmiles=True)  # doctest: +SKIP
    >>> len(smiles)  # doctest: +SKIP
    4
    """
    from htmd.smallmol.smallmol import SmallMolLib, SmallMol
    try:
        from chembl_webresource_client.new_client import new_client
    except ImportError as e:
        # Chain the original error so the real cause of the failed import stays visible in the traceback.
        raise ImportError(
            'You need to install the chembl_webresource package to use this function. Try using `conda install '
            '-c chembl chembl_webresource_client`.') from e

    similarity = new_client.similarity
    results = similarity.filter(smiles=smi, similarity=threshold).only(['molecule_structures'])
    results = results.all()

    smi_list = []
    seen = set()  # constant-time duplicate check; smi_list keeps the discovery order
    for record in results:
        structures = record['molecule_structures']
        if not structures:
            # Without structure data there is no smile to read; subscripting None would raise a TypeError.
            continue
        fragments = structures['canonical_smiles'].split('.')
        # Keep the longest fragment; max() returns the first one in case of ties.
        fragment = max(fragments, key=len)
        if fragment not in seen:
            seen.add(fragment)
            smi_list.append(fragment)

    lib = SmallMolLib()
    for fragment in smi_list:
        lib.appendSmallMol(SmallMol(fragment))

    if returnSmiles:
        return lib, smi_list
    return lib
def test_03_appendSmallMolLib(self):
    # Merging a library loaded from the sdf with a second copy of itself must double the molecule count.
    sdf_path = os.path.join(self.dataDir, 'fda_drugs_light.sdf')
    merged = SmallMolLib(sdf_path)
    duplicate = SmallMolLib(sdf_path)

    merged.appendSmallLib(duplicate)

    expected_total = SDF_N_MOLS*2
    self.assertEqual(
        merged.numMols,
        expected_total,
        msg="The number of molecules in the SmallMolLib is not as expected."
            "The two sdf were not correctly merged. ")
def test_04_appendSmallMol(self):
    # Appending one molecule read from a mol2 file must grow the library by exactly one entry.
    data_dir = self.dataDir
    mol2_path = os.path.join(data_dir, 'benzamidine.mol2')
    sdf_path = os.path.join(data_dir, 'fda_drugs_light.sdf')

    library = SmallMolLib(sdf_path)
    extra_mol = SmallMol(mol2_path)
    library.appendSmallMol(extra_mol)

    self.assertEqual(
        library.numMols,
        SDF_N_MOLS+1,
        msg="The number of molecules in the SmallMolLib is not as expected."
            "The mol2 were not correctly append. ")
def test_02_writeSdf(self):
    # Round trip: write the library to an sdf file, check the file exists and can be loaded again.
    import tempfile

    sdffile = os.path.join(self.dataDir, 'fda_drugs_light.sdf')
    lib = SmallMolLib(sdffile)

    # Write inside a private temporary directory so the output is removed when the block exits.
    # Deriving the path from NamedTemporaryFile().name reused the name of a file that is deleted
    # as soon as its handle is collected, and left the written '.sdf' behind after the test.
    with tempfile.TemporaryDirectory() as tmpdir:
        sdfname = os.path.join(tmpdir, 'written.sdf')
        lib.writeSdf(sdfname)

        sdf_exists = os.path.isfile(sdfname)
        self.assertTrue(sdf_exists, msg="The sdf written was not found")

        # Reload while the directory still exists.
        sdf = SmallMolLib(sdfname)
        self.assertIsInstance(sdf, SmallMolLib,
                              msg="The sdf written was not correctly loaded. Probably the previous"
                                  "writing went wrong")
def test_04_loadSdffile(self):
    # Loading the reference sdf must yield the expected number of molecules.
    library = SmallMolLib(os.path.join(self.dataDir, 'fda_drugs_light.sdf'))
    loaded = library.numMols

    failure_template = 'Molecules not correctly loaded. ' 'Expected: {}; Now: {}'
    self.assertEqual(loaded, SDF_N_MOLS, msg=failure_template.format(SDF_N_MOLS, loaded))
def test_00_getCommonStructure(self):
    # Compute the maximum common substructure of the library molecules and compare the atom
    # indexes reported for the first molecule against the stored reference values.
    library = SmallMolLib(os.path.join(self.dataDir, 'fda_drugs_light.sdf'))

    # The third returned value is not inspected by this test.
    substructure, matched_idxs, _ = getMaximumCommonSubstructure(
        library._mols, returnAtomIdxs=True)

    self.assertIsInstance(
        substructure, rdkit.Chem.rdchem.Mol,
        msg="The object is not a rdkit Molecule object")

    first_mol_idxs = matched_idxs[0]

    self.assertEqual(
        first_mol_idxs, CMS_AIDX_MOL0,
        msg="The atomidx of the CMS for the mol0 are not the"
            " expected ones")
    self.assertEqual(
        len(first_mol_idxs), CMS_N_ATOMS,
        msg="The number of atom in the CMS for the mol0 are not the"
            " expected ones")
def test_07_depict(self):
    # Depict the library to an svg file and as an IPython object; compare the written file size
    # with the reference image and check the type of the in-memory object.
    import tempfile

    import IPython

    refimg = os.path.join(self.dataDir, 'sdf.svg')
    sdffile = os.path.join(self.dataDir, 'fda_drugs_light.sdf')
    lib = SmallMolLib(sdffile)

    # Render into a private temporary directory so the svg is removed when the block exits.
    # Deriving the path from NamedTemporaryFile().name reused the name of a file that is deleted
    # as soon as its handle is collected, and left the written '.svg' behind after the test.
    with tempfile.TemporaryDirectory() as tmpdir:
        img_name = os.path.join(tmpdir, 'depict.svg')
        lib.depict(sketch=True, filename=img_name)
        _img = lib.depict(sketch=True, ipython=True)

        refimg_size = os.path.getsize(refimg)
        # Read the size before the temporary directory disappears.
        sm_img_size = os.path.getsize(img_name)

    self.assertIsInstance(_img, IPython.core.display.SVG,
                          msg="The object is not an IPython image as expected")
    self.assertEqual(sm_img_size, refimg_size,
                     msg="The svg image does not have the same size of the reference")
def test_06_convertToDataFrame(self):
    # Convert the library to a pandas DataFrame and check its type, its column names and the
    # value stored in the first column of row 99.
    sdffile = os.path.join(self.dataDir, 'fda_drugs_light.sdf')
    lib = SmallMolLib(sdffile)

    df = lib.toDataFrame()

    self.assertIsInstance(df, core.frame.DataFrame,
                          msg="The SmallMolLib object was not correctly converted into pandas"
                              "DataFrame")

    cols = df.columns.tolist()
    ref_cols = SDF_FIELDS
    self.assertEqual(cols, ref_cols,
                     msg="The fields in the SmallMolLib object was not the expected one")

    # Single positional lookup (row 99, column 0). The chained form df.iloc[99][0] indexes the
    # row Series with an integer key, which pandas deprecates when the Series is label-indexed.
    ligname_99 = df.iloc[99, 0]
    ref_ligname = SDF_LOC_0_99
    self.assertEqual(ligname_99, ref_ligname,
                     msg="The ligand name found is not the expected one")
def test_06_cluster_shape(self):
    # Cluster the library molecules with the 'shape' method and check the type of the
    # reported number of clusters.
    library = SmallMolLib(os.path.join(self.dataDir, 'fda_drugs_light.sdf'))

    # Only the details dictionary is inspected; the first returned value is ignored.
    _, details = cluster(library._mols, 'shape', returnDetails=True)

    self.assertIsInstance(
        details['numClusters'],
        np.int64,
        msg="None valid number of clusters")
def test_01_cluster_maccs(self):
    # Cluster the library molecules with the 'maccs' method and compare the number of clusters
    # and their populations against the stored reference values.
    library = SmallMolLib(os.path.join(self.dataDir, 'fda_drugs_light.sdf'))

    # Only the details dictionary is inspected; the first returned value is ignored.
    _, details = cluster(library._mols, 'maccs', returnDetails=True)

    found_nclusters = details['numClusters']
    found_populations = details['populations'].tolist()

    self.assertEqual(
        found_nclusters, MACCS_N_CLUSTER,
        msg="The number of cluster identified are not as expected")
    self.assertEqual(
        found_populations, MACCS_POPULATION_CLUSTER,
        msg="The population fo the cluster are not the expected one")
def test_05_removeMols(self):
    # Check the names at the indexes to delete, remove those molecules, then verify that the
    # same indexes no longer return the same names.
    library = SmallMolLib(os.path.join(self.dataDir, 'fda_drugs_light.sdf'))
    ids_to_delete = SDF_IDS_DELETE

    def names_at(ids):
        # Ligand names of the molecules currently stored at the given indexes.
        return [mol.ligname for mol in library.getMols(ids)]

    names_before = names_at(ids_to_delete)
    self.assertListEqual(
        names_before, SDF_MOLNAME_DELETE,
        msg="The molecules at the given indexes do not match with the"
            "expected")

    library.removeMols(ids_to_delete)

    names_after = names_at(ids_to_delete)
    self.assertFalse(
        names_after == names_before,
        msg="The molecules seem to not be deleted correctly")