def test_fcfp():
    """FCFP fingerprints

    Builds dense ECFP fingerprints with ``use_pharm_features=True`` for two
    molecules parsed from SMILES, compares the set bits against stored
    reference indices and checks the Dice and Tanimoto similarities.  The
    same checks are repeated after explicit hydrogens are added.
    """
    mol1 = oddt.toolkit.readstring(
        "smi", "CC1=C(C(=CC=C1)C)NC(=O)CN2CCN(CC2)CC(=O)N3CCC4=C(C3)C=CS4")
    mol2 = oddt.toolkit.readstring(
        "smi", "CC1=C(C(=CC=C1)O)NC(=O)CN2CCN(CC2)CC(=O)N3CCC4=C(C3)C=CS4")

    ref1 = [
        46, 111, 305, 310, 362, 384, 409, 451, 467, 548,
        572, 595, 607, 608, 620, 659, 691, 699, 724, 743,
        752, 842, 926, 935, 974, 1037, 1072, 1094, 1135, 1143,
        1161, 1172, 1313, 1325, 1368, 1399, 1461, 1486, 1488, 1492,
        1603, 1619, 1648, 1665, 1666, 1838, 1887, 1900, 1948, 1961,
        1972, 1975, 1996, 2000, 2052, 2085, 2094, 2174, 2232, 2236,
        2368, 2382, 2383, 2402, 2483, 2492, 2527, 2593, 2616, 2706,
        2789, 2899, 2922, 2945, 2966, 3102, 3117, 3176, 3189, 3215,
        3225, 3297, 3326, 3349, 3373, 3513, 3525, 3535, 3601, 3619,
        3780, 3820, 3897, 3919, 3976, 3981, 4050, 4079, 4091
    ]
    ref2 = [
        46, 111, 143, 172, 259, 305, 362, 409, 451, 467,
        507, 518, 548, 583, 595, 607, 608, 620, 639, 691,
        693, 724, 752, 784, 825, 842, 926, 1037, 1087, 1094,
        1098, 1135, 1143, 1161, 1172, 1286, 1325, 1368, 1371, 1395,
        1399, 1461, 1486, 1488, 1492, 1565, 1619, 1648, 1655, 1665,
        1887, 1890, 1900, 1948, 1961, 1968, 1972, 1975, 1976, 1996,
        2000, 2007, 2094, 2125, 2174, 2232, 2236, 2368, 2382, 2383,
        2483, 2492, 2571, 2593, 2606, 2638, 2706, 2789, 2922, 2945,
        2966, 2986, 3030, 3100, 3102, 3117, 3227, 3326, 3350, 3373,
        3406, 3419, 3535, 3577, 3619, 3697, 3742, 3820, 3839, 3919,
        3981, 4043, 4050, 4079, 4091
    ]

    def check_fingerprints():
        # Fingerprints are rebuilt on every call so that each stage of the
        # test exercises the current state of the molecules.
        mol1_fp = ECFP(mol1, depth=8, size=4096, sparse=False,
                       use_pharm_features=True)
        mol2_fp = ECFP(mol2, depth=8, size=4096, sparse=False,
                       use_pharm_features=True)

        assert_array_equal(ref1, np.where(mol1_fp)[0])
        assert_array_equal(ref2, np.where(mol2_fp)[0])

        assert_almost_equal(dice(mol1_fp, mol2_fp), 0.64074074)
        assert_almost_equal(tanimoto(mol1_fp, mol2_fp), 0.5)

    check_fingerprints()

    # adding Hs should not change anything; the fingerprints must be
    # recomputed here, otherwise the assertions only re-check stale arrays
    mol1.addh()
    mol2.addh()

    check_fingerprints()
def test_ecfp_invaraiants():
    """ECFP: test random reordering

    The sparse ECFP of sildenafil is computed once and must stay equal after
    each of ten successive ``shuffle_mol`` calls (helper defined elsewhere;
    presumably it reorders the molecule's atoms).
    """
    smiles = "CCCc1nn(C)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)nc12"
    sildenafil = oddt.toolkit.readstring("smi", smiles)

    ecfp_kwargs = dict(depth=4, size=4096, sparse=True)
    reference_fp = ECFP(sildenafil, **ecfp_kwargs)

    for _ in range(10):
        # Each round shuffles the already shuffled molecule again.
        sildenafil = shuffle_mol(sildenafil)
        shuffled_fp = ECFP(sildenafil, **ecfp_kwargs)
        assert_array_equal(reference_fp, shuffled_fp)
def test_sparse_similarity():
    """Sparse similarity

    For both ``dice`` and ``tanimoto``: the value computed from sparse
    fingerprints must match the one computed from dense fingerprints, and
    empty / all-zero inputs must give a similarity of 0.
    """
    smiles_a = "CC1=C(C(=CC=C1)C)NC(=O)CN2CCN(CC2)CC(=O)N3CCC4=C(C3)C=CS4"
    smiles_b = "CC1=C(C(=CC=C1)O)NC(=O)CN2CCN(CC2)CC(=O)N3CCC4=C(C3)C=CS4"
    mol1 = oddt.toolkit.readstring("smi", smiles_a)
    mol2 = oddt.toolkit.readstring("smi", smiles_b)

    dense_1 = ECFP(mol1, depth=8, size=4096, sparse=False)
    dense_2 = ECFP(mol2, depth=8, size=4096, sparse=False)

    sparse_1 = ECFP(mol1, depth=8, size=4096, sparse=True)
    sparse_2 = ECFP(mol2, depth=8, size=4096, sparse=True)

    # The same three checks apply to each similarity measure, dice first.
    for similarity in (dice, tanimoto):
        assert_almost_equal(similarity(sparse_1, sparse_2, sparse=True),
                            similarity(dense_1, dense_2))
        assert_equal(similarity([], [], sparse=True), 0.)
        assert_equal(similarity(np.zeros(10), np.zeros(10), sparse=False), 0.)
def ecfp(smiles):
    """Return the dense ECFP fingerprint of a SMILES string as a list.

    Parameters
    ----------
    smiles : str
        SMILES representation of the molecule, parsed with
        ``oddt.toolkit.readstring("smi", ...)``.

    Returns
    -------
    list
        The elements of the fingerprint produced by
        ``ECFP(mol, depth=2, size=4096, sparse=False)``, in order.
    """
    mol = oddt.toolkit.readstring("smi", smiles)
    # list() already yields an independent list; no element-wise copy needed.
    return list(ECFP(mol, depth=2, size=4096, sparse=False))
def test_ecfp():
    """ECFP fingerprints

    Dense ECFP fingerprints of two molecules are compared against stored
    reference bit indices and expected Dice / Tanimoto values.  The checks
    run three times: on the parsed molecules, after adding hydrogens and
    after removing them again.
    """
    mol1 = oddt.toolkit.readstring(
        "smi", "CC1=C(C(=CC=C1)C)NC(=O)CN2CCN(CC2)CC(=O)N3CCC4=C(C3)C=CS4")
    mol2 = oddt.toolkit.readstring(
        "smi", "CC1=C(C(=CC=C1)O)NC(=O)CN2CCN(CC2)CC(=O)N3CCC4=C(C3)C=CS4")

    ref1 = [
        2, 100, 176, 185, 200, 203, 359, 382, 447, 509,
        518, 550, 572, 583, 598, 606, 607, 684, 818, 821,
        832, 861, 960, 992, 1006, 1019, 1042, 1050, 1059, 1103,
        1175, 1281, 1315, 1377, 1431, 1470, 1479, 1512, 1577, 1588,
        1598, 1620, 1633, 1647, 1663, 1723, 1749, 1751, 1775, 1781,
        1821, 1837, 1899, 1963, 1969, 1986, 2013, 2253, 2343, 2355,
        2368, 2435, 2547, 2654, 2657, 2702, 2722, 2725, 2803, 2816,
        2853, 2870, 2920, 2992, 3028, 3056, 3074, 3103, 3190, 3203,
        3277, 3321, 3362, 3377, 3383, 3401, 3512, 3546, 3552, 3585,
        3593, 3617, 3674, 3759, 3784, 3790, 3832, 3895, 3937, 3956,
        3974, 4007, 4033
    ]
    ref2 = [
        43, 100, 176, 200, 203, 231, 382, 396, 447, 490,
        518, 583, 606, 607, 650, 818, 821, 832, 840, 861,
        907, 950, 960, 992, 1006, 1013, 1019, 1042, 1050, 1059,
        1103, 1104, 1112, 1175, 1281, 1293, 1315, 1377, 1431, 1470,
        1512, 1543, 1577, 1588, 1598, 1633, 1647, 1663, 1723, 1749,
        1751, 1757, 1759, 1775, 1781, 1821, 1837, 1880, 1963, 1969,
        1986, 2253, 2355, 2368, 2435, 2544, 2547, 2654, 2702, 2722,
        2725, 2726, 2799, 2816, 2853, 2870, 2920, 2992, 3028, 3074,
        3190, 3203, 3277, 3290, 3333, 3362, 3383, 3401, 3512, 3546,
        3552, 3585, 3593, 3617, 3640, 3660, 3674, 3759, 3784, 3790,
        3805, 3832, 3856, 3895, 3924, 3956, 3974, 3992, 4007, 4033
    ]

    def verify_current_state():
        # Recompute both fingerprints from the molecules as they are now.
        fp_first = ECFP(mol1, depth=8, size=4096, sparse=False)
        fp_second = ECFP(mol2, depth=8, size=4096, sparse=False)

        assert_array_equal(ref1, np.where(fp_first)[0])
        assert_array_equal(ref2, np.where(fp_second)[0])

        assert_almost_equal(dice(fp_first, fp_second), 0.69999999)
        assert_almost_equal(tanimoto(fp_first, fp_second), 0.63846153)

    verify_current_state()

    # adding Hs should not change anything
    mol1.addh()
    mol2.addh()
    verify_current_state()

    # removing Hs should not change anything
    mol1.removeh()
    mol2.removeh()
    verify_current_state()
def time_ecfp_pharm(self):
    """Run ECFP at depth 4 with pharmacophoric features on every molecule.

    Iterates over ``self.mols`` and discards each result.
    """
    for molecule in self.mols:
        ECFP(molecule, depth=4, use_pharm_features=True)
def time_ecfp_dense(self):
    """Run ECFP at depth 4 with ``sparse=False`` on every molecule.

    Iterates over ``self.mols`` and discards each result.
    """
    for molecule in self.mols:
        ECFP(molecule, depth=4, sparse=False)
def time_ecfp(self):
    """Run ECFP at depth 4 with otherwise default settings on every molecule.

    Iterates over ``self.mols`` and discards each result.
    """
    for molecule in self.mols:
        ECFP(molecule, depth=4)