def test_plec_similarity():
    """Check PLEC Dice similarities for sparse and dense fingerprints."""
    # Expected Dice score of every remaining pose against the first one.
    target_outcome = np.array([
        0.833, 0.729, 0.849, 0.785, 0.821, 0.604, 0.868, 0.656, 0.712,
        0.652, 0.699, 0.785, 0.736, 0.745, 0.661, 0.667, 0.555, 0.616,
        0.714,
    ])

    # Load every docked pose, then keep only those titled '312335'.
    ligand_file = os.path.join(test_data_dir,
                               'data/dude/xiap/actives_docked.sdf')
    all_poses = list(oddt.toolkit.readfile('sdf', ligand_file))
    mols = [pose for pose in all_poses if pose.title == '312335']
    for pose in mols:
        pose.addh(only_polar=True)

    receptor_file = os.path.join(test_data_dir,
                                 'data/dude/xiap/receptor_rdkit.pdb')
    receptor = next(oddt.toolkit.readfile('pdb', receptor_file))
    receptor.protein = True
    receptor.addh(only_polar=True)

    # Fingerprints built with PLEC's default output are compared with
    # dice(..., sparse=True).
    reference_sparse = PLEC(mols[0], receptor)
    outcome_sparse = []
    for pose in mols[1:]:
        pose_fp = PLEC(pose, receptor)
        outcome_sparse.append(dice(reference_sparse, pose_fp, sparse=True))

    # Same comparison with explicitly dense fingerprints.
    reference_dense = PLEC(mols[0], receptor, sparse=False)
    outcome_dense = []
    for pose in mols[1:]:
        pose_fp = PLEC(pose, receptor, sparse=False)
        outcome_dense.append(dice(reference_dense, pose_fp, sparse=False))

    assert_array_almost_equal(outcome_sparse, target_outcome, decimal=2)
    assert_array_almost_equal(outcome_dense, target_outcome, decimal=2)
def test_ecfp():
    """Check ECFP bits and similarities, with and without explicit Hs."""
    mol1 = oddt.toolkit.readstring(
        "smi", "CC1=C(C(=CC=C1)C)NC(=O)CN2CCN(CC2)CC(=O)N3CCC4=C(C3)C=CS4")
    mol2 = oddt.toolkit.readstring(
        "smi", "CC1=C(C(=CC=C1)O)NC(=O)CN2CCN(CC2)CC(=O)N3CCC4=C(C3)C=CS4")

    # Expected indices of the non-zero fingerprint positions for mol1.
    ref1 = [
        2, 100, 176, 185, 200, 203, 359, 382, 447, 509, 518, 550, 572, 583,
        598, 606, 607, 684, 818, 821, 832, 861, 960, 992, 1006, 1019, 1042,
        1050, 1059, 1103, 1175, 1281, 1315, 1377, 1431, 1470, 1479, 1512,
        1577, 1588, 1598, 1620, 1633, 1647, 1663, 1723, 1749, 1751, 1775,
        1781, 1821, 1837, 1899, 1963, 1969, 1986, 2013, 2253, 2343, 2355,
        2368, 2435, 2547, 2654, 2657, 2702, 2722, 2725, 2803, 2816, 2853,
        2870, 2920, 2992, 3028, 3056, 3074, 3103, 3190, 3203, 3277, 3321,
        3362, 3377, 3383, 3401, 3512, 3546, 3552, 3585, 3593, 3617, 3674,
        3759, 3784, 3790, 3832, 3895, 3937, 3956, 3974, 4007, 4033,
    ]
    # Expected indices of the non-zero fingerprint positions for mol2.
    ref2 = [
        43, 100, 176, 200, 203, 231, 382, 396, 447, 490, 518, 583, 606, 607,
        650, 818, 821, 832, 840, 861, 907, 950, 960, 992, 1006, 1013, 1019,
        1042, 1050, 1059, 1103, 1104, 1112, 1175, 1281, 1293, 1315, 1377,
        1431, 1470, 1512, 1543, 1577, 1588, 1598, 1633, 1647, 1663, 1723,
        1749, 1751, 1757, 1759, 1775, 1781, 1821, 1837, 1880, 1963, 1969,
        1986, 2253, 2355, 2368, 2435, 2544, 2547, 2654, 2702, 2722, 2725,
        2726, 2799, 2816, 2853, 2870, 2920, 2992, 3028, 3074, 3190, 3203,
        3277, 3290, 3333, 3362, 3383, 3401, 3512, 3546, 3552, 3585, 3593,
        3617, 3640, 3660, 3674, 3759, 3784, 3790, 3805, 3832, 3856, 3895,
        3924, 3956, 3974, 3992, 4007, 4033,
    ]

    def check_current_state():
        # Rebuild both fingerprints from the molecules as they are now and
        # compare them with the reference bits and similarity values.
        fp_first = ECFP(mol1, depth=8, size=4096, sparse=False)
        fp_second = ECFP(mol2, depth=8, size=4096, sparse=False)
        assert_array_equal(ref1, np.where(fp_first)[0])
        assert_array_equal(ref2, np.where(fp_second)[0])
        assert_almost_equal(dice(fp_first, fp_second), 0.69999999)
        assert_almost_equal(tanimoto(fp_first, fp_second), 0.63846153)

    # Molecules exactly as parsed from SMILES.
    check_current_state()

    # Adding hydrogens must leave the fingerprints untouched.
    mol1.addh()
    mol2.addh()
    check_current_state()

    # Removing hydrogens again must leave the fingerprints untouched too.
    mol1.removeh()
    mol2.removeh()
    check_current_state()
def test_fcfp():
    """FCFP fingerprints

    FCFP is obtained here by calling ``ECFP`` with
    ``use_pharm_features=True``. The fingerprints are checked against
    reference bit positions and similarity values, first on the molecules as
    parsed from SMILES and then again after explicit hydrogens are added.
    """
    mol1 = oddt.toolkit.readstring(
        "smi", "CC1=C(C(=CC=C1)C)NC(=O)CN2CCN(CC2)CC(=O)N3CCC4=C(C3)C=CS4")
    mol2 = oddt.toolkit.readstring(
        "smi", "CC1=C(C(=CC=C1)O)NC(=O)CN2CCN(CC2)CC(=O)N3CCC4=C(C3)C=CS4")

    # Expected indices of the non-zero fingerprint positions for mol1.
    ref1 = [
        46, 111, 305, 310, 362, 384, 409, 451, 467, 548, 572, 595, 607, 608,
        620, 659, 691, 699, 724, 743, 752, 842, 926, 935, 974, 1037, 1072,
        1094, 1135, 1143, 1161, 1172, 1313, 1325, 1368, 1399, 1461, 1486,
        1488, 1492, 1603, 1619, 1648, 1665, 1666, 1838, 1887, 1900, 1948,
        1961, 1972, 1975, 1996, 2000, 2052, 2085, 2094, 2174, 2232, 2236,
        2368, 2382, 2383, 2402, 2483, 2492, 2527, 2593, 2616, 2706, 2789,
        2899, 2922, 2945, 2966, 3102, 3117, 3176, 3189, 3215, 3225, 3297,
        3326, 3349, 3373, 3513, 3525, 3535, 3601, 3619, 3780, 3820, 3897,
        3919, 3976, 3981, 4050, 4079, 4091,
    ]
    # Expected indices of the non-zero fingerprint positions for mol2.
    ref2 = [
        46, 111, 143, 172, 259, 305, 362, 409, 451, 467, 507, 518, 548, 583,
        595, 607, 608, 620, 639, 691, 693, 724, 752, 784, 825, 842, 926,
        1037, 1087, 1094, 1098, 1135, 1143, 1161, 1172, 1286, 1325, 1368,
        1371, 1395, 1399, 1461, 1486, 1488, 1492, 1565, 1619, 1648, 1655,
        1665, 1887, 1890, 1900, 1948, 1961, 1968, 1972, 1975, 1976, 1996,
        2000, 2007, 2094, 2125, 2174, 2232, 2236, 2368, 2382, 2383, 2483,
        2492, 2571, 2593, 2606, 2638, 2706, 2789, 2922, 2945, 2966, 2986,
        3030, 3100, 3102, 3117, 3227, 3326, 3350, 3373, 3406, 3419, 3535,
        3577, 3619, 3697, 3742, 3820, 3839, 3919, 3981, 4043, 4050, 4079,
        4091,
    ]

    def check_fingerprints():
        # Fingerprints are rebuilt on every call so that the assertions
        # reflect the current state of the molecules rather than arrays
        # computed before the molecules were modified.
        mol1_fp = ECFP(mol1, depth=8, size=4096, sparse=False,
                       use_pharm_features=True)
        mol2_fp = ECFP(mol2, depth=8, size=4096, sparse=False,
                       use_pharm_features=True)
        assert_array_equal(ref1, np.where(mol1_fp)[0])
        assert_array_equal(ref2, np.where(mol2_fp)[0])
        assert_almost_equal(dice(mol1_fp, mol2_fp), 0.64074074)
        assert_almost_equal(tanimoto(mol1_fp, mol2_fp), 0.5)

    check_fingerprints()

    # adding Hs should not change anything
    mol1.addh()
    mol2.addh()
    check_fingerprints()
def test_fcfp():
    """FCFP fingerprints

    FCFP is obtained here by calling ``ECFP`` with
    ``use_pharm_features=True``. Reference bit positions and similarity
    values are verified on the molecules as parsed from SMILES and verified
    again, on freshly computed fingerprints, after explicit hydrogens are
    added.
    """
    smiles_pair = (
        "CC1=C(C(=CC=C1)C)NC(=O)CN2CCN(CC2)CC(=O)N3CCC4=C(C3)C=CS4",
        "CC1=C(C(=CC=C1)O)NC(=O)CN2CCN(CC2)CC(=O)N3CCC4=C(C3)C=CS4",
    )
    mol1 = oddt.toolkit.readstring("smi", smiles_pair[0])
    mol2 = oddt.toolkit.readstring("smi", smiles_pair[1])

    # Expected indices of the non-zero fingerprint positions for mol1.
    ref1 = [
        46, 111, 305, 310, 362, 384, 409, 451, 467, 548, 572, 595, 607, 608,
        620, 659, 691, 699, 724, 743, 752, 842, 926, 935, 974, 1037, 1072,
        1094, 1135, 1143, 1161, 1172, 1313, 1325, 1368, 1399, 1461, 1486,
        1488, 1492, 1603, 1619, 1648, 1665, 1666, 1838, 1887, 1900, 1948,
        1961, 1972, 1975, 1996, 2000, 2052, 2085, 2094, 2174, 2232, 2236,
        2368, 2382, 2383, 2402, 2483, 2492, 2527, 2593, 2616, 2706, 2789,
        2899, 2922, 2945, 2966, 3102, 3117, 3176, 3189, 3215, 3225, 3297,
        3326, 3349, 3373, 3513, 3525, 3535, 3601, 3619, 3780, 3820, 3897,
        3919, 3976, 3981, 4050, 4079, 4091,
    ]
    # Expected indices of the non-zero fingerprint positions for mol2.
    ref2 = [
        46, 111, 143, 172, 259, 305, 362, 409, 451, 467, 507, 518, 548, 583,
        595, 607, 608, 620, 639, 691, 693, 724, 752, 784, 825, 842, 926,
        1037, 1087, 1094, 1098, 1135, 1143, 1161, 1172, 1286, 1325, 1368,
        1371, 1395, 1399, 1461, 1486, 1488, 1492, 1565, 1619, 1648, 1655,
        1665, 1887, 1890, 1900, 1948, 1961, 1968, 1972, 1975, 1976, 1996,
        2000, 2007, 2094, 2125, 2174, 2232, 2236, 2368, 2382, 2383, 2483,
        2492, 2571, 2593, 2606, 2638, 2706, 2789, 2922, 2945, 2966, 2986,
        3030, 3100, 3102, 3117, 3227, 3326, 3350, 3373, 3406, 3419, 3535,
        3577, 3619, 3697, 3742, 3820, 3839, 3919, 3981, 4043, 4050, 4079,
        4091,
    ]

    # First pass uses the molecules as parsed; second pass follows addh().
    # The fingerprints are recomputed inside the loop on purpose: reusing
    # arrays built before addh() would make the second pass a no-op check.
    for add_hydrogens in (False, True):
        if add_hydrogens:
            # adding Hs should not change anything
            mol1.addh()
            mol2.addh()
        mol1_fp = ECFP(mol1, depth=8, size=4096, sparse=False,
                       use_pharm_features=True)
        mol2_fp = ECFP(mol2, depth=8, size=4096, sparse=False,
                       use_pharm_features=True)
        assert_array_equal(ref1, np.where(mol1_fp)[0])
        assert_array_equal(ref2, np.where(mol2_fp)[0])
        assert_almost_equal(dice(mol1_fp, mol2_fp), 0.64074074)
        assert_almost_equal(tanimoto(mol1_fp, mol2_fp), 0.5)
def test_sparse_similarity():
    """Check that sparse and dense similarity scores agree."""
    first = oddt.toolkit.readstring(
        "smi", "CC1=C(C(=CC=C1)C)NC(=O)CN2CCN(CC2)CC(=O)N3CCC4=C(C3)C=CS4")
    second = oddt.toolkit.readstring(
        "smi", "CC1=C(C(=CC=C1)O)NC(=O)CN2CCN(CC2)CC(=O)N3CCC4=C(C3)C=CS4")

    # Same fingerprint settings, once per output representation.
    first_dense = ECFP(first, depth=8, size=4096, sparse=False)
    second_dense = ECFP(second, depth=8, size=4096, sparse=False)
    first_sparse = ECFP(first, depth=8, size=4096, sparse=True)
    second_sparse = ECFP(second, depth=8, size=4096, sparse=True)

    for metric in (dice, tanimoto):
        # Both representations must give the same score.
        from_sparse = metric(first_sparse, second_sparse, sparse=True)
        from_dense = metric(first_dense, second_dense)
        assert_almost_equal(from_sparse, from_dense)

        # Empty and all-zero fingerprints are expected to score 0.
        assert_equal(metric([], [], sparse=True), 0.)
        assert_equal(metric(np.zeros(10), np.zeros(10), sparse=False), 0.)
def test_sparse_similarity():
    """Check that sparse and dense similarity scores agree."""
    smiles_a = "CC1=C(C(=CC=C1)C)NC(=O)CN2CCN(CC2)CC(=O)N3CCC4=C(C3)C=CS4"
    smiles_b = "CC1=C(C(=CC=C1)O)NC(=O)CN2CCN(CC2)CC(=O)N3CCC4=C(C3)C=CS4"
    mol_a = oddt.toolkit.readstring("smi", smiles_a)
    mol_b = oddt.toolkit.readstring("smi", smiles_b)

    # Same fingerprint settings, once per output representation.
    dense_a = ECFP(mol_a, depth=8, size=4096, sparse=False)
    dense_b = ECFP(mol_b, depth=8, size=4096, sparse=False)
    sparse_a = ECFP(mol_a, depth=8, size=4096, sparse=True)
    sparse_b = ECFP(mol_b, depth=8, size=4096, sparse=True)

    # Dice: both representations agree; empty/zero inputs score 0.
    dice_from_sparse = dice(sparse_a, sparse_b, sparse=True)
    dice_from_dense = dice(dense_a, dense_b)
    assert_almost_equal(dice_from_sparse, dice_from_dense)
    empty_dice = dice([], [], sparse=True)
    assert empty_dice == 0.
    zero_dice = dice(np.zeros(10), np.zeros(10), sparse=False)
    assert zero_dice == 0.

    # Tanimoto: same expectations as for Dice.
    tanimoto_from_sparse = tanimoto(sparse_a, sparse_b, sparse=True)
    tanimoto_from_dense = tanimoto(dense_a, dense_b)
    assert_almost_equal(tanimoto_from_sparse, tanimoto_from_dense)
    empty_tanimoto = tanimoto([], [], sparse=True)
    assert empty_tanimoto == 0.
    zero_tanimoto = tanimoto(np.zeros(10), np.zeros(10), sparse=False)
    assert zero_tanimoto == 0.
def test_plec_similarity():
    """Check PLEC Dice similarities for sparse and dense fingerprints."""
    xiap_dir = 'data/dude/xiap/'

    # Load every docked pose, then keep only those titled '312335'.
    docked = list(oddt.toolkit.readfile(
        'sdf', os.path.join(test_data_dir, xiap_dir + 'actives_docked.sdf')))
    mols = [entry for entry in docked if entry.title == '312335']
    for entry in mols:
        entry.addh(only_polar=True)

    receptor = next(oddt.toolkit.readfile(
        'pdb', os.path.join(test_data_dir, xiap_dir + 'receptor_rdkit.pdb')))
    receptor.protein = True
    receptor.addh(only_polar=True)

    # The first pose is the reference; all others are scored against it.
    queries = mols[1:]

    reference_sparse = PLEC(mols[0], receptor)
    outcome_sparse = []
    for query in queries:
        query_fp = PLEC(query, receptor)
        outcome_sparse.append(dice(reference_sparse, query_fp, sparse=True))

    # Expected Dice scores, compared to two decimals below.
    target_outcome = np.array([
        0.833, 0.729, 0.849, 0.785, 0.821, 0.604, 0.868, 0.656, 0.712,
        0.652, 0.699, 0.785, 0.736, 0.745, 0.661, 0.667, 0.555, 0.616,
        0.714,
    ])

    reference_dense = PLEC(mols[0], receptor, sparse=False)
    outcome_dense = []
    for query in queries:
        query_fp = PLEC(query, receptor, sparse=False)
        outcome_dense.append(dice(reference_dense, query_fp, sparse=False))

    assert_array_almost_equal(outcome_sparse, target_outcome, decimal=2)
    assert_array_almost_equal(outcome_dense, target_outcome, decimal=2)
def test_similarity():
    """Check Dice and Tanimoto scores of simple interaction fingerprints."""
    # Load every docked pose, then keep only those titled '312335'.
    sdf_path = os.path.join(test_data_dir,
                            'data/dude/xiap/actives_docked.sdf')
    every_pose = list(oddt.toolkit.readfile('sdf', sdf_path))
    mols = [pose for pose in every_pose if pose.title == '312335']
    for pose in mols:
        pose.addh(only_polar=True)

    pdb_path = os.path.join(test_data_dir,
                            'data/dude/xiap/receptor_rdkit.pdb')
    receptor = next(oddt.toolkit.readfile('pdb', pdb_path))
    receptor.protein = True
    receptor.addh(only_polar=True)

    # The first pose is the reference; all others are scored against it.
    ref = SimpleInteractionFingerprint(mols[0], receptor)

    # --- Dice ---
    outcome = []
    for pose in mols[1:]:
        outcome.append(dice(ref, SimpleInteractionFingerprint(pose, receptor)))
    # Expected values differ between the 'ob' backend and any other backend.
    dice_for_ob = [
        0.8, 0.625, 0.764706, 0.628571, 0.764706, 0.611111, 0.787879, 0.6,
        0.62069, 0.6875, 0.555556, 0.727273, 0.642857, 0.685714, 0.736842,
        0.666667, 0.484848, 0.533333, 0.588235,
    ]
    dice_otherwise = [
        0.810811, 0.625, 0.777778, 0.611111, 0.777778, 0.648649, 0.787879,
        0.6, 0.6, 0.666667, 0.578947, 0.742857, 0.62069, 0.628571, 0.736842,
        0.645161, 0.571429, 0.580645, 0.628571,
    ]
    target_outcome = np.array(
        dice_for_ob if oddt.toolkit.backend == 'ob' else dice_otherwise)
    assert_array_almost_equal(outcome, target_outcome)

    # --- Tanimoto ---
    outcome = []
    for pose in mols[1:]:
        outcome.append(
            tanimoto(ref, SimpleInteractionFingerprint(pose, receptor)))
    tanimoto_for_ob = [
        0.75, 0.5, 0.727273, 0.538462, 0.727273, 0.727273, 0.8, 0.636364,
        0.545455, 0.636364, 0.636364, 0.636364, 0.7, 0.727273, 0.75,
        0.636364, 0.454545, 0.454545, 0.416667,
    ]
    tanimoto_otherwise = [
        0.75, 0.416667, 0.727273, 0.538462, 0.727273, 0.727273, 0.7,
        0.636364, 0.545455, 0.545455, 0.636364, 0.636364, 0.6, 0.636364,
        0.75, 0.545455, 0.545455, 0.454545, 0.416667,
    ]
    target_outcome = np.array(
        tanimoto_for_ob if oddt.toolkit.backend == 'ob'
        else tanimoto_otherwise)
    assert_array_almost_equal(outcome, target_outcome)
def test_similarity():
    """Check Dice and Tanimoto scores of simple interaction fingerprints."""
    # Load every docked pose, then keep only those titled '312335'.
    docked = list(oddt.toolkit.readfile(
        'sdf',
        os.path.join(test_data_dir, 'data/dude/xiap/actives_docked.sdf')))
    mols = [candidate for candidate in docked if candidate.title == '312335']
    for candidate in mols:
        candidate.addh(only_polar=True)

    receptor = next(oddt.toolkit.readfile(
        'pdb',
        os.path.join(test_data_dir, 'data/dude/xiap/receptor_rdkit.pdb')))
    receptor.protein = True
    receptor.addh(only_polar=True)

    # The first pose is the reference; all others are scored against it.
    ref = SimpleInteractionFingerprint(mols[0], receptor)
    queries = mols[1:]

    # Dice scores; expectations depend on the active toolkit backend.
    outcome = []
    for query in queries:
        query_fp = SimpleInteractionFingerprint(query, receptor)
        outcome.append(dice(ref, query_fp))
    if oddt.toolkit.backend != 'ob':
        target_outcome = np.array([
            0.810811, 0.625, 0.777778, 0.611111, 0.777778, 0.648649,
            0.787879, 0.6, 0.6, 0.666667, 0.578947, 0.742857, 0.62069,
            0.628571, 0.736842, 0.645161, 0.571429, 0.580645, 0.628571,
        ])
    else:
        target_outcome = np.array([
            0.8, 0.625, 0.764706, 0.628571, 0.764706, 0.611111, 0.787879,
            0.6, 0.62069, 0.6875, 0.555556, 0.727273, 0.642857, 0.685714,
            0.736842, 0.666667, 0.484848, 0.533333, 0.588235,
        ])
    assert_array_almost_equal(outcome, target_outcome)

    # Tanimoto scores; expectations depend on the active toolkit backend.
    outcome = []
    for query in queries:
        query_fp = SimpleInteractionFingerprint(query, receptor)
        outcome.append(tanimoto(ref, query_fp))
    if oddt.toolkit.backend != 'ob':
        target_outcome = np.array([
            0.75, 0.416667, 0.727273, 0.538462, 0.727273, 0.727273, 0.7,
            0.636364, 0.545455, 0.545455, 0.636364, 0.636364, 0.6, 0.636364,
            0.75, 0.545455, 0.545455, 0.454545, 0.416667,
        ])
    else:
        target_outcome = np.array([
            0.75, 0.5, 0.727273, 0.538462, 0.727273, 0.727273, 0.8,
            0.636364, 0.545455, 0.636364, 0.636364, 0.636364, 0.7, 0.727273,
            0.75, 0.636364, 0.454545, 0.454545, 0.416667,
        ])
    assert_array_almost_equal(outcome, target_outcome)
def test_similarity():
    """Check Dice and Tanimoto scores of simple interaction fingerprints."""
    # Load every docked pose, then keep only those titled '312335'.
    ligand_file = os.path.join(test_data_dir,
                               'data/dude/xiap/actives_docked.sdf')
    loaded = list(oddt.toolkit.readfile('sdf', ligand_file))
    mols = [item for item in loaded if item.title == '312335']
    for item in mols:
        item.addh(only_polar=True)

    receptor_file = os.path.join(test_data_dir,
                                 'data/dude/xiap/receptor_rdkit.pdb')
    receptor = next(oddt.toolkit.readfile('pdb', receptor_file))
    receptor.protein = True
    receptor.addh(only_polar=True)

    # The first pose is the reference; all others are scored against it.
    ref = SimpleInteractionFingerprint(mols[0], receptor)

    # Dice score of each remaining pose against the reference.
    outcome = []
    for item in mols[1:]:
        item_fp = SimpleInteractionFingerprint(item, receptor)
        outcome.append(dice(ref, item_fp))
    target_outcome = np.array([
        0.742857, 0.645161, 0.727273, 0.571429, 0.727273, 0.588235, 0.75,
        0.551724, 0.551724, 0.6875, 0.514286, 0.6875, 0.592593, 0.647059,
        0.736842, 0.62069, 0.545455, 0.533333, 0.606061,
    ])
    assert_array_almost_equal(outcome, target_outcome)

    # Tanimoto score of each remaining pose against the reference.
    outcome = []
    for item in mols[1:]:
        item_fp = SimpleInteractionFingerprint(item, receptor)
        outcome.append(tanimoto(ref, item_fp))
    target_outcome = np.array([
        0.636364, 0.5, 0.666667, 0.384615, 0.666667, 0.545455, 0.666667,
        0.5, 0.363636, 0.666667, 0.555556, 0.555556, 0.625, 0.6, 0.727273,
        0.555556, 0.5, 0.4, 0.363636,
    ])
    assert_array_almost_equal(outcome, target_outcome)
def Simple_Interaction_Fingerprint(ref_input, Listoflig, proteinpath):
    """Score ligands against a reference ligand by interaction fingerprint.

    The protein, the reference ligand and every query ligand are each taken
    as the first record of a PDB file read with ``removeHs=False``. A
    ``fp.SimpleInteractionFingerprint`` is built for the reference ligand and
    for each query ligand against the same protein, and each query
    fingerprint is compared with the reference one using ``fp.dice``.

    Parameters
    ----------
    ref_input
        Path of the PDB file holding the reference ligand.
    Listoflig
        Iterable of PDB file paths, one per query ligand.
    proteinpath
        Path of the PDB file holding the protein.

    Returns
    -------
    list
        The value returned by ``fp.dice`` for each entry of ``Listoflig``,
        in the same order. Empty when ``Listoflig`` is empty.

    Raises
    ------
    Exception
        Any error raised while reading the protein is re-raised after a
        message is printed and the error text is written to ``ErrorLog.txt``
        in the directory of ``proteinpath``.
    """
    # NOTE: earlier revisions of the readfile calls below also passed
    # cleanupSubstructures=False and sanitize=False.

    # Read in protein
    try:
        protein = next(
            oddt.toolkit.readfile('pdb', proteinpath, removeHs=False))
        protein.protein = True
    except Exception as e:
        print(
            "Input structure could not be split into protein and ligand. Please check ligand identifier."
        )
        # The log belongs next to the input file, i.e. in its directory
        # (dirname); basename would yield a path *inside* the PDB file name.
        log_path = os.path.join(os.path.dirname(proteinpath), 'ErrorLog.txt')
        with open(log_path, 'w') as log_file:
            log_file.write(str(e))
        # Without a protein nothing below can work; surface the real cause
        # instead of failing later with an unrelated NameError.
        raise

    # Read in and define the reference ligand
    ref_ligand = next(oddt.toolkit.readfile('pdb', ref_input, removeHs=False))
    ref = fp.SimpleInteractionFingerprint(ref_ligand, protein)

    def _score(ligandpath):
        # Dice similarity between the reference and one query ligand.
        ligand = next(
            oddt.toolkit.readfile('pdb', ligandpath, removeHs=False))
        fp_query = fp.SimpleInteractionFingerprint(ligand, protein)
        return fp.dice(ref, fp_query)

    # One score per ligand, in input order
    return [_score(ligandpath) for ligandpath in Listoflig]