def test_duplicate_fprint_names_detected(self):
    """Check that fingerprints added under an already-used name are tracked.

    Ten ``CountFingerprint`` objects named ``"0"`` .. ``"9"`` are added to
    a ``FingerprintDatabase``, then ten more built from the same vectors
    and carrying the same names are added.  The test asserts that
    ``fp_num`` is 20, ``bits`` is 1024, ``fp_names_to_indices`` has 10
    entries, each name ``k`` maps to rows ``k`` and ``k + 10``, and both
    of those rows match the vector the fingerprints were built from.
    """
    from e3fp.fingerprint.fprint import CountFingerprint
    from e3fp.fingerprint.db import FingerprintDatabase

    array = (np.random.uniform(0, 1, size=(10, 1024)) > 0.9).astype(
        np.double
    )
    fprints = [
        CountFingerprint.from_vector(array[i, :]) for i in range(10)
    ]
    for i, fp in enumerate(fprints):
        fp.name = str(i)
    db = FingerprintDatabase(fp_type=CountFingerprint)
    db.add_fingerprints(fprints)

    # Second batch reuses both the vectors and the names, so every name
    # ends up attached to two rows.
    fprints2 = [
        CountFingerprint.from_vector(array[i, :]) for i in range(10)
    ]
    for i, fp in enumerate(fprints2):
        fp.name = str(i)
    db.add_fingerprints(fprints2)

    self.assertIs(db.fp_type, CountFingerprint)
    self.assertTrue(np.issubdtype(db.array.dtype, np.uint16))
    self.assertEqual(db.fp_num, 20)
    self.assertEqual(db.bits, 1024)
    self.assertEqual(len(db.fp_names_to_indices), 10)
    for name, rows in db.fp_names_to_indices.items():
        k = int(name)
        self.assertEqual(k, rows[0])
        self.assertEqual(k, rows[1] - 10)
        # Verify the stored contents of BOTH rows sharing this name.
        # ``row % 10`` maps a row from the second batch (10..19) back to
        # the source vector it was built from.
        for row in (rows[0], rows[1]):
            np.testing.assert_almost_equal(
                array[row % 10, :],
                db.array[row, :].todense().getA().flatten(),
            )
def test_update_props(self):
    """Check that the "index" property is gathered across two additions.

    Two batches of ten ``CountFingerprint`` objects are built from the
    same random vectors.  Every fingerprint gets an ``"index"`` property
    equal to its position within its batch; names run ``"0"`` .. ``"9"``
    for the first batch and ``"10"`` .. ``"19"`` for the second.  After
    both batches are added, ``db.get_prop("index")`` must have 20 entries
    equal to ``0..9`` followed by ``0..9``.
    """
    from e3fp.fingerprint.fprint import CountFingerprint
    from e3fp.fingerprint.db import FingerprintDatabase

    dense = (np.random.uniform(0, 1, size=(10, 1024)) > 0.9).astype(
        np.double
    )

    def build_batch(name_offset):
        # One fingerprint per row of ``dense``; the name is shifted by
        # ``name_offset`` while the "index" property is the row number.
        batch = []
        for row in range(10):
            fprint = CountFingerprint.from_vector(dense[row, :])
            fprint.name = str(row + name_offset)
            fprint.set_prop("index", row)
            batch.append(fprint)
        return batch

    first_batch = build_batch(0)
    database = FingerprintDatabase(fp_type=CountFingerprint)
    database.add_fingerprints(first_batch)
    database.add_fingerprints(build_batch(len(first_batch)))

    collected = database.get_prop("index")
    self.assertEqual(collected.shape[0], 20)
    self.assertListEqual(collected.tolist(), 2 * list(range(10)))
def test_countfprint_from_counts(self):
    """Check that ``CountFingerprint.from_counts`` round-trips its input.

    A fingerprint is built from a small index-to-count mapping with
    ``bits=32``; its ``counts`` attribute must compare equal to that
    mapping.
    """
    from e3fp.fingerprint.fprint import CountFingerprint

    expected = {3: 1, 1: 4, 5: 1}
    built = CountFingerprint.from_counts(expected, bits=32)
    self.assertEqual(expected, built.counts)
def fingerprint(self):
    """
    Return the count fingerprint for this compound, computing it once.

    The result is stored on the instance as ``_fp``; if that attribute
    already exists it is returned unchanged.

    :returns: the value of ``_fp`` -- an empty ``CountFingerprint`` when
        ``self.mol()`` is ``None``, otherwise the ``CountFingerprint``
        built from ``Fingerprint.from_rdkit(self.mol())``
    """
    not_cached = object()
    cached = getattr(self, '_fp', not_cached)
    if cached is not not_cached:
        return cached

    # The empty placeholder is assigned before anything else runs, so it
    # is what stays stored if a later call raises.
    self._fp = CountFingerprint(counts={})
    if self.mol() is None:
        return self._fp

    self._fp = CountFingerprint.from_fingerprint(
        Fingerprint.from_rdkit(self.mol())
    )
    return self._fp