def test_ecfp(self):
    """Check ECFP sizes for butyramide (the example used in the ECFP paper)."""
    butyramide = smilin("CCCC(=O)N")

    # (radius passed to ecfp, expected len() of the returned fingerprint)
    expected_sizes = (
        (0, 5),   # ECFP_0
        (1, 11),  # ECFP_2
        (2, 14),  # ECFP_4
        (3, 14),  # ECFP_6
    )
    for radius, expected_len in expected_sizes:
        fp = ecfp(butyramide, radius=radius)
        assert len(fp) == expected_len
def fingerprint(self):
    """
    Return the fingerprint for this compound, computing it on first use.

    The result is stored on the instance as ``_fp`` and returned as-is by
    later calls. When ``self.mol()`` is ``None`` the stored fingerprint is
    an empty dict.

    :returns: The fingerprint sparse vector dict
    """
    try:
        cached = self._fp
    except AttributeError:
        # Nothing cached yet: seed the cache with the empty fingerprint
        # before consulting the molecule.
        self._fp = {}
    else:
        return cached

    if self.mol() is not None:
        self._fp = ecfp(self.mol())
    return self._fp
def molecule_embedding(filename, radius=3, n_bits=1024):
    """
    Build fixed-length binary ECFP fingerprints for the molecules in a spreadsheet.

    The file is read with ``pd.read_excel`` and must provide ``MOLENAME`` and
    ``SMILES`` columns. Each SMILES string is parsed with ``smilin`` and
    fingerprinted with ``ecfp``; every key of the ecfp result is then reduced
    modulo ``n_bits`` and the list position with that index is set to 1.

    Rows that fail to parse or to fingerprint are reported on stdout and
    skipped. Results are keyed by ``MOLENAME``, so if a name occurs more than
    once only the last successfully processed row for it is kept.

    :param filename: Spreadsheet to load (anything ``pd.read_excel`` accepts)
    :param radius: Radius passed to ``ecfp`` (default 3)
    :param n_bits: Length of each folded fingerprint (default 1024)
    :returns: List of fingerprints, one per retained molecule name, each a
        list of ``n_bits`` integers that are 0 or 1
    :raises ValueError: If ``n_bits`` is smaller than 1
    """
    if n_bits < 1:
        raise ValueError("n_bits must be at least 1")

    df = pd.read_excel(filename)
    embeddings = {}
    for name, smiles in zip(df.MOLENAME, df.SMILES):
        try:
            mol = smilin(smiles)
        except Exception:
            # Best effort: report the row and move on. Catching Exception
            # (not a bare except) lets Ctrl-C and SystemExit propagate.
            print('Bad mol for', name)
            continue

        try:
            fp = ecfp(mol, radius=radius)
            bits = [0] * n_bits
            for nbrhood_hash in fp.keys():
                # Fold the hash into the fixed-length vector; distinct
                # hashes may land on the same position.
                bits[nbrhood_hash % n_bits] = 1
        except Exception:
            print('Bad ecfp for', name)
            continue

        embeddings[name] = bits

    return list(embeddings.values())