def _transform_mol(self, mol):
    """Compute the Morgan fingerprint of a single molecule.

    This is the per-molecule worker; the public `transform` method accepts
    iterables of molecules.

    The output depends on `self.as_bits` and `self.n_feats`:

    * `n_feats > 0` and `as_bits`: folded bit vector as a `np.uint8` array.
    * `n_feats > 0` and not `as_bits`: hashed fingerprint converted to an
      array via `np.array(list(...))`.
    * `n_feats <= 0`: dict of the nonzero elements of the unfolded
      fingerprint, with values reduced to 0/1 when `as_bits` is set.

    Args:
        mol (skchem.Mol): Molecule to calculate the fingerprint for.

    Returns:
        np.array or dict: Fingerprint as an array (or a dict if sparse).
    """
    # Options shared by every RDKit Morgan call below.
    morgan_opts = {
        'useFeatures': self.use_features,
        'useBondTypes': self.use_bond_types,
        'useChirality': self.use_chirality,
    }

    # Folded, binary fingerprint.
    if self.as_bits and self.n_feats > 0:
        bit_vect = GetMorganFingerprintAsBitVect(
            mol, self.radius, nBits=self.n_feats, **morgan_opts)
        # Placeholder array handed to ConvertToNumpyArray to be filled in.
        dense = np.array(0)
        ConvertToNumpyArray(bit_vect, dense)
        return dense.astype(np.uint8)

    # Unfolded (sparse) fingerprint, returned as a dict.
    if self.n_feats <= 0:
        sparse_fp = GetMorganFingerprint(mol, self.radius, **morgan_opts)
        elements = sparse_fp.GetNonzeroElements()
        if self.as_bits:
            elements = {
                key: int(count > 0) for key, count in elements.items()
            }
        return elements

    # Folded, counting fingerprint.
    hashed_fp = GetHashedMorganFingerprint(
        mol, self.radius, nBits=self.n_feats, **morgan_opts)
    return np.array(list(hashed_fp))
def test__hashed_binary_fingerprints__ecfp(self) -> None:
    """Binary-mode fingerprints must match RDKit's Morgan fingerprint.

    The featurizer is fitted once on SMILES strings and once on Mol objects.
    For both matrices the test checks the shape, that every hash reported by
    RDKit is set in the corresponding row, and that the row sum equals the
    number of reference hashes (i.e. nothing else is set).
    """
    featurizer = CircularFPFeaturizer(fp_mode="binary")
    mat_from_smiles = featurizer.fit_transform(self.smis)  # using SMILES
    mat_from_mols = featurizer.fit_transform(self.mols)  # using Mol objects

    # Output shape
    for fp_matrix in (mat_from_smiles, mat_from_mols):
        self.assertEqual(fp_matrix.shape[0], self.n_mols)
        self.assertEqual(fp_matrix.shape[1], featurizer.max_hash_value_)

    # Fingerprint matrix structure
    for row, mol in enumerate(self.mols):
        reference = GetMorganFingerprint(
            mol,
            radius=featurizer.radius,
            useFeatures=featurizer.use_features_,
            useChirality=featurizer.use_chirality,
            useCounts=featurizer.use_counts_,
        )
        expected_hashes = reference.GetNonzeroElements()

        for hash_value in expected_hashes:
            self.assertTrue(mat_from_smiles[row, hash_value])
            self.assertTrue(mat_from_mols[row, hash_value])

        # No other elements are set
        n_expected = len(expected_hashes)
        self.assertEqual(np.sum(mat_from_smiles[row, :].data), n_expected)
        self.assertEqual(np.sum(mat_from_mols[row, :].data), n_expected)
def transform_mol(self, molecule: Chem.rdchem.Mol) -> Tuple[np.ndarray, bool]:
    """Compute the Morgan fingerprint of one molecule.

    Args:
        molecule: RDKit molecule to featurize.

    Returns:
        A pair ``(fingerprint, True)``. The fingerprint is whatever
        ``rdkit_sparse_array_to_np`` builds from the nonzero elements of the
        RDKit fingerprint. The second element is always ``True``
        (presumably a success flag; confirm against the caller).
    """
    # Looked up in the instance dict so that instances without the attribute
    # fall back to False instead of raising.
    chirality_flag = self.__dict__.get('use_chirality', False)

    sparse_fp = GetMorganFingerprint(
        molecule,
        radius=self.radius,
        useFeatures=self.use_features,
        useCounts=self.use_counts,
        useChirality=chirality_flag,
        **self.fingerprint_extra_args,
    )

    nonzero_items = sparse_fp.GetNonzeroElements().items()
    fingerprint = rdkit_sparse_array_to_np(
        nonzero_items,
        use_counts=self.use_counts,
        fp_size=self.fp_size,
    )
    return fingerprint, True
def test__string_output_format(self) -> None:
    """The "sparse_string" output must encode RDKit's Morgan counts.

    Each molecule's output string is wrapped in braces and parsed as a dict
    literal. Every (hash, count) pair reported by RDKit's
    ``GetMorganFingerprint`` must then be found in that dict with the same
    count.
    """
    # Local import keeps this method self-contained; literal_eval parses
    # literals only, so the featurizer output is never executed as code.
    import ast

    fprintr = CircularFPFeaturizer(output_format="sparse_string")
    fps_str = fprintr.fit_transform(self.smis)  # using SMILES

    # Output shape
    self.assertEqual(self.n_mols, len(fps_str))

    # Fingerprint matrix structure
    for i, mol in enumerate(self.mols):
        fps_ref = GetMorganFingerprint(
            mol,
            radius=fprintr.radius,
            useFeatures=fprintr.use_features_,
            useChirality=fprintr.use_chirality,
            useCounts=fprintr.use_counts_,
        )

        # The string is expected to be the body of a dict literal.
        fp_i_from_str = ast.literal_eval("{" + fps_str[i] + "}")

        for hash_value, cnt in fps_ref.GetNonzeroElements().items():
            self.assertEqual(fp_i_from_str[hash_value], cnt)
def test__hashed_counting_fingerprints__fcfp(self) -> None:
    """FCFP fingerprints must reproduce RDKit's Morgan counts.

    The featurizer is fitted once on SMILES strings and once on Mol objects.
    Both matrices are checked for shape, and for every (hash, count) pair
    reported by RDKit the matching matrix entry must equal the count.
    """
    featurizer = CircularFPFeaturizer(fp_type="FCFP")
    mat_from_smiles = featurizer.fit_transform(self.smis)  # using SMILES
    mat_from_mols = featurizer.fit_transform(self.mols)  # using Mol objects

    # Output shape
    for fp_matrix in (mat_from_smiles, mat_from_mols):
        self.assertEqual(fp_matrix.shape[0], self.n_mols)
        self.assertEqual(fp_matrix.shape[1], featurizer.max_hash_value_)

    # Fingerprint matrix structure
    for row, mol in enumerate(self.mols):
        reference = GetMorganFingerprint(
            mol,
            radius=featurizer.radius,
            useFeatures=featurizer.use_features_,
            useChirality=featurizer.use_chirality,
            useCounts=featurizer.use_counts_,
        )
        expected_counts = reference.GetNonzeroElements()

        for hash_value, count in expected_counts.items():
            self.assertEqual(mat_from_smiles[row, hash_value], count)
            self.assertEqual(mat_from_mols[row, hash_value], count)