def _transform_mol(self, mol):
    """Compute the Morgan fingerprint of one skchem molecule.

    This is the per-molecule worker; use `transform` for the public method,
    which genericizes the argument to iterables of mols.

    The output form is selected by ``self.as_bits`` and ``self.n_feats``:

    * ``as_bits`` and ``n_feats > 0``: bit vector converted to a ``uint8``
      array.
    * ``n_feats <= 0``: dict of the fingerprint's non-zero elements, with the
      values reduced to 0/1 when ``as_bits`` is set.
    * otherwise: hashed fingerprint converted to an array.

    Args:
        mol (skchem.Mol):
            Molecule to calculate fingerprint for.

    Returns:
        np.array or dict:
            Fingerprint as an array (or a dict if sparse).
    """
    # Keyword options shared by every RDKit call below.
    options = {
        'useFeatures': self.use_features,
        'useBondTypes': self.use_bond_types,
        'useChirality': self.use_chirality,
    }

    if self.as_bits and self.n_feats > 0:
        bit_vect = GetMorganFingerprintAsBitVect(
            mol, self.radius, nBits=self.n_feats, **options)
        # Placeholder array that ConvertToNumpyArray fills in place.
        bits = np.array(0)
        ConvertToNumpyArray(bit_vect, bits)
        return bits.astype(np.uint8)

    if self.n_feats <= 0:
        sparse = GetMorganFingerprint(mol, self.radius, **options)
        elements = sparse.GetNonzeroElements()
        if not self.as_bits:
            return elements
        # Reduce each stored value to a presence flag.
        return {key: int(value > 0) for key, value in elements.items()}

    hashed = GetHashedMorganFingerprint(
        mol, self.radius, nBits=self.n_feats, **options)
    return np.array(list(hashed))
def _one_cats(mol):
    """
    Calculate the CATS pharmacophore descriptor for a single molecule, using
    the signature factory returned by ``get_cats_factory``.

    The raw 2D fingerprint is split into 21 consecutive blocks of 10 entries
    (210 values in total); every entry is divided by the sum of its own block.
    Blocks whose sum is zero are left as zeros.

    :param mol: {RDKit molecule} molecule to calculate the descriptor for
    :return: {numpy.ndarray} calculated descriptor vector (float32)
    """
    fingerprint = Generate.Gen2DFingerprint(mol, get_cats_factory())

    # Destination buffer that ConvertToNumpyArray fills in place.
    raw = np.zeros((1,))
    ConvertToNumpyArray(fingerprint, raw)

    # One total per block of 10, repeated 10 times so it lines up with `raw`.
    block_totals = [sum(raw[start:start + 10]) for start in range(0, 210, 10)]
    scale = np.array([[total] * 10 for total in block_totals]).flatten()

    # `where` skips zero totals; those positions keep the zeros from `out`.
    scaled = np.divide(raw, scale, out=np.zeros_like(raw), where=scale != 0)
    return scaled.astype('float32')
def _create_fp(smile: AnyStr, radius: int = 2, nBits: int = 2048) -> np.ndarray:
    """Build a Morgan bit-vector fingerprint array from a SMILES string.

    The SMILES is parsed with ``Chem.MolFromSmiles``, fingerprinted with
    ``GetMorganFingerprintAsBitVect`` and converted to a NumPy array.

    Args:
        smile: SMILES string of the molecule.
        radius: Radius passed to the Morgan fingerprint call.
        nBits: Number of bits requested from the Morgan fingerprint call.

    Returns:
        The fingerprint as a NumPy array.
    """
    mol = Chem.MolFromSmiles(smile)
    bit_vect = GetMorganFingerprintAsBitVect(mol, radius=radius, nBits=nBits)

    # Destination buffer that ConvertToNumpyArray fills in place.
    bits = np.zeros((1, ))
    ConvertToNumpyArray(bit_vect, bits)
    return bits
def _rdk2numpy(fps):
    """
    private function to transform RDKit fingerprints into numpy arrays

    An empty ``fps`` yields an empty array of shape ``(0, 0)`` instead of
    failing while looking up the length of the first fingerprint.

    :param fps: {list} list of RDKit fingerprints
    :return: {numpy.ndarray} fingerprints in array, one row per fingerprint
    """
    n_fps = len(fps)
    if n_fps == 0:
        # Nothing to convert; there is no first row to take a width from.
        return np.zeros((0, 0))

    np_fps = []
    for fp in fps:
        # Destination buffer that ConvertToNumpyArray fills in place.
        arr = np.zeros((1,))
        ConvertToNumpyArray(fp, arr)
        np_fps.append(arr)

    # The row width is taken from the first converted fingerprint.
    return np.array(np_fps).reshape((n_fps, len(np_fps[0])))
def create_mol(df_l, n_bits):
    """Add molecule and fingerprint columns to ``df_l`` in place.

    Three columns are written:

    * ``mol``: result of ``Chem.MolFromSmiles`` on the ``smiles`` column
      (a Mol object, None on failure).
    * ``bv``: fingerprint object from ``calculate_fp(mol, 'maccs', n_bits)``.
    * ``np_bv``: the fingerprint converted to a NumPy array.

    The arrays are built per row from each row's own fingerprint, so the
    function does not depend on the frame having an index label ``0`` and
    also works on an empty frame.

    Args:
        df_l: DataFrame with a ``smiles`` column; modified in place.
        n_bits: Forwarded to ``calculate_fp``.
            NOTE(review): the method is fixed to 'maccs'; whether
            ``calculate_fp`` uses ``n_bits`` for it cannot be seen from here.

    Returns:
        None. The results are stored as columns of ``df_l``.
    """
    # Construct a molecule from each SMILES string.
    df_l['mol'] = df_l.smiles.apply(Chem.MolFromSmiles)

    # Store the molecular fingerprint of each molecule as a fingerprint object.
    df_l['bv'] = df_l.mol.apply(lambda mol: calculate_fp(mol, 'maccs', n_bits))

    def _to_array(bit_vect):
        """Convert one fingerprint object into its own NumPy array."""
        arr = np.zeros((bit_vect.GetNumBits(),))
        ConvertToNumpyArray(bit_vect, arr)
        return arr

    # Convert every fingerprint object to a NumPy array and store it in np_bv.
    df_l['np_bv'] = df_l.bv.apply(_to_array)
def _cats_corr(mols, q):
    """
    private cats descriptor function to be used in multiprocessing

    For every molecule the raw 2D fingerprint is split into 21 consecutive
    blocks of 10 entries and each entry is divided by the sum of its block
    (blocks summing to zero stay zero). The stacked result, shaped
    ``(len(mols), 210)`` and cast to float32, is put on ``q``; nothing is
    returned.

    :param mols: {list/array} molecules (RDKit mol) to calculate the descriptor for
    :param q: {queue} multiprocessing queue instance that receives the
        {numpy.ndarray} of calculated descriptor vectors
    """
    # The factory is created once and shared by all molecules.
    factory = get_cats_factory()

    rows = []
    for mol in mols:
        # Destination buffer that ConvertToNumpyArray fills in place.
        raw = np.zeros((1,))
        ConvertToNumpyArray(Generate.Gen2DFingerprint(mol, factory), raw)

        # One total per block of 10, repeated so it lines up with `raw`.
        block_totals = [sum(raw[start:start + 10]) for start in range(0, 210, 10)]
        scale = np.array([[total] * 10 for total in block_totals]).flatten()

        # `where` skips zero totals; those positions keep the zeros from `out`.
        rows.append(np.divide(raw, scale, out=np.zeros_like(raw), where=scale != 0))

    matrix = np.array(rows).reshape((len(mols), 210))
    q.put(matrix.astype('float32'))
def _transform_mol(self, mol):
    """Private method to transform a skchem molecule.

    Computes the topological torsion fingerprint of one molecule. The output
    form is selected by ``self.as_bits`` and ``self.n_feats``:

    * ``as_bits`` and ``n_feats > 0``: hashed bit vector converted to a
      ``uint8`` array.
    * ``n_feats <= 0``: dict of the unhashed fingerprint's non-zero elements,
      with the values reduced to 0/1 when ``as_bits`` is set.
    * otherwise: hashed fingerprint converted to an array.

    Args:
        mol (skchem.Mol): Molecule to calculate fingerprint for.

    Returns:
        np.array or dict:
            Fingerprint as an array (or a dict if sparse).
    """
    if self.as_bits and self.n_feats > 0:
        fp = GetHashedTopologicalTorsionFingerprintAsBitVect(
            mol, nBits=self.n_feats,
            targetSize=self.target_size,
            includeChirality=self.use_chirality)
        # Placeholder array that ConvertToNumpyArray fills in place.
        res = np.array(0)
        ConvertToNumpyArray(fp, res)
        return res.astype(np.uint8)

    if self.n_feats <= 0:
        # The unhashed RDKit function takes no `nBits` argument (only the
        # hashed variants do), so it is deliberately not passed here;
        # passing it makes the call fail with an argument mismatch.
        fp = GetTopologicalTorsionFingerprint(
            mol,
            targetSize=self.target_size,
            includeChirality=self.use_chirality)
        res = fp.GetNonzeroElements()
        if self.as_bits:
            # Reduce each stored value to a presence flag.
            res = {k: int(v > 0) for k, v in res.items()}
        return res

    fp = GetHashedTopologicalTorsionFingerprint(
        mol, nBits=self.n_feats,
        targetSize=self.target_size,
        includeChirality=self.use_chirality)
    return np.array(list(fp))
def to_np(vect, nbits):
    """Convert an RDKit fingerprint vector into a NumPy integer array.

    Args:
        vect: RDKit fingerprint object accepted by ``ConvertToNumpyArray``.
        nbits: Length of the array allocated to receive the fingerprint.

    Returns:
        numpy.ndarray: Array (dtype code ``'i'``) holding the values of
        ``vect``.
    """
    arr = numpy.zeros((nbits, ), 'i')
    # ConvertToNumpyArray writes into `arr` in place and, per the RDKit API,
    # returns None -- so the filled array is returned, not the call's result.
    ConvertToNumpyArray(vect, arr)
    return arr
def fingerprints_from_mol(cls, mol):
    """Return the feature-based Morgan fingerprint of ``mol`` as an array.

    The fingerprint is computed with radius 3 and ``useFeatures=True`` and
    then converted with ``ConvertToNumpyArray``.

    NOTE(review): ``GetMorganFingerprint`` is called without a bit count, so
    this presumably yields an unfolded sparse vector; converting that to a
    dense array may be extremely large. Confirm whether a hashed/folded
    fingerprint was intended.

    Args:
        mol: RDKit molecule to fingerprint.

    Returns:
        numpy.ndarray: The converted fingerprint.
    """
    morgan = AllChem.GetMorganFingerprint(mol, 3, useFeatures=True)

    # Destination buffer that ConvertToNumpyArray fills in place.
    dense = np.zeros((1, ))
    ConvertToNumpyArray(morgan, dense)
    return dense
def numpy(self):
    """Return ``self.fp`` converted to a NumPy array.

    A new array is allocated on every call and filled by
    ``ConvertToNumpyArray``.

    Returns:
        numpy.ndarray: The converted fingerprint.
    """
    # Destination buffer that ConvertToNumpyArray fills in place.
    converted = np.zeros((1, ))
    ConvertToNumpyArray(self.fp, converted)
    return converted
def get_bit_vector(fp):
    """Convert an RDKit fingerprint object into a NumPy array.

    Args:
        fp: Fingerprint object accepted by ``ConvertToNumpyArray``.

    Returns:
        numpy.ndarray: The converted fingerprint.
    """
    # Destination buffer that ConvertToNumpyArray fills in place.
    bits = np.zeros((1,))
    ConvertToNumpyArray(fp, bits)
    return bits