def _featurize_ligand(self, ligand: rdkit.Chem.Mol) -> np.ndarray:
    """
    Turn an RDKit molecule into a Morgan fingerprint array.

    Parameters
    ----------
    ligand : rdkit.Chem.Mol
        The molecule to fingerprint.

    Returns
    -------
    np.ndarray
        The fingerprint bit vector converted to an array of dtype ``uint8``.
        The fingerprint is built with ``self.radius`` and ``self.nbits``.
    """
    # Imported lazily, inside the method, exactly as the fingerprint is needed.
    from rdkit.Chem.AllChem import GetMorganFingerprintAsBitVect

    # FIXME: Check whether OFF uses canonical smiles internally, or not
    # otherwise, we should force that behaviour ourselves!
    bit_vector = GetMorganFingerprintAsBitVect(
        ligand,
        radius=self.radius,
        nBits=self.nbits,
    )
    return np.asarray(bit_vector, dtype="uint8")
def fingerprint(smiles_or_mol, fp_type='maccs', dtype=None, morgan__r=2,
                morgan__n=1024, *args, **kwargs):
    """
    Generates a fingerprint for a molecule given as SMILES or mol object

    Parameters:
        smiles_or_mol: input handed to get_mol, together with any extra
            *args / **kwargs
        fp_type: type of fingerprint, case-insensitive: [MACCS|morgan]
        dtype: if not None, specifies the dtype of returned array
        morgan__r: radius passed to the Morgan fingerprint
        morgan__n: nBits passed to the Morgan fingerprint

    Returns:
        numpy array of fingerprint bits (uint8 unless dtype is given;
        length 166 for MACCS), or None when get_mol returns None

    Raises:
        ValueError: if fp_type is neither 'maccs' nor 'morgan'
    """
    kind = fp_type.lower()
    mol = get_mol(smiles_or_mol, *args, **kwargs)
    if mol is None:
        return None

    if kind == 'morgan':
        bits = np.asarray(Morgan(mol, morgan__r, nBits=morgan__n),
                          dtype='uint8')
    elif kind == 'maccs':
        on_bits = np.array(MACCSkeys.GenMACCSKeys(mol).GetOnBits())
        bits = np.zeros(166, dtype='uint8')
        # An empty index array cannot be used for the assignment below,
        # so the all-zero fingerprint is kept as is in that case.
        if len(on_bits) > 0:
            # We drop 0-th key that is always zero
            bits[on_bits - 1] = 1
    else:
        raise ValueError("Unknown fingerprint type {}".format(kind))

    return bits if dtype is None else bits.astype(dtype)
def fingerprints(smiles, radius=2, n_bits=1024):
    """Calculates Morgan fingerprints of a list of SMILES strings.

    Parameters:
        smiles: sized sequence of SMILES strings (``len`` is taken of it)
        radius: Morgan radius, passed as the second argument of ``Morgan``
            (default 2, the previously hard-coded value)
        n_bits: fingerprint width, passed as the third argument of
            ``Morgan`` and used as the number of columns of the result
            (default 1024, the previously hard-coded value)

    Returns:
        numpy array of shape ``(len(smiles), n_bits)``; row ``i`` holds the
        fingerprint bits of ``smiles[i]``. The array is allocated with
        ``np.zeros`` and therefore keeps numpy's default float dtype.

    Raises:
        ValueError: if a SMILES string cannot be parsed, i.e.
            ``Chem.MolFromSmiles`` returns None for it.
    """
    fps = np.zeros((len(smiles), n_bits))
    for i, smi in enumerate(smiles):
        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            # Fail with the offending entry instead of passing None on to
            # the fingerprint routine, which would give an opaque error.
            raise ValueError(
                "Could not parse SMILES at index {}: {!r}".format(i, smi)
            )
        fps[i, :] = np.asarray(Morgan(mol, radius, n_bits), dtype='uint8')
    return fps
def _featurize_one(self, system: System) -> np.ndarray:
    """
    Compute the Morgan fingerprint of the ligand found in ``system``.

    Parameters
    ----------
    system : System
        The System to be featurized.

    Returns
    -------
    array
        The fingerprint bit vector converted to an array of dtype ``int64``.
        The fingerprint is built with ``self.radius`` and ``self.nbits``.
    """
    # Imported lazily, before the ligand lookup, as in the original order.
    from rdkit.Chem.AllChem import GetMorganFingerprintAsBitVect

    # FIXME: Check whether OFF uses canonical smiles internally, or not
    # otherwise, we should force that behaviour ourselves!
    rdkit_ligand = self._find_ligand(system).to_rdkit()
    bit_vector = GetMorganFingerprintAsBitVect(
        rdkit_ligand,
        radius=self.radius,
        nBits=self.nbits,
    )
    return np.asarray(bit_vector, dtype="int64")