def make_fingerprints(data, length=512, verbose=False):
    """Build the set of fingerprint generators and apply each one to ``data``.

    Args:
        data: Input handed unchanged to every ``apply_fp`` call.
        length (int): Bit-vector size forwarded to the Morgan, Avalon
            (with and without molecular weight) and RDKit generators only.
            The remaining generators are invoked without a size argument.
        verbose (bool): When true, print each fingerprint's name before it
            is applied.

    Returns:
        list: The ``fingerprint`` objects, in the order defined below, after
        ``apply_fp(data)`` has been called on each of them.
    """
    generators = [
        fingerprint(
            Chem.rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect,
            "Torsion ",
        ),
        fingerprint(
            lambda mol: GetMorganFingerprintAsBitVect(mol, 2, nBits=length),
            "Morgan",
        ),
        fingerprint(FingerprintMol, "Estate (1995)"),
        fingerprint(
            lambda mol: GetAvalonFP(mol, nBits=length),
            "Avalon bit based (2006)",
        ),
        # Avalon bits with the molecular weight appended as one extra entry.
        fingerprint(
            lambda mol: np.append(
                GetAvalonFP(mol, nBits=length), Descriptors.MolWt(mol)
            ),
            "Avalon+mol. weight",
        ),
        fingerprint(lambda mol: GetErGFingerprint(mol), "ErG fingerprint (2006)"),
        fingerprint(
            lambda mol: RDKFingerprint(mol, fpSize=length),
            "RDKit fingerprint",
        ),
        fingerprint(lambda mol: MACCSkeys.GenMACCSKeys(mol), "MACCS fingerprint"),
        fingerprint(
            lambda mol: get_fingerprint(mol, fp_type='pubchem'),
            "PubChem",
        ),
        # The FP4 fingerprint is currently disabled:
        # fingerprint(lambda x: get_fingerprint(x, fp_type='FP4'), "FP4")
        fingerprint(
            lambda mol: Generate.Gen2DFingerprint(
                mol,
                Gobbi_Pharm2D.factory,
                dMat=Chem.Get3DDistanceMatrix(mol),
            ),
            "3D pharmacophore",
        ),
    ]

    for generator in generators:
        if verbose:
            print("doing", generator.name)
        generator.apply_fp(data)

    return generators
def get_pubchem_fingerprint(smiles):
    """Compute the PubChem fingerprint for a SMILES string.

    Args:
        smiles (str): the SMILES string

    Returns:
        Whatever ``get_fingerprint`` produces for ``fp_type="pubchem"`` with
        ``output="vector"``.
    """
    # Imported lazily so PyFingerprint is only required when this is called.
    from PyFingerprint.All_Fingerprint import get_fingerprint

    pubchem_vector = get_fingerprint(smiles, fp_type="pubchem", output="vector")
    return pubchem_vector
def CDKFingerprint(self):
    """Compute Klekota-Roth fingerprints for the structure CSV and save them.

    Switches the working directory to the hard-coded data folder, reads
    ``extChronicStrcture.csv``, drops rows lacking a CAS number or a
    canonical SMILES, fingerprints every remaining SMILES with
    ``get_fingerprint(..., fp_type='klekota-roth')``, converts each result
    with ``self.num2fingerprint`` and writes the stacked frames to
    ``klekotaRoth.csv`` in that same folder.

    Returns:
        None. The result is only written to disk.
    """
    import pandas as pd
    import os

    # NOTE(review): os.chdir changes the working directory of the whole
    # process and the path is machine-specific; kept as-is because code
    # running afterwards may rely on it.
    os.chdir(r"G:\マイドライブ\Data\Meram Chronic Data")
    df = pd.read_csv('extChronicStrcture.csv', engine='python')
    from PyFingerprint.All_Fingerprint import get_fingerprint

    df = df[['CAS', 'canonical_smiles']]
    df = df.dropna(how='any')

    # Header layout: the CAS identifier followed by integer labels 0..4859.
    columns = ['CAS', *range(4860)]

    # Gather the per-molecule frames and concatenate once at the end;
    # concatenating inside the loop recopies the growing frame every
    # iteration. The empty frame stays first so the column order is the
    # same as before, including when no rows survive the filter.
    frames = [pd.DataFrame(columns=columns)]
    for cas, smiles in zip(df['CAS'], df['canonical_smiles']):
        fps = get_fingerprint(smiles, fp_type='klekota-roth')
        frames.append(self.num2fingerprint(fps, cas))

    resultDf = pd.concat(frames)
    resultDf.to_csv('klekotaRoth.csv', index=False)
def get_filtered_fingerprint(smiles):
    """Return the PubChem fingerprint of ``smiles`` with selected positions removed.

    The digits related to elements other than C, H, O, N, S, F, Cl, and Br
    are discarded.

    Args:
        smiles (str): SMILES string.

    Returns:
        np.ndarray: The filtered PubChem fingerprint as a vector. Only the
        vector is returned; use ``len()`` on it when the length is needed.
    """
    from PyFingerprint.All_Fingerprint import get_fingerprint

    # Individually listed positions to drop.
    isolated_positions = [
        26, 27, 28, 29, 30, 31, 32, 41, 42, 46, 47, 48,
        295, 296, 298, 303, 304, 348, 354, 369,
        407, 411, 415, 456, 525, 627,
    ]
    # Full deletion list: the isolated positions followed by whole runs.
    discarded = [
        *isolated_positions,
        *range(49, 115),
        *range(263, 283),
        *range(288, 293),
        *range(310, 317),
        *range(318, 332),  # 318-326 and 327-331 are contiguous
        *range(424, 427),
    ]

    full_vector = get_fingerprint(smiles, fp_type="pubchem", output="vector")
    return np.delete(full_vector, discarded)
def pubChemFP(mol):
    """Expand the PubChem fingerprint of ``mol`` into an 881-entry 0/1 array.

    Args:
        mol: Passed straight to ``get_fingerprint(..., fp_type='pubchem')``.

    Returns:
        np.ndarray: Array of length 881 holding 1 at every position reported
        by ``get_fingerprint`` (shifted down by one) and 0 elsewhere.
    """
    n_bits = 881
    flags = [0] * n_bits

    # NOTE(review): the "- 1" treats the reported positions as 1-based; a
    # reported 0 would wrap around to the last slot. Confirm against the
    # indexing convention of get_fingerprint.
    for position in get_fingerprint(mol, fp_type='pubchem'):
        flags[position - 1] = 1

    return np.array(flags)