예제 #1
0
def make_fingerprints(data, length=512, verbose=False):
    """Build every fingerprint wrapper and run each of them on ``data``.

    Args:
        data: Handed unchanged to each wrapper's ``apply_fp`` method.
        length: Bit count forwarded to the Morgan and Avalon generators
            (as ``nBits``) and to the RDKit generator (as ``fpSize``). The
            remaining generators are called without it.
        verbose: When true, print ``doing`` followed by the wrapper's
            ``name`` before that wrapper is applied.

    Returns:
        list: The ``fingerprint`` wrappers, in the order they were applied.
    """
    # (generator callable, display name) pairs, listed in application order.
    recipes = (
        (Chem.rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect,
         "Torsion "),
        (lambda mol: GetMorganFingerprintAsBitVect(mol, 2, nBits=length),
         "Morgan"),
        (FingerprintMol, "Estate (1995)"),
        (lambda mol: GetAvalonFP(mol, nBits=length),
         "Avalon bit based (2006)"),
        # Avalon bits with the molecular weight appended as one extra entry.
        (lambda mol: np.append(GetAvalonFP(mol, nBits=length), Descriptors.MolWt(mol)),
         "Avalon+mol. weight"),
        (lambda mol: GetErGFingerprint(mol), "ErG fingerprint (2006)"),
        (lambda mol: RDKFingerprint(mol, fpSize=length),
         "RDKit fingerprint"),
        (lambda mol: MACCSkeys.GenMACCSKeys(mol),
         "MACCS fingerprint"),
        (lambda mol: get_fingerprint(mol, fp_type='pubchem'), "PubChem"),
        # Disabled variant: get_fingerprint(mol, fp_type='FP4'), "FP4"
        (lambda mol: Generate.Gen2DFingerprint(
            mol, Gobbi_Pharm2D.factory, dMat=Chem.Get3DDistanceMatrix(mol)),
         "3D pharmacophore"),
    )
    fp_list = [fingerprint(generator, label) for generator, label in recipes]

    for wrapper in fp_list:
        if verbose:
            print("doing", wrapper.name)
        wrapper.apply_fp(data)

    return fp_list
예제 #2
0
def get_pubchem_fingerprint(smiles):
    """Compute the PubChem fingerprint for a molecule given as SMILES.

    Args:
        smiles (str): SMILES string of the molecule.

    Returns:
        The value produced by PyFingerprint's ``get_fingerprint`` when called
        with ``fp_type="pubchem"`` and ``output="vector"``.
    """
    # Function-scope import: PyFingerprint is loaded when this is first
    # called rather than when the module is imported.
    from PyFingerprint.All_Fingerprint import get_fingerprint as _compute

    pubchem_vector = _compute(smiles, fp_type="pubchem", output="vector")
    return pubchem_vector
예제 #3
0
    def CDKFingerprint(self):
        """Compute Klekota-Roth fingerprints for each compound and save them.

        Reads ``extChronicStrcture.csv`` from the hard-coded data directory,
        keeps the rows that have both a ``CAS`` and a ``canonical_smiles``
        value, converts each fingerprint to a one-compound frame through
        ``self.num2fingerprint`` and writes all of them to ``klekotaRoth.csv``
        in that same directory.

        Side effects:
            Changes the working directory of the whole process and writes a
            CSV file. Returns ``None``.
        """
        import os

        import pandas as pd
        from PyFingerprint.All_Fingerprint import get_fingerprint

        # NOTE: os.chdir affects the entire process, not just this method;
        # both file names below are resolved relative to this directory.
        os.chdir(r"G:\マイドライブ\Data\Meram Chronic Data")
        df = pd.read_csv('extChronicStrcture.csv', engine='python')

        # Only compounds with both an identifier and a structure are usable.
        df = df[['CAS', 'canonical_smiles']].dropna(how='any')

        # Header layout: the CAS column followed by integer labels 0..4859.
        columns = ['CAS', *range(4860)]

        # Collect the per-compound frames and concatenate once at the end.
        # Concatenating inside the loop re-copies the accumulated frame on
        # every iteration, which is quadratic in the number of compounds.
        # The empty, correctly-labelled frame goes first so the column order
        # is fixed even when there are no compounds at all.
        frames = [pd.DataFrame(columns=columns)]
        for cas, smiles in zip(df['CAS'], df['canonical_smiles']):
            fps = get_fingerprint(smiles, fp_type='klekota-roth')
            frames.append(self.num2fingerprint(fps, cas))

        resultDf = pd.concat(frames)
        resultDf.to_csv('klekotaRoth.csv', index=False)
예제 #4
0
def get_filtered_fingerprint(smiles):
    """Return the PubChem fingerprint of ``smiles`` with selected positions removed.

    The removed positions are intended to be the digits related to elements
    other than C, H, O, N, S, F, Cl, and Br.

    Args:
        smiles (str): SMILES string.

    Returns:
        np.ndarray: The fingerprint vector after deleting the listed
        positions. Only the vector is returned; its length is not returned
        separately.
    """
    from PyFingerprint.All_Fingerprint import get_fingerprint

    full_vector = get_fingerprint(smiles, fp_type="pubchem", output="vector")

    # Individually listed positions to drop.
    isolated = [
        26, 27, 28, 29, 30, 31, 32, 41, 42, 46, 47, 48,
        295, 296, 298, 303, 304, 348, 354, 369,
        407, 411, 415, 456, 525, 627,
    ]
    # Contiguous runs to drop, as half-open (start, stop) pairs.
    spans = [
        (49, 115),
        (263, 283),
        (288, 293),
        (310, 317),
        (318, 327),
        (327, 332),
        (424, 427),
    ]
    del_pos = isolated + [
        idx for start, stop in spans for idx in range(start, stop)
    ]

    return np.delete(full_vector, del_pos)
예제 #5
0
def pubChemFP(mol):
    """Expand the PubChem fingerprint of ``mol`` into a dense 0/1 array.

    Args:
        mol: Passed straight to ``get_fingerprint`` with ``fp_type='pubchem'``.

    Returns:
        np.ndarray: Array of length 881 holding 1 at every slot named by the
        fingerprint and 0 elsewhere.
    """
    n_bits = 881
    dense = [0] * n_bits
    # NOTE(review): each value is treated as a 1-based position, so a value
    # of 0 would wrap around and set the last slot -- confirm the index base
    # that get_fingerprint uses.
    for position in get_fingerprint(mol, fp_type='pubchem'):
        dense[position - 1] = 1
    return np.array(dense)