コード例 #1
0
 def score_model(self, model_configuration: dict, fragments_file: str,
                 descriptors_file: str, output_file: str):
     """Score each molecule in ``fragments_file`` against the model's actives.

     Every line of ``fragments_file`` is parsed as a JSON object with
     ``"smiles"`` and ``"name"`` keys.  The score of a molecule is the highest
     Tanimoto similarity between its topological-torsion fingerprint and the
     fingerprints of the SMILES listed under
     ``model_configuration["data"]["active"]``.  One JSON object
     ``{"name": ..., "score": ...}`` is written per input line, separated by
     newlines, with no trailing newline.

     ``descriptors_file`` is accepted but not used by this method.
     """
     inputoutput_utils.create_parent_directory(output_file)
     model_data = model_configuration["data"]
     # Forwarded as the second positional argument of the fingerprint call.
     fragment_size = int(
         model_configuration["configuration"]["fragments"][0]["size"])

     def fingerprint_of(raw_smiles):
         # SMILES may arrive wrapped in double quotes; drop them first.
         parsed = Chem.MolFromSmiles(raw_smiles.strip("\""))
         return Torsions.GetTopologicalTorsionFingerprintAsIntVect(
             parsed, fragment_size)

     reference_fps = [fingerprint_of(entry) for entry in model_data["active"]]

     # Empty before the first record, a newline before every later one.
     separator = ""
     with open(output_file, "w", encoding="utf-8") as sink, \
             open(fragments_file, "r", encoding="utf-8") as source:
         for raw_record in source:
             record = json.loads(raw_record)
             candidate_fp = fingerprint_of(record["smiles"])
             best_similarity = max(
                 DataStructs.TanimotoSimilarity(candidate_fp, reference_fp)
                 for reference_fp in reference_fps)
             result = {"name": record["name"], "score": best_similarity}
             sink.write(separator)
             separator = "\n"
             json.dump(result, sink)
コード例 #2
0
    def testTorsionsRegression(self):
        """Check torsion fingerprints of ``self.mols`` against pickled references.

        The reference fingerprints are unpickled from ``mols1000.tts.pkl.gz``
        under ``self.testDataPath``.  Each molecule's fingerprint must equal
        the reference at the same position and differ from the reference at
        the previous position.  On a mismatch, the differing elements are
        printed before the assertion fails.
        """
        pickle_path = os.path.join(self.testDataPath, 'mols1000.tts.pkl.gz')
        # The context manager closes the gzip handle even if unpickling fails.
        with gzip.open(pickle_path, 'rb') as inF:
            torsions = cPickle.load(inF, encoding='bytes')
        for i, m in enumerate(self.mols):
            tt = Torsions.GetTopologicalTorsionFingerprintAsIntVect(m)
            if tt != torsions[i]:
                print(Chem.MolToSmiles(m))
                computed = tt.GetNonzeroElements()
                reference = torsions[i].GetNonzeroElements()
                # `.items()` and `in` replace the Python-2-only
                # `iteritems()` / `has_key()`, which raise AttributeError on
                # Python 3 and would hide the real mismatch.
                for k, v in computed.items():
                    if k in reference:
                        if reference[k] != v:
                            print('>>>1', k, v, reference[k])
                    else:
                        print('>>>2', k, v)
                for k, v in reference.items():
                    if k in computed:
                        if computed[k] != v:
                            print('>>>3', k, v, computed[k])
                    else:
                        print('>>>4', k, v)

            self.assertTrue(tt == torsions[i])
            # For i == 0 this compares against the last reference entry.
            self.assertTrue(tt != torsions[i - 1])
コード例 #3
0
ファイル: UnitTestDescriptors.py プロジェクト: yinxx/rdkit
 def testGetTopologicalTorsionFingerprintAsIds(self):
   """The three torsion-fingerprint entry points agree for 'C1CCCCN1'.

   The sparse fingerprint and the int-vect fingerprint must report the same
   non-zero elements, and the id list must contain each id once per count.
   """
   ring_mol = Chem.MolFromSmiles('C1CCCCN1')
   expected_counts = {4437590049: 2, 8732557345: 2, 4445978657: 2}
   expected_ids = [
     4437590049, 4437590049, 4445978657, 4445978657, 8732557345, 8732557345
   ]

   sparse_fp = Torsions.GetTopologicalTorsionFingerprint(ring_mol)
   self.assertEqual(sparse_fp.GetNonzeroElements(), expected_counts)

   id_list = Torsions.GetTopologicalTorsionFingerprintAsIds(ring_mol)
   self.assertEqual(sorted(id_list), expected_ids)

   int_vect_fp = Torsions.GetTopologicalTorsionFingerprintAsIntVect(ring_mol)
   self.assertEqual(int_vect_fp.GetNonzeroElements(), expected_counts)
コード例 #4
0
    def torsions_fp(self):
        """Build a ``[torsion total, encoded label]`` array from the CSV.

        Reads ``self.csv_path`` (columns ``'Smiles'`` and ``'Label'``).  For
        every SMILES it computes the topological-torsion fingerprint and
        keeps only its ``GetTotalVal()``, because the full vector is too
        large to materialise.  Rows whose fingerprint cannot be computed are
        dropped, labels are integer-encoded with ``LabelEncoder``, and rows
        with a duplicate fingerprint value are removed.

        Returns:
            A 2-D ``float32`` array with the fingerprint total in column 0
            and the encoded label in column 1.  Rows follow the order
            produced by ``np.unique`` (sorted by fingerprint value).
        """
        df = pd.read_csv(self.csv_path)
        smiles_list = df['Smiles'].tolist()

        fingerprints = []
        not_found = []
        for i, smiles in enumerate(tqdm(smiles_list)):
            try:
                mol = Chem.MolFromSmiles(smiles)
                fp = Torsions.GetTopologicalTorsionFingerprintAsIntVect(mol)
                fingerprints.append(fp.GetTotalVal())
            except Exception:
                # Best effort: an unparsable or missing SMILES is recorded and
                # skipped.  `Exception` (not a bare except) lets
                # KeyboardInterrupt / SystemExit still stop the run.
                fingerprints.append(np.nan)
                not_found.append(i)

        # read_csv gives a default positional index, so the failed positions
        # are also the row labels to drop.
        df.drop(not_found, axis=0, inplace=True)

        print('Number of FPs not found: {}'.format(len(not_found)))

        df.reset_index(drop=True, inplace=True)
        labelencoder = LabelEncoder()
        Y = labelencoder.fit_transform(df['Label'].values)
        Y = Y.reshape(Y.shape[0], 1)

        print('Output shape: {}'.format(Y.shape))

        fp_array = np.asarray(fingerprints, dtype=object)
        # Remove the NaN placeholders so X lines up with the filtered labels.
        X = np.delete(fp_array, not_found, axis=0)
        X = np.vstack(X).astype(np.float32)
        print('Input shape: {}'.format(X.shape))

        final_array = np.concatenate((X, Y), axis=1)

        # Removing rows, from final_array, where duplicate FPs are present.
        # Only the feature columns (everything but the last) are compared.
        final_array_slice = final_array[:, 0:(final_array.shape[1] - 1)]
        _, unq_row_indices = np.unique(final_array_slice,
                                       return_index=True,
                                       axis=0)
        final_array_unique = final_array[unq_row_indices]

        print(
            'Number of Duplicate FPs: {}'.format(final_array.shape[0] -
                                                 final_array_unique.shape[0]))

        print('Final Numpy array shape: {}'.format(final_array_unique.shape))
        print('Type of final array: {}'.format(type(final_array_unique)))
        # Concatenating with the integer labels upcasts the array; cast back.
        final_numpy_array = np.asarray(final_array_unique, dtype=np.float32)

        return final_numpy_array
コード例 #5
0
ファイル: UnitTestDescriptors.py プロジェクト: yinxx/rdkit
 def testTorsionsRegression(self):
   """Check torsion fingerprints of ``self.mols`` against pickled references.

   Each molecule's fingerprint must equal the reference at the same position
   in ``mols1000.tts.pkl.gz`` and differ from the reference at the previous
   position.  On a mismatch, ``debugFingerprint`` is called before the
   assertion fails.
   """
   pickle_path = os.path.join(self.testDataPath, 'mols1000.tts.pkl.gz')
   # The context manager closes the gzip handle even if unpickling fails.
   with gzip.open(pickle_path, 'rb') as inF:
     torsions = cPickle.load(inF, encoding='bytes')
   for i, m in enumerate(self.mols):
     tt = Torsions.GetTopologicalTorsionFingerprintAsIntVect(m)
     if tt != torsions[i]:  # pragma: nocover
       debugFingerprint(m, tt, torsions[i])
     self.assertEqual(tt, torsions[i])
     # For i == 0 this compares against the last reference entry.
     self.assertNotEqual(tt, torsions[i - 1])
コード例 #6
0
def TORSIONSfpDataFrame(chempandas, namecol, smicol):
    """
    Build a DataFrame of topological-torsion fingerprints.

    SMILES are read from column position ``smicol`` of ``chempandas`` and
    names from column position ``namecol``.  Rows whose SMILES cannot be
    parsed are left out.  The result is indexed by molecule name and carries
    two extra columns: ``'SMILES'`` (from ``Chem.MolToSmiles``) and
    ``'CHEMBL'`` (a copy of the index).
    """
    assert chempandas.shape[0] <= MAXLINES
    parsed = [Chem.MolFromSmiles(smi) for smi in chempandas.iloc[:, smicol]]

    # Keep only parsable molecules, tagging each with the name from its row.
    molsmi = []
    for row, mol in enumerate(parsed):
        if mol is None:
            continue
        mol.SetProp("_Name", chempandas.iloc[row, namecol])
        molsmi.append(mol)

    # TORSIONS Fingerprints.
    fps = [
        Torsions.GetTopologicalTorsionFingerprintAsIntVect(mol)
        for mol in molsmi
    ]
    fpsmat = np.matrix(fps)
    names = [mol.GetProp("_Name") for mol in molsmi]
    df = DataFrame(fpsmat, index=names)
    df['SMILES'] = [Chem.MolToSmiles(mol) for mol in molsmi]
    df['CHEMBL'] = df.index
    return df
コード例 #7
0
# Registry of fingerprint generators: each entry maps a short name to a
# callable taking one molecule `m`.  `nbits` / `nbits_long` are module-level
# sizes defined outside this excerpt and are looked up when a lambda is
# called, not when it is registered.

# Feature-based Morgan fingerprints (useFeatures=True).  The second positional
# argument is the Morgan radius; "fcfp*" is the folded bit-vector form,
# "fcfc*" the unfolded count form.
FPDICT['fcfp6'] = lambda m: Chem.GetMorganFingerprintAsBitVect(
    m, 3, useFeatures=True, nBits=nbits)
FPDICT['fcfc2'] = lambda m: Chem.GetMorganFingerprint(m, 1, useFeatures=True)
FPDICT['fcfc4'] = lambda m: Chem.GetMorganFingerprint(m, 2, useFeatures=True)
FPDICT['fcfc6'] = lambda m: Chem.GetMorganFingerprint(m, 3, useFeatures=True)
# "l"-prefixed variants: the same bit-vector fingerprints folded to
# `nbits_long` instead of `nbits`.
FPDICT['lecfp4'] = lambda m: Chem.GetMorganFingerprintAsBitVect(
    m, 2, nBits=nbits_long)
FPDICT['lecfp6'] = lambda m: Chem.GetMorganFingerprintAsBitVect(
    m, 3, nBits=nbits_long)
FPDICT['lfcfp4'] = lambda m: Chem.GetMorganFingerprintAsBitVect(
    m, 2, useFeatures=True, nBits=nbits_long)
FPDICT['lfcfp6'] = lambda m: Chem.GetMorganFingerprintAsBitVect(
    m, 3, useFeatures=True, nBits=nbits_long)
# MACCS keys, atom pairs and topological torsions with library defaults.
FPDICT['maccs'] = lambda m: MACCSkeys.GenMACCSKeys(m)
FPDICT['ap'] = lambda m: Pairs.GetAtomPairFingerprint(m)
FPDICT['tt'] = lambda m: Torsions.GetTopologicalTorsionFingerprintAsIntVect(m)
# Hashed atom-pair / topological-torsion bit vectors of `nbits` bits.
FPDICT['hashap'] = lambda m: Desc.GetHashedAtomPairFingerprintAsBitVect(
    m, nBits=nbits)
FPDICT[
    'hashtt'] = lambda m: Desc.GetHashedTopologicalTorsionFingerprintAsBitVect(
        m, nBits=nbits)
# RDKit path-based fingerprints with maximum path length 5, 6 and 7.
FPDICT['rdk5'] = lambda m: Chem.RDKFingerprint(
    m, maxPath=5, fpSize=nbits, nBitsPerHash=2)
FPDICT['rdk6'] = lambda m: Chem.RDKFingerprint(
    m, maxPath=6, fpSize=nbits, nBitsPerHash=2)
FPDICT['rdk7'] = lambda m: Chem.RDKFingerprint(
    m, maxPath=7, fpSize=nbits, nBitsPerHash=2)
# Avalon fingerprints are registered only when the USE_AVALON flag is set.
if USE_AVALON:
    FPDICT['avalon'] = lambda m: pyAv.GetAvalonFP(m, nbits)
    FPDICT['avalon_l'] = lambda m: pyAv.GetAvalonFP(m, nbits_long)
コード例 #8
0
ファイル: fwcluster.py プロジェクト: wxlsummer/FWCluster
def ClusterOnFingerprint(filename, mols=None, fingerprint=0, cutoff=0.8, metric='Tanimoto', outMatrix=False):
    '''Cluster structures on RDKit fingerprints, or return their similarity matrix.

    filename: SMILES file holding the molecules. If falsy, the given "mols" is used.
    mols: Input molecules. Ignored when "filename" is set.
    fingerprint: Fingerprint to use:
        0 or anything else: RDKit Topological Fingerprint
        1: MACCS Fingerprint
        2: Atom Pair Fingerprint (AP)
        3: Topological Torsion Fingerprint (TT)
        4: Morgan Fingerprint similar to ECFP4 Fingerprint
        5: Morgan Fingerprint similar to FCFP4 Fingerprint
    cutoff: Similarity cutoff; "1 - cutoff" is handed to ClusterFps.
    metric: Similarity metric for the matrix output (case-insensitive):
            Tanimoto, Dice, Cosine, Sokal, Russel, RogotGoldberg, AllBit,
            Kulczynski, McConnaughey, Asymmetric, BraunBlanquet.
            An unknown name prints a warning and falls back to Tanimoto.
            Only used when outMatrix is true.
    outMatrix: If true, print and return the similarity matrix instead of clustering.
    Return:
        outMatrix false (default): "clusters, clusterOut"
            clusters: whatever ClusterFps returns (iterated as groups of molecule indices).
            clusterOut: list with the 1-based cluster number of every molecule.
        outMatrix true: the similarity matrix as a list of row lists.
    '''

    from rdkit import DataStructs
    from rdkit.Chem.Fingerprints import FingerprintMols
    from rdkit.Chem import MACCSkeys
    from rdkit.Chem.AtomPairs import Pairs, Torsions

    if filename:
        mols = [mol for mol in Chem.SmilesMolSupplier(filename)]
    molnums = len(mols)

    ### Calculate Molecular Fingerprint
    ## MACCS Fingerprint
    if fingerprint == 1:
        fps = [MACCSkeys.GenMACCSKeys(mol) for mol in mols]
    ## Atom Pair Fingerprint (AP)
    elif fingerprint == 2:
        fps = [Pairs.GetAtomPairFingerprint(mol) for mol in mols]
    ## Topological Torsion Fingerprint (TT)
    elif fingerprint == 3:
        fps = [Torsions.GetTopologicalTorsionFingerprintAsIntVect(mol) for mol in mols]
    ## Morgan Fingerprint similar to ECFP4 Fingerprint
    elif fingerprint == 4:
        fps = [AllChem.GetMorganFingerprint(mol, 2) for mol in mols]
    ## Morgan Fingerprint similar to FCFP4 Fingerprint
    elif fingerprint == 5:
        fps = [AllChem.GetMorganFingerprint(mol, 2, useFeatures=True) for mol in mols]
    ## RDKit Topological Fingerprint
    else:
        fps = [FingerprintMols.FingerprintMol(mol) for mol in mols]

    if outMatrix:
        ### Output the Fingerprint similarity Matrix
        # Keys must be all lower-case because the lookup uses metric.lower().
        metricsAvailable = {
            "tanimoto": DataStructs.TanimotoSimilarity,
            "dice": DataStructs.DiceSimilarity,
            "cosine": DataStructs.CosineSimilarity,
            "sokal": DataStructs.SokalSimilarity,
            "russel": DataStructs.RusselSimilarity,
            "rogotgoldberg": DataStructs.RogotGoldbergSimilarity,
            "allbit": DataStructs.AllBitSimilarity,
            "kulczynski": DataStructs.KulczynskiSimilarity,
            "mcconnaughey": DataStructs.McConnaugheySimilarity,
            "asymmetric": DataStructs.AsymmetricSimilarity,
            "braunblanquet": DataStructs.BraunBlanquetSimilarity,
        }

        metricKey = metric.lower()
        if metricKey not in metricsAvailable:
            print("The given metric is unknown!")
            metricKey = 'tanimoto'
        simMetrics = metricsAvailable[metricKey]

        ### Calculate Fingerprint similarity Matrix
        # One fresh list per row: "[[0.0]*n]*n" would make every row the same
        # list object, and rows are indexed [i][j] because a list cannot be
        # indexed with a tuple.
        simdm = [[0.0] * molnums for _ in range(molnums)]
        for i in range(molnums):
            simdm[i][i] = 1.0
            for j in range(i + 1, molnums):
                # Both directions are computed because not every metric is
                # symmetric (e.g. "asymmetric").
                simdm[i][j] = DataStructs.FingerprintSimilarity(fps[i], fps[j], metric=simMetrics)
                simdm[j][i] = DataStructs.FingerprintSimilarity(fps[j], fps[i], metric=simMetrics)

        # One matrix row per output line, values separated by single spaces.
        for row in simdm:
            print(' '.join('%3.2f' % value for value in row))
        return simdm

    # NOTE(review): the "metric" argument is not forwarded here; clustering
    # always asks ClusterFps for 'Tanimoto', as the original code did.
    clusters = ClusterFps(fps, cutoff=1 - cutoff, metric='Tanimoto')
    clusterOut = [0] * len(mols)
    for clusterID, cluster in enumerate(clusters, 1):
        for idx in cluster:
            clusterOut[idx] = clusterID
    return clusters, clusterOut
コード例 #9
0
def sim_two_serial():
    """Compute a combined similarity matrix between two SMILES lists.

    Prompts for two CSV paths and reads the ``"smiles"`` column of each.
    For every pair (row i of list 1, row j of list 2) eight fingerprint
    similarities are combined into one weighted score, which is divided by
    ``maxs`` so that a pair whose eight similarities are all 1 scores 1.
    Rows and columns of SMILES that could not be parsed are filled with the
    sentinel value 10.

    Returns:
        pandas.DataFrame of shape ``(len(list 1), len(list 2))`` holding
        ``float32`` scores.
    """
    from rdkit.Chem import MACCSkeys
    from rdkit.Chem.AtomPairs import Pairs
    from rdkit.Chem.AtomPairs import Torsions
    from rdkit.Chem import AllChem

    def parse_smiles(smiles, list_number):
        """Return (molecules, bad_indices); failures are reported on stdout."""
        molecules, bad = [], []
        for idx, smi in enumerate(smiles):
            mol = Chem.MolFromSmiles(smi)
            if mol is None:
                print('smile with number:', idx,
                      'in list %d could not be converted to molecule' %
                      list_number)
                bad.append(idx)
                continue
            molecules.append(mol)
        return molecules, bad

    def fingerprint_all(molecules, bad):
        """Return the eight fingerprint lists, aligned with the input rows."""

        def morgan(radius, **kwargs):
            return [
                AllChem.GetMorganFingerprintAsBitVect(x,
                                                      radius,
                                                      nBits=1024,
                                                      **kwargs)
                for x in molecules
            ]

        fps = {
            'topol': [FingerprintMols.FingerprintMol(x) for x in molecules],
            'maccs': [MACCSkeys.GenMACCSKeys(x) for x in molecules],
            'pairs': [Pairs.GetAtomPairFingerprint(x) for x in molecules],
            'tts': [
                Torsions.GetTopologicalTorsionFingerprintAsIntVect(x)
                for x in molecules
            ],
            'ecfp4': morgan(2),
            'ecfp6': morgan(3),
            'fcfp4': morgan(2, useFeatures=True),
            'fcfp6': morgan(3, useFeatures=True),
        }
        # `bad` is ascending, so inserting a placeholder at each failed index
        # shifts the later fingerprints back to their original row numbers.
        for j in bad:
            for fp_list in fps.values():
                fp_list.insert(j, 1)
        return fps

    #Load Data-----------------------------------------------------------------------
    path1 = input("Path for list 1: ")
    path2 = input("Path for list 2: ")

    smis1 = pd.read_csv(path1)["smiles"]
    smis2 = pd.read_csv(path2)["smiles"]
    l1 = len(smis1)
    l2 = len(smis2)
    total_pairs = l1 * l2
    # Progress is reported about every 5 %.  The step is clamped to 1 because
    # round() yields 0 for fewer than ~10 pairs and `k % 0` would raise
    # ZeroDivisionError.
    lp = max(1, round(total_pairs / 20))

    #Get molecules from smiles-----------------------------------------------------------------------
    molecules1, bad1 = parse_smiles(smis1, 1)
    molecules2, bad2 = parse_smiles(smis2, 2)
    del smis1, smis2

    #Final output matrix-----------------------------------------------------------------------
    similarity = np.zeros(shape=(l1, l2), dtype=np.float32)

    print('Begining fingerprint calculation...wait')
    fps1 = fingerprint_all(molecules1, bad1)
    print('Begining fingerprint calculation...50%')
    fps2 = fingerprint_all(molecules2, bad2)
    print('Begining fingerprint calculation...done\n')
    # The molecules are no longer needed once fingerprinted.
    del molecules1, molecules2

    def pair_score(i, j):
        """Weighted combination of the eight similarities for pair (i, j)."""
        similarities_topol = DataStructs.FingerprintSimilarity(
            fps1['topol'][i], fps2['topol'][j])
        similarities_maccs = DataStructs.FingerprintSimilarity(
            fps1['maccs'][i], fps2['maccs'][j])
        similarities_pairs = DataStructs.DiceSimilarity(
            fps1['pairs'][i], fps2['pairs'][j])
        similarities_tts = DataStructs.DiceSimilarity(
            fps1['tts'][i], fps2['tts'][j])
        similarities_ecfp4 = DataStructs.FingerprintSimilarity(
            fps1['ecfp4'][i], fps2['ecfp4'][j])
        similarities_ecfp6 = DataStructs.FingerprintSimilarity(
            fps1['ecfp6'][i], fps2['ecfp6'][j])
        similarities_fcfp4 = DataStructs.FingerprintSimilarity(
            fps1['fcfp4'][i], fps2['fcfp4'][j])
        similarities_fcfp6 = DataStructs.FingerprintSimilarity(
            fps1['fcfp6'][i], fps2['fcfp6'][j])
        return (0.5 *
                (similarities_ecfp4 / 0.65 + similarities_ecfp6 / 0.6) +
                0.5 *
                (similarities_fcfp4 / 0.65 + similarities_fcfp6 / 0.6) +
                0.5 * (similarities_tts / 0.7 + similarities_pairs / 0.7) +
                similarities_maccs / 0.85 + similarities_topol / 0.75) / 5

    print('Begining of fingerprints similarity calculation\n')
    # Value pair_score() takes when all eight similarities equal 1.
    maxs = 2 / (0.65 * 10) + 2 / (0.6 * 10) + 2 / (0.7 * 10) + 1 / (
        0.75 * 5) + 1 / (0.85 * 5)
    # Sets give O(1) membership tests inside the pair loop.
    bad_rows = set(bad1)
    bad_cols = set(bad2)
    k = 0
    for i in range(l1):
        row_is_bad = i in bad_rows
        for j in range(l2):
            if not (row_is_bad or j in bad_cols):
                similarity[i][j] = pair_score(i, j)
            k = k + 1
            if k % lp == 0:
                print('running:', (k / total_pairs) * 100, '%')

    similarity = similarity / maxs
    # Mark unparsable molecules with a value outside the normal score range.
    similarity[bad1, :] = 10
    similarity[:, bad2] = 10

    print('End of fingerprints similarity calculation')

    return pd.DataFrame(similarity)
コード例 #10
0
def sim_one_serial():
    """Compute a combined all-against-all similarity matrix for one SMILES list.

    Prompts for a CSV path and reads its ``"smiles"`` column.  For every
    pair of molecules eight fingerprint similarities are combined into one
    weighted score, which is divided by ``maxs`` so that a pair whose eight
    similarities are all 1 scores 1.  Only the lower triangle is computed
    and mirrored.  Rows and columns of SMILES that could not be parsed are
    filled with the sentinel value 10.

    Returns:
        pandas.DataFrame of shape ``(n, n)`` holding ``float32`` scores.
    """
    from rdkit.Chem import MACCSkeys
    from rdkit.Chem.AtomPairs import Pairs
    from rdkit.Chem.AtomPairs import Torsions
    from rdkit.Chem import AllChem

    #Load Data-----------------------------------------------------------------------
    path = input("Path for list : ")
    smis = pd.read_csv(path)["smiles"]
    n = len(smis)
    # Progress step, clamped to 1: round() yields 0 for n <= 3 and `k % 0`
    # would raise ZeroDivisionError.
    lp = max(1, round(n * n / 20))

    #Get molecules from smiles-----------------------------------------------------------------------
    bad = []
    molecules = []
    for i, smi in enumerate(smis):
        m = Chem.MolFromSmiles(smi)
        if m is None:
            print('smile with number:', i,
                  'in list could not be converted to molecule')
            bad.append(i)
            continue
        molecules.append(m)
    del smis

    #Final output matrix-----------------------------------------------------------------------
    similarity = np.zeros(shape=(n, n), dtype=np.float32)

    def morgan(radius, **kwargs):
        return [
            AllChem.GetMorganFingerprintAsBitVect(x,
                                                  radius,
                                                  nBits=1024,
                                                  **kwargs)
            for x in molecules
        ]

    print('Begining fingerprint calculation...wait')
    fps = {
        'topol': [FingerprintMols.FingerprintMol(x) for x in molecules],
        'maccs': [MACCSkeys.GenMACCSKeys(x) for x in molecules],
        'pairs': [Pairs.GetAtomPairFingerprint(x) for x in molecules],
        'tts': [
            Torsions.GetTopologicalTorsionFingerprintAsIntVect(x)
            for x in molecules
        ],
        'ecfp4': morgan(2),
        'ecfp6': morgan(3),
        'fcfp4': morgan(2, useFeatures=True),
        'fcfp6': morgan(3, useFeatures=True),
    }
    print('Begining fingerprint calculation...done\n')

    # `bad` is ascending, so inserting a placeholder at each failed index
    # shifts the later fingerprints back to their original row numbers.
    for j in bad:
        for fp_list in fps.values():
            fp_list.insert(j, 1)

    def pair_score(i, j):
        """Weighted combination of the eight similarities for pair (i, j)."""
        similarities_topol = DataStructs.FingerprintSimilarity(
            fps['topol'][i], fps['topol'][j])
        similarities_maccs = DataStructs.FingerprintSimilarity(
            fps['maccs'][i], fps['maccs'][j])
        similarities_pairs = DataStructs.DiceSimilarity(
            fps['pairs'][i], fps['pairs'][j])
        similarities_tts = DataStructs.DiceSimilarity(
            fps['tts'][i], fps['tts'][j])
        similarities_ecfp4 = DataStructs.FingerprintSimilarity(
            fps['ecfp4'][i], fps['ecfp4'][j])
        similarities_ecfp6 = DataStructs.FingerprintSimilarity(
            fps['ecfp6'][i], fps['ecfp6'][j])
        similarities_fcfp4 = DataStructs.FingerprintSimilarity(
            fps['fcfp4'][i], fps['fcfp4'][j])
        similarities_fcfp6 = DataStructs.FingerprintSimilarity(
            fps['fcfp6'][i], fps['fcfp6'][j])
        return (0.5 *
                (similarities_ecfp4 / 0.65 + similarities_ecfp6 / 0.6) +
                0.5 *
                (similarities_fcfp4 / 0.65 + similarities_fcfp6 / 0.6) +
                0.5 * (similarities_tts / 0.7 + similarities_pairs / 0.7) +
                similarities_maccs / 0.85 + similarities_topol / 0.75) / 5

    print('Begining of fingerprints similarity calculation\n')
    # Value pair_score() takes when all eight similarities equal 1.
    maxs = 2 / (0.65 * 10) + 2 / (0.6 * 10) + 2 / (0.7 * 10) + 1 / (
        0.75 * 5) + 1 / (0.85 * 5)
    # A set gives O(1) membership tests inside the pair loop.
    bad_indices = set(bad)
    k = 0
    for i in range(n):
        row_is_bad = i in bad_indices
        # Lower triangle including the diagonal (j <= i); mirrored below.
        for j in range(i + 1):
            if not (row_is_bad or j in bad_indices):
                similarity[i][j] = pair_score(i, j)
                similarity[j][i] = similarity[i][j]
            k = k + 1
            if k % lp == 0:
                print('running:', (k / (n * n / 2)) * 100, '%')

    similarity = similarity / maxs
    # Mark unparsable molecules with a value outside the normal score range.
    similarity[bad, :] = 10
    similarity[:, bad] = 10

    print('End of fingerprints similarity calculation')

    return pd.DataFrame(similarity)
コード例 #11
0
)
# Fingerprint registry entries: each maps a short name to a callable taking
# one molecule `m`.  `longbits`, `nbits` and `n_cas_bits` are module-level
# sizes defined outside this excerpt and are looked up when a lambda is
# called, not when it is registered.

# "l"-prefixed Morgan bit vectors are folded to `longbits` bits.  The second
# positional argument is the Morgan radius; "lfcfp*" additionally sets
# useFeatures=True.
fpdict["lecfp4"] = lambda m: AllChem.GetMorganFingerprintAsBitVect(
    m, 2, nBits=longbits
)
fpdict["lecfp6"] = lambda m: AllChem.GetMorganFingerprintAsBitVect(
    m, 3, nBits=longbits
)
fpdict["lfcfp4"] = lambda m: AllChem.GetMorganFingerprintAsBitVect(
    m, 2, useFeatures=True, nBits=longbits
)
fpdict["lfcfp6"] = lambda m: AllChem.GetMorganFingerprintAsBitVect(
    m, 3, useFeatures=True, nBits=longbits
)
# MACCS keys, atom pairs and topological torsions with library defaults.
fpdict["maccs"] = lambda m: MACCSkeys.GenMACCSKeys(m)
fpdict["ap"] = lambda m: Pairs.GetAtomPairFingerprint(m)
fpdict["tt"] = lambda m: Torsions.GetTopologicalTorsionFingerprintAsIntVect(m)
# Hashed atom-pair bit vectors, registered at two sizes: `nbits` and
# `n_cas_bits`.
fpdict[
    "hashap"
] = lambda m: rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
    m, nBits=nbits
)
fpdict[
    "hashap_cas_length"
] = lambda m: rdMolDescriptors.GetHashedAtomPairFingerprintAsBitVect(
    m, nBits=n_cas_bits
)
# Hashed topological-torsion bit vector of `nbits` bits.
fpdict[
    "hashtt"
] = lambda m: rdMolDescriptors.GetHashedTopologicalTorsionFingerprintAsBitVect(
    m, nBits=nbits
)
コード例 #12
0
            similarities_pairs[i][j] = 1
    if i % 500 == 0:
        print('running:', i / len(fps_pairs) * 100, '%')

# In[ ]:

# Persist the atom-pair similarity matrix computed above.
df = pd.DataFrame(similarities_pairs)
df.to_csv('similarities_pairs.csv')

# ### Topological torsion descriptors

# In[ ]:

from rdkit.Chem.AtomPairs import Torsions
fps_tts = [
    Torsions.GetTopologicalTorsionFingerprintAsIntVect(mol)
    for mol in molecules
]
similarities_tts = np.zeros(shape=(len(fps_tts), len(fps_tts)))

# In[ ]:

# Pairwise similarities; comment this cell out if only the fingerprints are
# needed.  The lower triangle is computed and mirrored, the diagonal is 1.
for i in range(len(fps_tts)):
    for j in range(len(fps_tts)):
        if i == j:
            similarities_tts[i, j] = 1
        elif j < i:
            # Dice is the default similarity for these fingerprints.
            similarities_tts[i, j] = DataStructs.DiceSimilarity(
                fps_tts[i], fps_tts[j])
            similarities_tts[j, i] = similarities_tts[i, j]
コード例 #13
0
def BuildTorsionsFP(mol):
    """Return the topological-torsion fingerprint of ``mol``.

    The fingerprint's ``GetTotalVal()`` is stored on the returned object as
    the ``_sumCache`` attribute.
    """
    from rdkit.Chem.AtomPairs import Torsions
    torsion_fp = Torsions.GetTopologicalTorsionFingerprintAsIntVect(mol)
    total = torsion_fp.GetTotalVal()
    torsion_fp._sumCache = total
    return torsion_fp
コード例 #14
0
    '/drug_development/studyRdkit/st_rdcit/img/mol21.jpg'
)
# Atom-pair fingerprints for every molecule in `ms` (defined earlier in the
# script).
pairFps = [Pairs.GetAtomPairFingerprint(x) for x in ms]
print(pairFps)
# The bit space of atom-pair fingerprints is very large, so they are stored
# sparsely; the non-zero elements are available as a dictionary.
d = pairFps[-1].GetNonzeroElements()
print(d)  # {541732: 1, 558113: 2, 558115: 2, 558146: 1, 1606690: 2, 1606721: 2}
print(d[541732])  # 1
# A bit can also be explained, as shown below.
de = Pairs.ExplainPairScore(558115)
print(de)  # (('C', 1, 0), 3, ('C', 2, 0))
# The above means: C with 1 neighbor and 0 pi electrons which is 3 bonds from a C with 2 neighbors and 0 pi electrons
# (i.e. the two atom descriptions are the pair's end points and the middle
# number is the bond distance between them).
# # 2.4 Topological torsions

tts = [Torsions.GetTopologicalTorsionFingerprintAsIntVect(x) for x in ms]
d_ds = DataStructs.DiceSimilarity(tts[0], tts[1])
print(d_ds)  # 0.16666666666666666
# # 2.5 Morgan fingerprints (circular fingerprints): AllChem.GetMorganFingerprint(mol, 2)
# This family of fingerprints is built by applying the Morgan algorithm to a set of user-supplied atom invariants. The fingerprint radius must be given when generating a Morgan fingerprint.
m1 = Chem.MolFromSmiles('Cc1ccccc1')
m2 = Chem.MolFromSmiles('Cc1ncccc1')

fp1 = AllChem.GetMorganFingerprint(m1, 2)
fp2 = AllChem.GetMorganFingerprint(m2, 2)
d_mf = DataStructs.DiceSimilarity(fp1, fp2)
print(d_mf)  # 0.55

# Like atom pairs and topological torsions, Morgan fingerprints use counts by default, but they can also be computed as bit vectors.
fp1 = AllChem.GetMorganFingerprintAsBitVect(m1, 2, nBits=1024)
fp2 = AllChem.GetMorganFingerprintAsBitVect(m2, 2, nBits=1024)
コード例 #15
0
 def Calc_Torsions(self):
     """Return the topological-torsion fingerprint of every item in ``self.sd``."""
     return [
         Torsions.GetTopologicalTorsionFingerprintAsIntVect(mol)
         for mol in self.sd
     ]
コード例 #16
0
ファイル: fingerprints.py プロジェクト: akapoor85/Bython
def FptTorsion(rdkmol):
    """Return the topological-torsion fingerprint (int vect) of ``rdkmol``."""
    torsion_fp = Torsions.GetTopologicalTorsionFingerprintAsIntVect(rdkmol)
    return torsion_fp