コード例 #1
0
    def compute_tsne(self):
        """Embed the molecules of ``self.Database2`` in 2-D with t-SNE.

        Each SMILES string is parsed, converted to a Morgan bit-vector
        fingerprint (radius 3), and a full symmetric Tanimoto similarity
        matrix is assembled from the lower-triangle values returned by
        ``GetTanimotoSimMat``.  The matrix ``1 - similarity`` is then passed
        to ``TSNE.fit_transform`` using ``self.perplexity``.

        The result is stored in ``self.tsne_result``: a DataFrame with the
        columns ``PC1``, ``PC2``, ``LIBRARY``, ``SMILES`` and ``NAME``,
        indexed by ``TIPO`` (a copy of the ``LIBRARY`` column).  Nothing is
        returned.
        """
        data = self.Database2
        molecules = [Chem.MolFromSmiles(s) for s in list(data["SMILES"])]
        fingerprints = [
            AllChem.GetMorganFingerprintAsBitVect(mol, 3) for mol in molecules
        ]

        # GetTanimotoSimMat yields only the strict lower triangle as a flat
        # sequence; expand it into a square matrix with a unit diagonal.
        lower_values = GetTanimotoSimMat(fingerprints)
        size = len(fingerprints)
        similarity = np.ones([size, size])
        similarity[np.tril_indices(n=size, m=size, k=-1)] = lower_values
        upper = np.triu_indices(n=size, m=size, k=1)
        # Mirror the lower triangle into the upper one.
        similarity[upper] = similarity.T[upper]
        distance = np.subtract(1, similarity)

        model = TSNE(
            n_components=2,
            init='pca',
            random_state=1992,
            angle=0.3,
            perplexity=self.perplexity,
        )
        embedding = model.fit_transform(distance)

        # Plain lists are attached so the values are placed positionally,
        # independent of the index carried by ``data``.
        frame = pd.DataFrame(data=embedding, columns=["PC1", "PC2"]).assign(
            LIBRARY=list(data.LIBRARY),
            TIPO=list(data.LIBRARY),
            SMILES=list(data.SMILES),
            NAME=list(data.NAME),
        )
        self.tsne_result = frame.set_index('TIPO')
コード例 #2
0
    def compute_pca(self):
        """Project the molecules of ``self.Database2`` onto two principal components.

        Each SMILES string is parsed and converted to a Morgan bit-vector
        fingerprint (radius 2).  The lower-triangle Tanimoto similarities
        returned by ``GetTanimotoSimMat`` are expanded into a full symmetric
        matrix with a unit diagonal, and a whitened two-component PCA is
        fitted on that matrix.

        Side effects:
            ``self.pca_result`` -- the projection indexed by ``TIPO``
                (a copy of the ``LIBRARY`` column).
            ``self.a``, ``self.b`` -- explained variance of the first and
                second component, as percentages rounded to 2 decimals.

        Returns:
            The projection as a DataFrame with columns ``PC1``, ``PC2``,
            ``LIBRARY``, ``TIPO``, ``SMILES`` and ``NAME`` (not re-indexed).
        """
        Database = self.Database2
        smiles = list(Database.SMILES)
        mols = [Chem.MolFromSmiles(x) for x in smiles]
        fps = [AllChem.GetMorganFingerprintAsBitVect(x, 2) for x in mols]

        # Expand the flat strict-lower-triangle values into a square,
        # symmetric similarity matrix (diagonal stays at 1).
        tanimoto_sim_mat_lower_triangle = GetTanimotoSimMat(fps)
        n_mol = len(fps)
        similarity_matrix = np.ones([n_mol, n_mol])
        i_lower = np.tril_indices(n=n_mol, m=n_mol, k=-1)
        i_upper = np.triu_indices(n=n_mol, m=n_mol, k=1)
        similarity_matrix[i_lower] = tanimoto_sim_mat_lower_triangle
        similarity_matrix[i_upper] = similarity_matrix.T[i_upper]

        sklearn_pca = sklearn.decomposition.PCA(n_components=2,
                                                svd_solver="full",
                                                whiten=True)
        sklearn_pca.fit(similarity_matrix)
        pca_result = pd.DataFrame(sklearn_pca.transform(similarity_matrix),
                                  columns=['PC1', 'PC2'])

        # Attach metadata positionally.  Assigning the Series directly would
        # align on index labels, so a Database2 whose index is not 0..n-1
        # (e.g. a filtered subset) would yield NaN or mismatched rows.
        pca_result["LIBRARY"] = list(Database.LIBRARY)
        pca_result["TIPO"] = list(Database.LIBRARY)
        pca_result["SMILES"] = list(Database.SMILES)
        pca_result["NAME"] = list(Database.NAME)
        self.pca_result = pca_result.set_index('TIPO')

        variance = list(sklearn_pca.explained_variance_ratio_)
        self.a = round(variance[0] * 100, 2)
        self.b = round(variance[1] * 100, 2)

        return pca_result
コード例 #3
0
def compute_maccskeys_fp(Data):
    """Build a Tanimoto distance matrix from MACCS-key fingerprints.

    Reads SMILES strings from ``Data["canonical_smiles"]``, generates a
    MACCS-key fingerprint for each molecule, and expands the lower-triangle
    similarities returned by ``GetTanimotoSimMat`` into a full symmetric
    matrix.  The distance matrix is ``1 - similarity`` rounded to two
    decimals; it is printed to stdout before being returned.

    Returns:
        A tuple ``(distance_matrix, ids)`` where ``ids`` is the list of
        values in ``Data["chembl_id"]``.
    """
    molecules = [Chem.MolFromSmiles(s) for s in list(Data["canonical_smiles"])]
    fingerprints = [MACCSkeys.GenMACCSKeys(mol) for mol in molecules]
    lower_values = GetTanimotoSimMat(fingerprints)
    count = len(fingerprints)

    # Start from all ones (unit diagonal), then write each pairwise value
    # at (row, col) and at its mirrored position (col, row).
    similarity = np.ones([count, count])
    rows, cols = np.tril_indices(n=count, m=count, k=-1)
    similarity[rows, cols] = lower_values
    similarity[cols, rows] = similarity[rows, cols]

    distances = np.round(1 - similarity, 2)
    print(distances)
    identifiers = list(Data["chembl_id"])
    return distances, identifiers