def run_AP_clustering_without_local_modules(path, dataset, features, labels):
    """Cluster precomputed node embeddings with Affinity Propagation and a null mask.

    The embeddings are read from ``"{path}{dataset}.embeddings.txt"`` and
    clustered with an all-zero mask, i.e. no penalty is added to the
    messages. The clustering result is then handed to the
    ``*_without_local_modules`` post-processing helpers.

    Args:
        path: Prefix that is concatenated directly with ``dataset`` to build
            the embeddings file name; also forwarded to ``load_data``.
        dataset: Dataset name, used in the embeddings file name and forwarded
            to ``load_data``.
        features: Forwarded unchanged to ``load_data``.
        labels: Forwarded unchanged to ``load_data``.

    Returns:
        None. Progress is printed and results are produced by the helper
        functions called at the end.
    """
    # Only the second item returned by load_data is used here.
    # NOTE(review): presumably a SciPy sparse adjacency matrix -- confirm.
    _, A, _ = load_data(path, dataset, features, labels)

    # Null mask (no penalty will be added to messages). The previous code
    # zeroed the diagonal, binarised the matrix and then overwrote every
    # non-zero entry with 0, which always yields an all-zero array of the
    # dense matrix's shape and dtype; build that directly.
    mask = np.zeros_like(A.toarray())

    embeddings = np.genfromtxt(
        "{}{}.embeddings.txt".format(path, dataset), dtype=np.float32
    )

    print("Clustering by Affinity Propagation ...")
    # NOTE(review): this AffinityPropagation accepts a `mask` keyword, so it
    # is presumably a project-specific variant of the estimator -- confirm.
    af = AffinityPropagation(
        damping=0.9,
        max_iter=2000,
        convergence_iter=20,
        copy=False,
        preference=None,
        affinity='euclidean',
        verbose=True,
        mask=mask,
    ).fit(embeddings)

    cluster_centers_indices = af.cluster_centers_indices_
    # Kept under a separate name so the `labels` argument is not shadowed.
    cluster_labels = af.labels_
    n_clusters_ = len(cluster_centers_indices)
    print("Number of obtained clusters: {}".format(n_clusters_))

    print("Preparing clustering results ...")
    prepare_clusters_AP_without_local_modules(cluster_labels, cluster_centers_indices)
    print("Clustering results file ready")

    print("Preparing enrichment analysis data for PPI network ...")
    # NOTE(review): the dataset is hard-coded to "PPI" here regardless of the
    # `dataset` argument -- confirm this is intentional.
    prepare_enrichment_clusters_without_local_modules(dataset="PPI")
    print("Enrichment analysis data ready")
def run_AP_clustering_without_GCN(path, dataset, features, labels):
    """Cluster raw feature columns with Affinity Propagation and a two-hop mask.

    A mask is derived from the matrix returned by ``load_data``: entries are
    0 on the diagonal and for node pairs connected directly or through one
    intermediate node, and 100 for every other pair. Columns ``1:`` of
    ``features`` are clustered with that mask, and the result is handed to
    the ``*_without_GCN`` post-processing helpers.

    Args:
        path: Forwarded to ``load_data``.
        dataset: Forwarded to ``load_data``.
        features: 2-D indexable array; forwarded to ``load_data`` and, with
            its first column dropped, used as the clustering input.
        labels: Forwarded unchanged to ``load_data``.

    Returns:
        None. Progress is printed and results are produced by the helper
        functions called at the end.
    """
    # Only the second item returned by load_data is used here.
    # NOTE(review): presumably a SciPy sparse adjacency matrix -- confirm.
    _, A, _ = load_data(path, dataset, features, labels)

    # Binary one-hop adjacency without self-loops.
    adjacency = A.toarray()
    np.fill_diagonal(adjacency, 0)
    adjacency[adjacency != 0] = 1

    # Non-zero entries of adjacency @ adjacency mark pairs that share at
    # least one neighbour (two-hop reachability); binarise them.
    two_hop = np.matmul(adjacency, adjacency)
    two_hop[two_hop != 0] = 1

    # 1 where a pair is within two hops (or is the node itself), else 0.
    mask = np.maximum(adjacency, two_hop)
    np.fill_diagonal(mask, 1)

    # Order matters: first mark unreachable pairs with 100, then clear the
    # reachable ones to 0. NOTE(review): 100 presumably acts as a penalty
    # inside the mask-aware AffinityPropagation -- confirm.
    mask[mask == 0] = 100
    mask[mask == 1] = 0

    # Column 0 of `features` is excluded from the clustering input.
    # NOTE(review): presumably an identifier column -- confirm.
    GE_features = features[:, 1:]

    print("Clustering by Affinity Propagation ...")
    # NOTE(review): this AffinityPropagation accepts a `mask` keyword, so it
    # is presumably a project-specific variant of the estimator -- confirm.
    af = AffinityPropagation(
        damping=0.9,
        max_iter=2000,
        convergence_iter=20,
        copy=False,
        preference=None,
        affinity='euclidean',
        verbose=True,
        mask=mask,
    ).fit(GE_features)

    cluster_centers_indices = af.cluster_centers_indices_
    # Kept under a separate name so the `labels` argument is not shadowed.
    cluster_labels = af.labels_
    n_clusters_ = len(cluster_centers_indices)
    print("Number of obtained clusters: {}".format(n_clusters_))

    print("Preparing clustering results ...")
    prepare_clusters_AP_without_GCN(cluster_labels, cluster_centers_indices)
    print("Clustering results file ready")

    print("Preparing enrichment analysis data for PPI network ...")
    # NOTE(review): the dataset is hard-coded to "PPI" here regardless of the
    # `dataset` argument -- confirm this is intentional.
    prepare_enrichment_clusters_without_GCN(dataset="PPI")
    print("Enrichment analysis data ready")