Beispiel #1
0
def run_AP_clustering_without_local_modules(path, dataset, features, labels):
    """Cluster precomputed embeddings with Affinity Propagation and a null mask.

    The adjacency matrix returned by ``load_data`` is used only to give the
    mask its shape and dtype; every mask entry is zero, so (per the original
    author's note) no penalty is added to the messages. Embeddings are read
    from the text file ``"{path}{dataset}.embeddings.txt"``.

    Args:
        path: Prefix that is concatenated directly with ``dataset`` to build
            the embeddings file name, and forwarded to ``load_data``.
        dataset: Dataset name; forwarded to ``load_data`` and used in the
            embeddings file name.
        features: Forwarded unchanged to ``load_data``.
        labels: Forwarded unchanged to ``load_data``.

    Returns:
        None. Cluster labels and exemplar indices are handed to
        ``prepare_clusters_AP_without_local_modules``; progress is printed.
    """
    _, A, _ = load_data(path, dataset, features, labels)

    # Null mask (no penalty will be added to messages). The original code
    # densified A, cleared its diagonal, binarised it and then zeroed every
    # entry; the net result is always an all-zero array of A's shape and
    # dtype, so build that directly.
    mask = np.zeros(A.shape, dtype=A.dtype)

    embeddings = np.genfromtxt("{}{}.embeddings.txt".format(path, dataset),
                               dtype=np.float32)
    print("Clustering by Affinity Propagation ...")
    # NOTE(review): `mask` is not a keyword of stock scikit-learn's
    # AffinityPropagation; presumably a project-modified class is in scope --
    # confirm against the file's imports.
    af = AffinityPropagation(damping=0.9,
                             max_iter=2000,
                             convergence_iter=20,
                             copy=False,
                             preference=None,
                             affinity='euclidean',
                             verbose=True,
                             mask=mask).fit(embeddings)
    cluster_centers_indices = af.cluster_centers_indices_
    # Separate name so the `labels` argument is not silently rebound.
    cluster_labels = af.labels_
    n_clusters_ = len(cluster_centers_indices)
    print("Number of obtained clusters: {}".format(n_clusters_))

    print("Preparing clustering results ...")
    prepare_clusters_AP_without_local_modules(cluster_labels,
                                              cluster_centers_indices)
    print("Clustering results file ready")
    print("Preparing enrichment analysis data for PPI network ...")
    # NOTE(review): the dataset name is hard-coded to "PPI" here rather than
    # using the `dataset` argument -- kept as in the original; confirm intent.
    prepare_enrichment_clusters_without_local_modules(dataset="PPI")
    print("Enrichment analysis data ready")
Beispiel #2
0
def run_AP_clustering_without_GCN(path, dataset, features, labels):
    """Cluster raw feature columns with Affinity Propagation and a 2-hop mask.

    A penalty mask is derived from the adjacency matrix returned by
    ``load_data``: an entry is 0 when the two nodes are identical, directly
    connected, or connected through one intermediate node, and 100 otherwise.
    Affinity Propagation is then fitted on ``features[:, 1:]``.

    Args:
        path: Forwarded unchanged to ``load_data``.
        dataset: Forwarded unchanged to ``load_data``.
        features: 2-D indexable array; forwarded to ``load_data`` and, with
            its first column dropped, used as the clustering input.
        labels: Forwarded unchanged to ``load_data``.

    Returns:
        None. Cluster labels and exemplar indices are handed to
        ``prepare_clusters_AP_without_GCN``; progress is printed.
    """
    _, A, _ = load_data(path, dataset, features, labels)

    # Binary adjacency without self-loops.
    adjacency = A.toarray()
    np.fill_diagonal(adjacency, 0)
    adjacency[adjacency != 0] = 1

    # Non-zero entries of adjacency @ adjacency mark node pairs joined by a
    # path of exactly two edges.
    two_hop = np.matmul(adjacency, adjacency)
    reachable = (adjacency != 0) | (two_hop != 0)
    # Every node is treated as reachable from itself.
    np.fill_diagonal(reachable, True)

    # 0 for pairs within two hops, 100 for everything else, in the adjacency
    # dtype as before. A single np.where replaces the order-dependent
    # "0 -> 100, then 1 -> 0" in-place substitution.
    mask = np.where(reachable, 0, 100).astype(adjacency.dtype)

    # Skip the first feature column.
    # NOTE(review): presumably an identifier column -- confirm with caller.
    GE_features = features[:, 1:]
    print("Clustering by Affinity Propagation ...")
    # NOTE(review): `mask` is not a keyword of stock scikit-learn's
    # AffinityPropagation; presumably a project-modified class is in scope --
    # confirm against the file's imports.
    af = AffinityPropagation(damping=0.9,
                             max_iter=2000,
                             convergence_iter=20,
                             copy=False,
                             preference=None,
                             affinity='euclidean',
                             verbose=True,
                             mask=mask).fit(GE_features)
    cluster_centers_indices = af.cluster_centers_indices_
    # Separate name so the `labels` argument is not silently rebound.
    cluster_labels = af.labels_
    n_clusters_ = len(cluster_centers_indices)
    print("Number of obtained clusters: {}".format(n_clusters_))

    print("Preparing clustering results ...")
    prepare_clusters_AP_without_GCN(cluster_labels, cluster_centers_indices)
    print("Clustering results file ready")
    print("Preparing enrichment analysis data for PPI network ...")
    # NOTE(review): the dataset name is hard-coded to "PPI" here rather than
    # using the `dataset` argument -- kept as in the original; confirm intent.
    prepare_enrichment_clusters_without_GCN(dataset="PPI")
    print("Enrichment analysis data ready")