def graclus(adj, x, nclusters):
    r"""The greedy clustering algorithm from the `"Weighted Graph Cuts without
    Eigenvectors: A Multilevel Approach" <http://www.cs.utexas.edu/users/
    inderjit/public_papers/multilevel_pami.pdf>`_ paper of picking an unmarked
    vertex and matching it with one of its unmarked neighbors (the one that
    maximizes the edge weight).

    Arguments
    ---------
    adj: `torch.FloatTensor` of size (N, N)
        Adjacency matrix; converted to sparse if it is not already sparse.
    x : `torch.FloatTensor` of size (N, D)
        Node feature embedding in D dimensions.
    nclusters: int
        Number of desired clusters. This is not used by graclus itself, as it
        chooses the number of clusters on its own.

    :rtype: :class:`LongTensor`
    """
    if not adj.is_sparse:
        adj = adj.to_sparse()
    row, col = adj.indices()
    weight = adj.values()
    if graclus_cluster is None:
        raise NotImplementedError(
            "Graclus clustering is not available, please use another clustering method")
    clusters = graclus_cluster(row, col, weight)
    return _cluster_to_matrix(clusters, nclusters=nclusters)
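# Illustrative usage sketch (not from the original source): calling the wrapper above
# on a tiny dense graph. Assumes torch and torch_cluster are available and that the
# surrounding module defines the `_cluster_to_matrix` helper it relies on.
import torch

adj_demo = torch.tensor([[0., 1., 0.],
                         [1., 0., 1.],
                         [0., 1., 0.]])   # dense adjacency; converted to sparse internally
x_demo = torch.randn(3, 4)                # node features (ignored by the matching itself)
assignment = graclus(adj_demo, x_demo, nclusters=2)  # nclusters is forwarded to _cluster_to_matrix, not to graclus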
def test_graclus_cluster(test, dtype, device):
    row = tensor(test['row'], torch.long, device)
    col = tensor(test['col'], torch.long, device)
    weight = tensor(test.get('weight'), dtype, device)

    cluster = graclus_cluster(row, col, weight)
    assert_correct(row, col, cluster)
def graclus(edge_index, weight: Optional[torch.Tensor] = None,
            num_nodes: Optional[int] = None):
    r"""A greedy clustering algorithm from the `"Weighted Graph Cuts without
    Eigenvectors: A Multilevel Approach" <http://www.cs.utexas.edu/users/
    inderjit/public_papers/multilevel_pami.pdf>`_ paper of picking an unmarked
    vertex and matching it with one of its unmarked neighbors (that maximizes
    its edge weight).
    The GPU algorithm is adapted from the `"A GPU Algorithm for Greedy Graph
    Matching" <http://www.staff.science.uu.nl/~bisse101/Articles/match12.pdf>`_
    paper.

    Args:
        edge_index (LongTensor): The edge indices.
        weight (Tensor, optional): One-dimensional edge weights.
            (default: :obj:`None`)
        num_nodes (int, optional): The number of nodes, *i.e.*
            :obj:`max_val + 1` of :attr:`edge_index`. (default: :obj:`None`)

    :rtype: :class:`LongTensor`
    """
    if graclus_cluster is None:
        raise ImportError('`graclus` requires `torch-cluster`.')

    return graclus_cluster(edge_index[0], edge_index[1], weight, num_nodes)
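# Illustrative usage sketch (assumes torch-cluster is installed): clustering a small
# undirected path graph given as a COO edge_index with optional edge weights.
import torch

edge_index_demo = torch.tensor([[0, 1, 1, 2],
                                [1, 0, 2, 1]])        # both directions of each undirected edge
weight_demo = torch.tensor([1.0, 1.0, 2.0, 2.0])      # the heavier edge (1, 2) is matched first
cluster_demo = graclus(edge_index_demo, weight_demo)  # LongTensor with one cluster id per node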
def graclus_coarsen(A: SparseTensor, level: int):
    row, col, wgt = A.coo()
    coarsen_cluster = []
    for i in range(level):
        cluster = graclus_cluster(row, col, wgt)
        _, cluster = cluster.unique(return_inverse=True)
        (row, col), wgt = pool_edge(cluster, torch.stack([row, col]), wgt)
        coarsen_cluster.append(cluster.cpu().numpy())
    return row, col, wgt, coarsen_cluster
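# Illustrative usage sketch: two levels of coarsening on a 3-node path graph stored as
# a torch_sparse.SparseTensor. Assumes torch-sparse and the `pool_edge` helper used by
# graclus_coarsen above are importable in this module.
import torch
from torch_sparse import SparseTensor

row_demo = torch.tensor([0, 1, 1, 2])
col_demo = torch.tensor([1, 0, 2, 1])
wgt_demo = torch.tensor([1.0, 1.0, 2.0, 2.0])
A_demo = SparseTensor(row=row_demo, col=col_demo, value=wgt_demo, sparse_sizes=(3, 3))
row_c, col_c, wgt_c, clusters = graclus_coarsen(A_demo, level=2)  # per-level cluster assignments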
def test_graclus_cluster_gpu(tensor, i):  # pragma: no cover
    data = tests[i]

    row = torch.cuda.LongTensor(data['row'])
    col = torch.cuda.LongTensor(data['col'])
    weight = data.get('weight')
    weight = weight if weight is None else getattr(torch.cuda, tensor)(weight)

    cluster = graclus_cluster(row, col, weight)
    assert_correct_graclus(row, col, cluster)
def graclus_pool(data, weight=None, transform=None):
    row, col = data.index
    cluster = graclus_cluster(row, col, weight, data.num_nodes)
    cluster, batch = consecutive_cluster(cluster, data.batch)
    x = max_pool(data.input, cluster)
    edge_index, edge_attr, pos = pool(data.index, cluster, data.weight, data.pos)
    data = Data(x, pos, edge_index, edge_attr, data.target, batch)

    if transform is not None:
        data = transform(data)

    return data
def graph_clustering(A_matrix, method, n_clusters, ratio=None, graph_num=None, plotting=True, Mean=False):
    if(graph_num == None):
        graph_num = random.randint(1, len(A_matrix)) - 1
    if(Mean):
        graph_num = 0
        A_matrix = np.mean(A_matrix, axis=0, keepdims=True)
    n = A_matrix.shape[1]

    if(method == 'kmeans'):
        # kmeans on first n vectors with nonzero eigenvalues
        _, vecs = graph_representation(train_A=A_matrix, graph_num=graph_num, Prop='Spectral', plotting=False)
        kmeans = KMeans(n_clusters=n_clusters)
        kmeans.fit(vecs[:, 1:n_clusters].reshape(-1, n_clusters - 1))
        if(ratio == None):
            return kmeans.labels_
        num = np.sum(kmeans.labels_)
        ind = 0 if num > (n // 2) else 1
        prob = (kmeans.fit_transform(vecs[:, 1:n_clusters].reshape(-1, n_clusters - 1)))
        thresh = np.quantile(prob[:, ind], ratio)
        return (prob[:, ind] >= thresh)
    elif(method == 'Spectral_clustering'):
        adjacency_matrix = A_matrix[graph_num].reshape(n, n)
        sc = SpectralClustering(n_clusters, affinity='precomputed', n_init=100, assign_labels='discretize')
        Class = sc.fit_predict(adjacency_matrix)
        if(plotting):
            Ab_matrix = A_binarize(A_matrix)
            G = nx.Graph(Ab_matrix[graph_num])
            plt.figure(); nx.draw(G, node_size=200, pos=nx.spring_layout(G)); plt.show()
            plt.figure(); nx.draw(G, node_color=Class, node_size=200, pos=nx.spring_layout(G)); plt.show()
        return Class
    elif(method == 'Affinity_propagation'):
        _, vecs = graph_representation(train_A=A_matrix, graph_num=graph_num, Prop='Spectral', plotting=False)
        clustering = AffinityPropagation().fit(vecs[:, 1:n_clusters])
    elif(method == 'Agglomerative_clustering'):
        _, vecs = graph_representation(train_A=A_matrix, graph_num=graph_num, Prop='Spectral', plotting=False)
        clustering = AgglomerativeClustering(n_clusters=n_clusters).fit(vecs[:, 1:n_clusters].reshape(-1, n_clusters - 1))
    elif(method == 'Graclus'):
        sA = sparse.csr_matrix(A_matrix[graph_num])
        edge_index, edge_weight = g_utils.from_scipy_sparse_matrix(sA)
        cluster = graclus_cluster(edge_index[0], edge_index[1], edge_weight)
        return cluster.numpy()
    else:
        raise Exception("non-existing clustering method")

    return clustering.labels_
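# Illustrative usage sketch: running the 'Graclus' branch of graph_clustering above on a
# batch holding a single 4-node adjacency matrix. Assumes numpy, scipy.sparse and
# torch_geometric.utils (imported as `g_utils`) are available as in the snippet.
import numpy as np

A_demo = np.array([[[0., 1., 0., 0.],
                    [1., 0., 1., 0.],
                    [0., 1., 0., 1.],
                    [0., 0., 1., 0.]]])   # shape (1, 4, 4): a batch with one graph
labels = graph_clustering(A_demo, method='Graclus', n_clusters=2, graph_num=0, plotting=False)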
def graclus(edge_index, weight=None, num_nodes=None):
    row, col = edge_index
    return graclus_cluster(row, col, weight, num_nodes)
def graclus(edge_index, weight=None):
    row, col = edge_index
    return graclus_cluster(row, col, weight)
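# Illustrative usage sketch for the thin wrapper above: edge_index unpacks into
# (row, col) and weight defaults to None, i.e. unweighted matching.
import torch

ei_demo = torch.tensor([[0, 1, 2, 3],
                        [1, 0, 3, 2]])
print(graclus(ei_demo))   # e.g. tensor([0, 0, 2, 2]): one cluster id per node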
def WeightCorrection(classiResultsFiles, num_classes, GraphResultsFiles, GraphPartitionVisualization,
                     OptimizedNet, PredAddEdgeResults, LinkPredictionMethod, VectorPairs,
                     WeightCorrectionCoeffi, UseOld):
    if os.path.exists(PredAddEdgeResults) and UseOld == True:
        # Reuse previously predicted link weights.
        predLinkWeight = np.load(PredAddEdgeResults)
    else:
        Graph_array, Gragh_unwighted_array = [], []
        LayerNodeNum = []
        startNodeNums = 0
        state_dict = OptimizedNet.state_dict()
        if os.path.exists(classiResultsFiles) and os.path.exists(GraphResultsFiles) and UseOld == True:
            # Reuse the previously saved partition and layer graph.
            frC = open(classiResultsFiles, 'rb')
            PartitionResults = pickle.load(frC)
            frG = open(GraphResultsFiles, 'rb')
            G = pickle.load(frG)
            L = nx.adjacency_matrix(G)
            incidence_matrix = nx.incidence_matrix(G)
            algebraic_connectivity, fiedler_vector = Compute_fiedler_vector(G)
        else:
            # Build a weighted and an unweighted graph from each layer's weight matrix.
            for layer_name in state_dict:
                if ("layers" in layer_name) and ("weight" in layer_name):
                    Weight = state_dict[layer_name]
                    print(Weight.shape)
                    if Weight.dim() == 3:
                        Weight = torch.squeeze(Weight)
                        DimCompress = True
                    Weight = Weight.cpu().detach().numpy()
                    Gone, G_unweighted = WeightsToAdjaency(Weight, startNodeNums)
                    startNodeNums += Gone.number_of_nodes()
                    LayerNodeNum.append(Gone.number_of_nodes())
                    Graph_array.append(Gone)
                    Gragh_unwighted_array.append(G_unweighted)
            G = nx.compose(Graph_array[0], Graph_array[1])
            Gu = nx.compose(Gragh_unwighted_array[0], Gragh_unwighted_array[1])
            L = nx.adjacency_matrix(G)
            incidence_matrix = nx.incidence_matrix(Gu)

        # Collect the weighted edges and cluster them with graclus.
        StartNodes, EndNodes, Edges, EdgeWeights = [], [], [], []
        for edges in G.edges():
            if 'weight' in G.get_edge_data(edges[0], edges[1]):
                StartNodes.append(edges[0])
                EndNodes.append(edges[1])
                Edges.append(list(edges))
                EdgeWeights.append(G[edges[0]][edges[1]]['weight'])
        cluster = graclus_cluster(torch.tensor(StartNodes), torch.tensor(EndNodes), torch.tensor(EdgeWeights))
        print("cluster num is", len(set(cluster.tolist())))
        # comps=nx.connected_components(G)

        # Recursively bipartition the graph with the Fiedler vector until enough parts exist.
        G_array = [G]
        iter1 = 0
        while len(G_array) < num_classes and iter1 < math.floor(math.log(num_classes, 2)) + 1:
            G_array_tmp = []
            partition, PartitionResults = {}, {}
            lab = 0
            for iter2 in range(len(G_array)):
                if G_array[iter2].number_of_edges() > 0:
                    Gsub = Fiedler_vector_cluster(G_array[iter2], 0 + 2 * iter2)
                    for i in range(len(Gsub)):
                        G_array_tmp.append(Gsub[i])
                        PartitionResults.update({lab: list(Gsub[i].nodes)})
                        partition, kk, duplicated = PartitionDict(list(Gsub[i].nodes), partition, lab)
                        lab += 1
                else:
                    PartitionResults.update({lab: list(G_array[iter2].nodes)})
                    partition, kk, duplicated = PartitionDict(Gsub, partition, lab)
                    lab += 1
            iter1 += 1
            G_array = G_array_tmp

        ### saving
        predLinkWeight = WeightedLinkPrediction(G, PartitionResults, LinkPredictionMethod, VectorPairs)
        np.save(PredAddEdgeResults, predLinkWeight)
        if len(G_array) > 4:
            fwC = open(classiResultsFiles, 'wb')
            pickle.dump(partition, fwC)
            fwG = open(GraphResultsFiles, 'wb')
            pickle.dump(G, fwG)

    if len(predLinkWeight) == 0:
        pass
    else:
        print(predLinkWeight)
        state_dict = OptimizedNet.state_dict()
        NeededAddEdges = []
        # Apply the predicted link weights back onto the layer weight matrices.
        for layer_name in state_dict:
            if ("layers" in layer_name) and ("weight" in layer_name):
                Weight = state_dict[layer_name]
                if Weight.dim() == 3:
                    Weight = torch.squeeze(Weight)
                    DimCompress = True
                else:
                    DimCompress = False
                M, N = Weight.shape
                BaseNode = 0
                for iter1 in range(len(predLinkWeight)):
                    if BaseNode <= predLinkWeight[iter1][0] <= (BaseNode + M) and (BaseNode + M) <= predLinkWeight[iter1][1] <= (BaseNode + M + N):
                        Weight[int(predLinkWeight[iter1][0] - BaseNode), int(predLinkWeight[iter1][1] - M - BaseNode)] += WeightCorrectionCoeffi * predLinkWeight[iter1][2]
                        tmp = Weight[int(predLinkWeight[iter1][0] - BaseNode), int(predLinkWeight[iter1][1] - M - BaseNode)]
                        print("Weight change from {} to {} at connection between node {} to {} weight errors.".format(
                            round(tmp.item(), 4), round(predLinkWeight[iter1][2], 4),
                            int(predLinkWeight[iter1][0] - BaseNode), int(predLinkWeight[iter1][1] - M - BaseNode)))
                    elif ((BaseNode <= predLinkWeight[iter1][0] < BaseNode + M and BaseNode < predLinkWeight[iter1][1] <= (BaseNode + M))) or ((BaseNode + M <= predLinkWeight[iter1][0] < (BaseNode + M + N) and (BaseNode + M) < predLinkWeight[iter1][1] <= (BaseNode + M + N))):
                        print("Need to add peer topology from node {} to {}".format(
                            int(predLinkWeight[iter1][0] - BaseNode), int(predLinkWeight[iter1][1] - BaseNode)))
                        NeededAddEdges.append(predLinkWeight[iter1])
                    else:
                        print("Topology wrong, needs correction from node {} to {}".format(
                            int(predLinkWeight[iter1][0] - BaseNode), int(predLinkWeight[iter1][1] - BaseNode)))
                if DimCompress == True:
                    state_dict[layer_name] = torch.unsqueeze(Weight, 0)
                else:
                    pass
                BaseNode = M + N
        OptimizedNet.load_state_dict(state_dict)
    return OptimizedNet