-
Notifications
You must be signed in to change notification settings - Fork 3
/
local_perturbation.py
324 lines (276 loc) · 8.77 KB
/
local_perturbation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
#-*- coding:utf-8 -*-
import os
import networkx as nx
import matplotlib.pyplot as plt
import sys
import gn
from gn import *
import random
import itertools
def f():
    """Print the integer 4 to standard output.

    NOTE(review): nothing in this file calls this function; it looks like a
    leftover debugging stub -- confirm before relying on it.
    """
    # Call form of print: for a single argument the output is identical on
    # Python 2, and unlike the statement form it is also valid on Python 3.
    print(4)
def modularity(G,newG,Bestcomps):
    """Return the modularity lost by going from ``G`` to ``newG``.

    Both graphs are scored against the same community partition
    ``Bestcomps`` (an iterable of node collections) and the result is
    ``Q(G) - Q(newG)``.

    Each community contributes ``l/m - (d/(2m))**2`` where ``l`` is the number
    of edges inside the community, ``d`` the summed degree of its nodes and
    ``m`` the number of edges of the graph.  The squared term is subtracted,
    as the modularity definition requires.
    """
    return _partition_modularity(G, Bestcomps) - _partition_modularity(newG, Bestcomps)


def _partition_modularity(graph, communities):
    """Return the modularity of ``communities`` measured on ``graph``."""
    edge_total = graph.size()
    if edge_total == 0:
        # No edges: every term would divide by zero, and there is no
        # community structure to score anyway.
        return 0.0
    score = 0.0
    for members in communities:
        internal_edges = graph.subgraph(members).size()
        # dict(...) accepts both the plain dict and the degree view that
        # different networkx releases return for a bunch of nodes.
        degree_sum = sum(dict(graph.degree(members)).values())
        score += (float(internal_edges) / edge_total
                  - (float(degree_sum) / (2 * edge_total)) ** 2)
    return score
def disturb(g,cl):
    """Return a copy of ``g`` whose intra-cluster edges are randomly rewired.

    ``cl`` is a list of clusters (lists of nodes).  In the returned graph:

    * every edge of ``g`` joining nodes of two different clusters is kept;
    * inside each cluster, the same number of edges as in ``g`` is placed
      between randomly chosen distinct node pairs of that cluster.

    ``g`` itself is not modified.  Randomness comes from the module-level
    ``random`` generator, so seed it for reproducible output.
    """
    ng = g.copy()
    # Snapshot the edge list before removing: deleting while iterating a live
    # edge collection is unsafe.
    ng.remove_edges_from(list(ng.edges()))

    # Edges between two different clusters stay exactly as they were.
    for first, second in itertools.combinations(cl, 2):
        for u, v in itertools.product(first, second):
            if g.has_edge(u, v):
                ng.add_edge(u, v)

    # Shuffle the edges inside each cluster.
    for members in cl:
        pool = list(members)
        edge_count = g.subgraph(pool).number_of_edges()
        # Never ask for more distinct pairs than exist, otherwise the
        # rejection sampling below could not terminate.
        edge_count = min(edge_count, len(pool) * (len(pool) - 1) // 2)
        # Unordered pairs are stored as frozensets so (a, b) and (b, a) count
        # as the same edge and membership checks are O(1).
        chosen = set()
        while len(chosen) < edge_count:
            chosen.add(frozenset(random.sample(pool, 2)))
        ng.add_edges_from([tuple(pair) for pair in chosen])
    return ng
#global Bestcomps
def analyze(newComps,Bestcomps):
    """Measure how much of each original community survives in ``newComps``.

    For every community in ``Bestcomps`` (taken in order, numbered from 0) the
    largest overlap with any community of ``newComps`` is found and divided by
    the size of the original community.

    Returns a dict mapping the community's position to that fraction.
    """
    retained = {}
    for position, original in enumerate(Bestcomps):
        overlaps = [sum(1 for member in original if member in candidate)
                    for candidate in newComps]
        # The leading 0 is the score when there is nothing to compare with.
        largest = max([0] + overlaps)
        retained[position] = float(largest) / float(len(original))
    return retained
def readfile_net(G,path=None):
    """Replace the contents of ``G`` with the edge list stored in ``path``.

    Everything up to and including the first line that starts with
    ``*edges`` (case-insensitive) is skipped.  Each following line is split on
    whitespace; its first two fields are added as an edge.  Lines with fewer
    than two fields and self-loops (both fields equal) are ignored.  Node
    names are kept as strings.

    ``G`` is cleared first and filled in place; nothing is returned.

    Raises:
        IOError/OSError/TypeError: if ``path`` cannot be opened.  The message
        ``readfile_net error`` is printed before the exception propagates.
    """
    G.clear()
    try:
        handle = open(path, 'r')
    except (IOError, OSError, TypeError):
        # Report and re-raise: carrying on without a file object can only
        # fail later with a less helpful error.
        print("readfile_net error")
        raise
    with handle:
        # Consume the header up to the edge-section marker.
        for line in handle:
            if line.lower().startswith("*edges"):
                break
        # The same iterator continues with the lines after the marker.
        for line in handle:
            fields = line.split()
            if len(fields) < 2:
                continue
            u, v = fields[0:2]
            if u != v:  # skip self-loops
                G.add_edge(u, v)
def togn(G):
    """Run the Girvan-Newman routine from ``gn`` on ``G`` and return its result.

    The inputs handed to ``runGirvanNewman`` are the graph, the per-node
    degrees computed by ``UpdateDeg`` and half the sum of all adjacency-matrix
    entries (the weighted edge count).

    NOTE(review): callers in this file pass a copy of their graph because the
    run is said to modify ``G`` -- confirm against ``gn``.
    """
    node_count = G.number_of_nodes()  # |V|
    adjacency = nx.adj_matrix(G)  # adjacency matrix

    # Add up every matrix entry, row by row, then halve the total to get the
    # weighted number of edges.
    entry_sum = 0.0
    for row, col in itertools.product(range(0, node_count), repeat=2):
        entry_sum += adjacency[row, col]
    weighted_edges = entry_sum / 2.0

    # Weighted degree of each node.
    degrees = UpdateDeg(adjacency, G.nodes())

    # Run the Newman algorithm.
    return runGirvanNewman(G, degrees, weighted_edges)
def graphtodegree(G):
    """Return the degree of every node of ``G`` as a list of records.

    Each record is a dict ``{'name': node, 'deg': degree}``.
    """
    # dict(...) normalises the result of nx.degree: it is a no-op copy when a
    # dict is returned and also accepts an iterable of (node, degree) pairs.
    degrees = dict(nx.degree(G))
    return [{'name': node, 'deg': degree} for node, degree in degrees.items()]
def node_dis(G,u,v):
    """Return the neighbourhood distance between nodes ``u`` and ``v``.

    The distance is the number of nodes adjacent to exactly one of ``u`` and
    ``v`` (ignoring ``u`` and ``v`` themselves when they are adjacent),
    divided by ``n - 2`` where ``n`` is the number of nodes of ``G``.

    Returns 0.0 for graphs with two nodes or fewer, where no third node
    exists to tell the two apart and ``n - 2`` would not be positive.
    """
    n = G.number_of_nodes()
    neighbours_u = set(G[u])
    neighbours_v = set(G[v])
    if u in neighbours_v:
        # The edge between u and v itself must not count as a difference.
        # discard() tolerates an asymmetric adjacency instead of raising.
        neighbours_v.discard(u)
        neighbours_u.discard(v)
    if n <= 2:
        return 0.0
    return 1.0 * len(neighbours_u ^ neighbours_v) / (n - 2)
def dist(G,v,list):
    """Return the mean ``node_dis`` between node ``v`` and the nodes in ``list``.

    ``list`` is the group of nodes to compare against; the parameter name
    shadows the builtin and is kept only for caller compatibility.
    Raises ZeroDivisionError when the group is empty.
    """
    group = list
    total = sum(node_dis(G, v, member) for member in group)
    return 1.0 * total / len(group)
def delnode(G,v,deglist):
    """Return a new list holding the records of ``deglist`` not named ``v``.

    Records are dicts with a ``'name'`` key.  The input list is left
    untouched and ``G`` is not used.
    """
    return [record for record in deglist if record['name'] != v]
def match(G,u,v,delist):
    """Return True when ``u`` and ``v`` appear together in one group of ``delist``.

    ``delist`` is a nested list of groups; ``G`` is not used.
    """
    return any(u in group and v in group for group in delist)
def k_cluster(G,deglist,k,Bestcomps):
    """Group the nodes listed in ``deglist`` into clusters of ``k`` nodes.

    ``deglist`` is a list of ``{'name': node, 'deg': degree}`` records; the
    callers in this file pass it sorted by descending degree.  ``Bestcomps``
    is a nested list of communities, used only to break ties.

    Algorithm:

    1. ``len(deglist) // k`` clusters are built one after another.  Each
       starts from the first remaining node (the seed) and is then filled up
       to ``k`` members by repeatedly taking the remaining node with the
       smallest ``dist`` to the cluster.  Among equally close candidates the
       first one sharing a community with the seed wins; otherwise the last
       candidate is taken.
    2. Each leftover node (fewer than ``k`` remain) joins the first cluster
       whose seed shares a community with it or, failing that, the cluster
       with the smallest ``dist`` (lowest index on ties).

    When there are ``k`` nodes or fewer, no full cluster is built; all nodes
    are then gathered into one cluster instead of failing on an empty
    cluster list.

    The caller's ``deglist`` is not modified.  Returns the list of clusters,
    each a list of node names.

    Raises:
        ValueError: if ``k`` is smaller than 1.
    """
    k = int(k)
    if k < 1:
        # A non-positive size can never be reached and used to loop forever
        # (negative k) or divide by zero (k == 0).
        raise ValueError("k must be a positive integer")

    cl = []
    # Floor division keeps the cluster count an integer on every Python
    # version, as range() requires.
    full_clusters = len(deglist) // k

    if len(deglist) > k:
        for i in range(full_clusters):
            seed = deglist[0]['name']
            cl.append([seed])
            deglist = delnode(G, seed, deglist)
            while len(cl[i]) < k:
                candidates = _closest_candidates(G, deglist, cl[i])
                # Prefer a candidate from the seed's community; the last
                # candidate is the fallback when none of the others match.
                chosen = candidates[-1]
                for name in candidates[:-1]:
                    if match(G, seed, name, Bestcomps):
                        chosen = name
                        break
                cl[i].append(chosen)
                deglist = delnode(G, chosen, deglist)
    elif deglist:
        # Too few nodes for even one full cluster: seed a single cluster so
        # the leftover loop below has something to attach the rest to.
        seed = deglist[0]['name']
        cl.append([seed])
        deglist = delnode(G, seed, deglist)

    # Distribute the nodes that did not fit into a full cluster.
    while deglist:
        v = deglist[0]['name']
        best_index = 0
        best_distance = dist(G, v, cl[0])
        for index, cluster in enumerate(cl):
            # A cluster whose seed is in the same community takes priority.
            if match(G, cluster[0], v, Bestcomps):
                best_index = index
                break
            candidate_distance = dist(G, v, cluster)
            if candidate_distance < best_distance:
                best_distance = candidate_distance
                best_index = index
        cl[best_index].append(v)
        deglist = delnode(G, v, deglist)
    return cl


def _closest_candidates(G, deglist, cluster):
    """Return the names in ``deglist`` at minimal ``dist`` from ``cluster``, in list order."""
    # Each distance is computed once and reused for both the minimum and the
    # tie collection.
    scored = [(record['name'], dist(G, record['name'], cluster)) for record in deglist]
    nearest = min(distance for _, distance in scored)
    return [name for name, distance in scored if distance == nearest]
def APL(G):
    """Return the average shortest path length of a possibly disconnected graph.

    Each connected component with more than one node contributes its own
    average shortest path length, weighted by the share of ``G``'s nodes that
    it contains.  Single-node components contribute nothing.
    """
    node_total = G.number_of_nodes()
    weighted_length = 0.0
    for component in nx.connected_component_subgraphs(G):
        size = component.number_of_nodes()
        if size == 1:
            continue
        share = 1.0 * size / node_total
        weighted_length = weighted_length + share * (nx.average_shortest_path_length(component))
    return weighted_length
def LocalPerturbation(G,k):
    """Cluster the nodes of ``G`` into groups of ``k`` and perturb the graph.

    Steps: detect communities with ``togn``, rank the nodes by descending
    degree (ties by name), cluster them with ``k_cluster`` and rewire the
    edges inside each cluster with ``disturb``.

    Returns ``[clusters, perturbed_graph]``.
    """
    # togn is handed a copy because it is said to modify its argument.
    communities = togn(G.copy())  # initial community detection
    ranked = sorted(graphtodegree(G),
                    key=lambda record: (-record['deg'], record['name']))
    clusters = k_cluster(G, ranked, k, communities)  # clustering
    perturbed = disturb(G, clusters)  # new graph after the perturbation
    return [clusters, perturbed]  # return these clusters
def main(argv):
    """Run the perturbation experiment on one network file and print metrics.

    ``argv`` is the command-line argument list; ``argv[1]``, when present, is
    the path of the network file, otherwise ``./data/9.txt`` is used.

    Printed, in order: average clustering coefficient and APL of the original
    graph, the same two values for the perturbed graph, the average fraction
    of each original community that survives (``average lost:``) and the
    modularity difference (``delta_q:``).  Clusters have size 3.
    """
    filepath = argv[1] if argv and len(argv) > 1 else "./data/9.txt"
    G = nx.Graph()  # undirected
    readfile_net(G, path=filepath)
    print(nx.average_clustering(G))  # CC
    print(APL(G))  # APL

    # togn is said to modify the graph it receives, so it gets a copy.
    Bestcomps = togn(G.copy())  # initial community detection
    readfile_net(G, path=filepath)
    deglist = graphtodegree(G)
    # Sort the nodes by descending degree, ties broken by name.
    deglist.sort(key=lambda deg: (-deg['deg'], deg['name']))
    cl = k_cluster(G, deglist, 3, Bestcomps)  # clustering

    # Perturbed graph and its metrics.
    newG = disturb(G, cl)
    print(nx.average_clustering(newG))  # CC
    print(APL(newG))  # APL

    newComps = togn(newG.copy())  # communities of the perturbed graph
    j = analyze(newComps, Bestcomps)
    com = 0.0
    for i in range(len(j)):
        com += j[i]
    # No communities means nothing to average; avoid dividing by zero.
    average = com / len(j) if j else 0.0
    # str() gives the same text the Python 2 print statement produced.
    print("average lost: " + str(average))
    delta_q = modularity(G, newG, Bestcomps)
    print("delta_q: " + str(delta_q))
## plt.subplot(211)
##
## plt.title("old")
## nx.draw(G,with_labels = True,pos=nx.spring_layout(G))
##
##
## plt.subplot(212)
## plt.title("new")
## nx.draw(newG,with_labels = True,pos=nx.spring_layout(newG))
## plt.show()
if __name__ == "__main__":
    # Script entry point: run main() on the command-line arguments and use
    # its return value as the process exit status.
    exit_status = main(sys.argv)
    sys.exit(exit_status)