forked from priyagovindan/kpeak
-
Notifications
You must be signed in to change notification settings - Fork 0
/
kpeak_decomposition.py
282 lines (232 loc) · 14.3 KB
/
kpeak_decomposition.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
__author__ = 'priya'
import networkx as nx
import matplotlib.pyplot as plt
from matplotlib.pyplot import rcParams
rcParams['figure.figsize'] = 10,7
# Function that computes peak numbers (i.e. performs k-peak decomposition)
def get_kpeak_decomposition(G):
    """Compute the peak number and the core number of every node of G.

    The graph is peeled one k-contour at a time: the nodes of the current
    main core (the k-core with the largest k) receive their current core
    number as their peak number, are removed, and the core numbers are
    recomputed on the remaining nodes.  This repeats until no node is left.

    Note that G is modified in place: its self-loops and its isolated nodes
    are removed before the decomposition starts.

    Parameters:
        G: a networkx graph.

    Returns:
        A pair (peak_numbers, orig_core_nums).  Both are dicts keyed by node;
        orig_core_nums holds the core numbers of the cleaned-up graph and
        peak_numbers the peak number assigned to each node.
    """
    # Graph.selfloop_edges() only exists in older networkx releases; newer
    # ones expose the module-level nx.selfloop_edges(G) instead.  The edges
    # are materialised into a list so the graph is not mutated while a lazy
    # edge iterator is still reading from it.
    if hasattr(nx, 'selfloop_edges'):
        self_loops = list(nx.selfloop_edges(G))
    else:
        self_loops = list(G.selfloop_edges())
    G.remove_edges_from(self_loops)
    G = removeSingletons(G)

    orig_core_nums = nx.core_number(G)
    # current_core_nums is only ever rebound, never mutated, so sharing the
    # dict with orig_core_nums is safe.
    current_core_nums = orig_core_nums
    remaining_nodes = set(G.nodes())
    # H is only read (nx.k_core returns a new graph), so no copy is needed.
    H = G
    peak_numbers = {}

    # Each iteration of the while loop finds a k-contour
    while remaining_nodes:
        # The main core of H; the already computed core numbers are passed
        # in so that k_core does not have to recompute them.
        degen_core = nx.k_core(H, core_number=current_core_nums)

        # Nodes in the k-contour. Their current core number is their peak number.
        kcontour_nodes = set(degen_core.nodes())
        for n in kcontour_nodes:
            peak_numbers[n] = current_core_nums[n]

        # Removing the k-contour and re-computing core numbers.
        remaining_nodes -= kcontour_nodes
        H = G.subgraph(list(remaining_nodes))
        current_core_nums = nx.core_number(H)

    return peak_numbers, orig_core_nums
# Function that computes peak numbers while also keeping track of which k-contour (removal) affected a node the most
def get_kpeak_mountainassignment(G, orig_core_nums):
    """Compute peak numbers and assign every node to a plot-mountain.

    The graph is peeled one k-contour (main core) at a time.  Every removal
    gets a running plot-mountain id, starting at 0.  A node is assigned to
    the mountain whose removal caused the largest drop of its core number;
    for the nodes of the removed contour itself the drop is their whole
    current core number.  On ties the earliest mountain wins, because only a
    strictly larger drop replaces an earlier assignment.

    G itself is not modified.

    Parameters:
        G: a networkx graph.
        orig_core_nums: dict mapping each node of G to its core number.

    Returns:
        A pair (permountain_ID_corenumber_peaknumber, peak_numbers).
        The first is a dict keyed by mountain id whose values are dicts
        mapping node -> (node, core number, peak number).  Nodes that never
        showed a positive drop are grouped under mountain id -1.
        The second maps each node to its peak number.
        For a graph without nodes both dicts are empty.
    """
    # node -> [largest core-number drop seen so far, id of the mountain that caused it]
    node_CNdrops_mountainassignment = {}
    for n in G.nodes():
        node_CNdrops_mountainassignment[n] = [0, -1]

    remaining_nodes = set(G.nodes())
    # H is only read (nx.k_core returns a new graph), so no copy is needed.
    H = G
    # Only rebound, never mutated, so the caller's dict can be shared.
    current_core_nums = orig_core_nums

    # 'current_plotmountain_id' keeps track of numbering of the plot-mountains
    current_plotmountain_id = 0
    peak_numbers = {}

    # Each iteration of the while loop finds a k-contour
    while remaining_nodes:
        # Main core of what is left of the graph.
        degen_core = nx.k_core(H)

        # Note that the actual mountains may consist of multiple components.
        # In the mountain plot the separate components related to a k-contour
        # are plotted as a single mountain, so they share one id here.

        # Nodes in the k-contour. Their current core number is their peak number.
        kcontour_nodes = set(degen_core.nodes())
        for n in kcontour_nodes:
            peak_numbers[n] = current_core_nums[n]

        # Removing the k-contour and re-computing core numbers.
        remaining_nodes -= kcontour_nodes
        H = G.subgraph(list(remaining_nodes))
        new_core_nums = nx.core_number(H)

        # For the nodes in the k-contour, the removal drops their core number to 0.
        for n in kcontour_nodes:
            best = node_CNdrops_mountainassignment[n]
            if current_core_nums[n] > best[0]:
                best[0] = current_core_nums[n]
                best[1] = current_plotmountain_id

        # For the surviving nodes, compare the core number before and after.
        for n, new_core in new_core_nums.items():
            drop = current_core_nums[n] - new_core
            best = node_CNdrops_mountainassignment[n]
            if drop > best[0]:
                best[0] = drop
                best[1] = current_plotmountain_id

        current_plotmountain_id += 1
        current_core_nums = new_core_nums

    # Grouping the nodes by the mountain they were assigned to:
    # mountain id -> {node: (node, core number, peak number)}
    permountain_ID_corenumber_peaknumber = {}
    for n in orig_core_nums:
        mountain_id = node_CNdrops_mountainassignment[n][1]
        mountain = permountain_ID_corenumber_peaknumber.setdefault(mountain_id, {})
        mountain[n] = (n, orig_core_nums[n], peak_numbers[n])

    return permountain_ID_corenumber_peaknumber, peak_numbers
# Function that computes peak numbers while also keeping track of which k-contour (removal) affected a node the most
# This function treats each connected component of a k-contour as a separate mountain
def get_kpeak_mountainassignment_component(G, orig_core_nums):
    """Compute peak numbers and assign nodes to per-component plot-mountains.

    Works like a contour-by-contour peeling of the graph, except that every
    connected component of the current main core is removed on its own and
    receives its own plot-mountain id (ids start at 0 and increase with each
    removed component).  A node is assigned to the mountain whose removal
    caused the largest drop of its core number; for the nodes of the removed
    component the drop is their whole current core number.  On ties the
    earliest mountain wins, because only a strictly larger drop replaces an
    earlier assignment.

    G itself is not modified.

    Parameters:
        G: a networkx graph.
        orig_core_nums: dict mapping each node of G to its core number.

    Returns:
        A pair (permountain_ID_corenumber_peaknumber, peak_numbers).
        The first is a dict keyed by mountain id whose values are dicts
        mapping node -> (node, core number, peak number).  Nodes that never
        showed a positive drop are grouped under mountain id -1.
        The second maps each node to its peak number.
        For a graph without nodes both dicts are empty.
    """
    # node -> [largest core-number drop seen so far, id of the mountain that caused it]
    node_CNdrops_mountainassignment = {}
    for n in G.nodes():
        node_CNdrops_mountainassignment[n] = [0, -1]

    remaining_nodes = set(G.nodes())
    # H is only read (nx.k_core returns a new graph), so no copy is needed.
    H = G
    # Only rebound, never mutated, so the caller's dict can be shared.
    current_core_nums = orig_core_nums

    # 'current_plotmountain_id' keeps track of numbering of the plot-mountains
    current_plotmountain_id = 0
    peak_numbers = {}

    # Each iteration of the while loop finds a k-contour
    while remaining_nodes:
        # Main core of what is left of the graph.  It is a graph of its own,
        # so shrinking H inside the loop below does not disturb the iteration
        # over its components.
        degen_core = nx.k_core(H)

        # Only the node sets of the components are needed, so
        # nx.connected_components is used; connected_component_subgraphs is
        # not available in newer networkx releases.
        for component in nx.connected_components(degen_core):
            # Nodes of this component. Their current core number is their peak number.
            kcontour_nodes = set(component)
            for n in kcontour_nodes:
                peak_numbers[n] = current_core_nums[n]

            # Removing the component and re-computing core numbers.
            remaining_nodes -= kcontour_nodes
            H = G.subgraph(list(remaining_nodes))
            new_core_nums = nx.core_number(H)

            # For the removed nodes, the removal drops their core number to 0.
            for n in kcontour_nodes:
                best = node_CNdrops_mountainassignment[n]
                if current_core_nums[n] > best[0]:
                    best[0] = current_core_nums[n]
                    best[1] = current_plotmountain_id

            # For the surviving nodes, compare the core number before and after.
            for n, new_core in new_core_nums.items():
                drop = current_core_nums[n] - new_core
                best = node_CNdrops_mountainassignment[n]
                if drop > best[0]:
                    best[0] = drop
                    best[1] = current_plotmountain_id

            current_plotmountain_id += 1
            current_core_nums = new_core_nums

    # Grouping the nodes by the mountain they were assigned to:
    # mountain id -> {node: (node, core number, peak number)}
    permountain_ID_corenumber_peaknumber = {}
    for n in orig_core_nums:
        mountain_id = node_CNdrops_mountainassignment[n][1]
        mountain = permountain_ID_corenumber_peaknumber.setdefault(mountain_id, {})
        mountain[n] = (n, orig_core_nums[n], peak_numbers[n])

    return permountain_ID_corenumber_peaknumber, peak_numbers
# Function draws a mountain plot (with boundary markers between mountains), given the peak numbers, core numbers and mountain assignment
def plot_mountains_given_mountainassignment_givencomponents(permountain_ID_core_peak_numbers, orig_core_nums, peak_numbers, graphname):
    """Draw the mountain plot with a vertical marker closing every mountain.

    Mountains are laid out left to right in increasing order of mountain id.
    Iterating the dict directly would follow its insertion order, which
    depends on node order rather than on the mountain numbering.
    Inside a mountain the nodes are ordered by descending core number, ties
    broken by descending peak number.

    The figure is written to
    graphname + '_mountainplot_withcomponents_and_mountainmarkers.pdf'
    and then closed.

    Parameters:
        permountain_ID_core_peak_numbers: dict keyed by mountain id whose
            values are dicts mapping node -> (node, core number, peak number).
        orig_core_nums: dict mapping node -> core number.
        peak_numbers: dict mapping node -> peak number.
        graphname: prefix of the output file name.
    """
    ### Part 1 ####
    # Arranging the values to be plotted on the x and y axis.
    x_vals = []
    y_vals = []   # core numbers
    y_vals2 = []  # peak numbers
    boundaries = []  # (x position, height) of the marker closing each mountain
    for mountain_id in sorted(permountain_ID_core_peak_numbers):
        mountain = permountain_ID_core_peak_numbers[mountain_id]
        # A single stable sort on (core number, peak number) gives the same
        # order as sorting by peak number first and by core number second.
        ordered_tuples = sorted(mountain.values(), key=lambda t: (t[1], t[2]), reverse=True)
        if not ordered_tuples:
            # An empty mountain has no last node to hang a marker on.
            continue
        for node, _core, _peak in ordered_tuples:
            x_vals.append(len(x_vals))
            y_vals.append(orig_core_nums[node])
            y_vals2.append(peak_numbers[node])
        # The marker sits on the last node of the mountain and reaches up to
        # that node's core number.
        boundaries.append((x_vals[-1], y_vals[-1]))

    ### Part 2 ####
    ## The plotting
    ax = plt.gca()
    plt.fill_between(x_vals, y_vals, 0, color = 'lightblue') # Area under the core number values (blue line)
    plt.plot(x_vals, y_vals, label = 'Core Number', color = 'blue')
    plt.scatter(x_vals, y_vals2, color = 'r', label = 'Peak Number')

    last_index = len(boundaries) - 1
    for index, (x_pos, height) in enumerate(boundaries):
        line_options = {'color': 'black'}
        if index == last_index:
            # Only one marker carries the label so the legend shows a single entry.
            line_options['label'] = 'Boundary Between Mountains'
        plt.plot([x_pos, x_pos], [0, height], **line_options)

    plt.ylabel('Core Number or Peak Number', fontsize=20)
    plt.xlabel('Individual nodes', fontsize=20)
    plt.legend(fontsize=18,bbox_to_anchor=(1.01, 1), prop={'size':18})
    plt.xlim(0, len(orig_core_nums))
    plt.ylim(0, max(orig_core_nums.values()))
    ax.tick_params(axis='x', labelsize=18)
    ax.tick_params(axis='y', labelsize=18)
    # plt.show()
    plt.savefig(graphname+'_mountainplot_withcomponents_and_mountainmarkers.pdf', bbox_inches='tight')
    plt.close()
# Function draws a mountain plot, given the peak numbers, core numbers and mountain assignment
def plot_mountains_given_mountainassignment(permountain_ID_core_peak_numbers, orig_core_nums, peak_numbers, graphname):
    """Draw the mountain plot (core number area/line plus peak number dots).

    Mountains are laid out left to right in increasing order of mountain id.
    Iterating the dict directly would follow its insertion order, which
    depends on node order rather than on the mountain numbering.
    Inside a mountain the nodes are ordered by descending core number, ties
    broken by descending peak number.

    The figure is written to graphname + '_mountainplot.pdf' and then closed.

    Parameters:
        permountain_ID_core_peak_numbers: dict keyed by mountain id whose
            values are dicts mapping node -> (node, core number, peak number).
        orig_core_nums: dict mapping node -> core number.
        peak_numbers: dict mapping node -> peak number.
        graphname: prefix of the output file name.
    """
    ### Part 1 ####
    # Arranging the values to be plotted on the x and y axis.
    x_vals = []
    y_vals = []   # core numbers
    y_vals2 = []  # peak numbers
    for mountain_id in sorted(permountain_ID_core_peak_numbers):
        mountain = permountain_ID_core_peak_numbers[mountain_id]
        # A single stable sort on (core number, peak number) gives the same
        # order as sorting by peak number first and by core number second.
        ordered_tuples = sorted(mountain.values(), key=lambda t: (t[1], t[2]), reverse=True)
        for node, _core, _peak in ordered_tuples:
            x_vals.append(len(x_vals))
            y_vals.append(orig_core_nums[node])
            y_vals2.append(peak_numbers[node])

    ### Part 2 ####
    ## The plotting
    ax = plt.gca()
    plt.fill_between(x_vals, y_vals, 0, color = 'lightblue') # Area under the core number values (blue line)
    plt.plot(x_vals, y_vals, label = 'Core Number', color = 'blue')
    plt.scatter(x_vals, y_vals2, color = 'r', label = 'Peak Number')
    plt.ylabel('Core Number or Peak Number', fontsize=20)
    plt.xlabel('Individual nodes', fontsize=20)
    plt.legend(fontsize=18,bbox_to_anchor=(1.01, 1), prop={'size':18})
    plt.xlim(0, len(orig_core_nums))
    plt.ylim(0, max(orig_core_nums.values()))
    ax.tick_params(axis='x', labelsize=18)
    ax.tick_params(axis='y', labelsize=18)
    # plt.show()
    plt.savefig(graphname+'_mountainplot.pdf', bbox_inches='tight')
    plt.close()
# Function that computes peak numbers and then makes mountain plot
def draw_mountainplot(G, graphname):
    """Run the k-peak mountain assignment on G and save its mountain plot.

    G is modified in place: its self-loops and its isolated nodes are removed
    before the core numbers are computed.

    Parameters:
        G: a networkx graph.
        graphname: prefix of the output file name; the plotting function
            appends its own suffix.
    """
    # Graph.selfloop_edges() only exists in older networkx releases; newer
    # ones expose the module-level nx.selfloop_edges(G) instead.  The edges
    # are materialised into a list so the graph is not mutated while a lazy
    # edge iterator is still reading from it.
    if hasattr(nx, 'selfloop_edges'):
        self_loops = list(nx.selfloop_edges(G))
    else:
        self_loops = list(G.selfloop_edges())
    G.remove_edges_from(self_loops)
    G = removeSingletons(G)

    orig_core_nums = nx.core_number(G)
    permountain_ID_corenumber_peaknumber, peak_numbers = get_kpeak_mountainassignment(G, orig_core_nums)
    plot_mountains_given_mountainassignment(permountain_ID_corenumber_peaknumber, orig_core_nums, peak_numbers, graphname)
# Function that computes peak numbers per component and then makes the mountain plot with mountain markers
def draw_mountainplot_components(G, graphname):
    """Run the per-component mountain assignment on G and save its plot.

    G is modified in place: its self-loops and its isolated nodes are removed
    before the core numbers are computed.

    Parameters:
        G: a networkx graph.
        graphname: prefix of the output file name; the plotting function
            appends its own suffix.
    """
    # Graph.selfloop_edges() only exists in older networkx releases; newer
    # ones expose the module-level nx.selfloop_edges(G) instead.  The edges
    # are materialised into a list so the graph is not mutated while a lazy
    # edge iterator is still reading from it.
    if hasattr(nx, 'selfloop_edges'):
        self_loops = list(nx.selfloop_edges(G))
    else:
        self_loops = list(G.selfloop_edges())
    G.remove_edges_from(self_loops)
    G = removeSingletons(G)

    orig_core_nums = nx.core_number(G)
    permountain_ID_corenumber_peaknumber, peak_numbers = get_kpeak_mountainassignment_component(G, orig_core_nums)
    plot_mountains_given_mountainassignment_givencomponents(permountain_ID_corenumber_peaknumber, orig_core_nums, peak_numbers, graphname)
def removeSingletons(G):
    """Remove every node of degree 0 from G, in place, and return G.

    Parameters:
        G: a graph object offering degree() and remove_nodes_from().

    Returns:
        The same graph object that was passed in.
    """
    # G.degree() returns a plain dict in older networkx releases and an
    # iterable of (node, degree) pairs in newer ones; dict() accepts both.
    degrees = dict(G.degree())
    # The isolated nodes are collected first so that the graph is not
    # modified while its degrees are still being read.
    isolated_nodes = [node for node, degree in degrees.items() if degree == 0]
    G.remove_nodes_from(isolated_nodes)
    return G