/
infection.py
executable file
·339 lines (274 loc) · 12.5 KB
/
infection.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
#!/usr/bin/env python3.5
"""
Khan Academy's Limited Infection Problem
========================================
See ./README.md for details
Some notes:
* A directed edge A->B indicates that A coaches B.
* Edge direction matters little here: what we need to keep track of is simply whether two
nodes are connected.
"""
import sys
import argparse
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import networkx as nx
# Widen NumPy's printed line width to 160 characters so large arrays wrap less often
# (original author's note: "I have a big screen").
np.set_printoptions(linewidth=160)
def main():
    """
    Command line entry point demonstrating typical use of NetworkInfection.

    Parses the arguments, loads the network, seeds the infection, runs either the limited or the
    total infection, and optionally animates the recorded states.
    """
    cli = get_args()
    network = NetworkInfection(cli.nodes, cli.prob, cli.write, refresh=cli.refresh)
    network.load()
    network.choose()

    # -l selects the limited (Markov) infection; otherwise every component is fully infected
    history = (network.limited_infection(cli.size, cli.stickiness)
               if cli.limited
               else network.total_infection())

    if cli.animate:
        network.animate_infection(history)
class NetworkInfection(object):
    """
    Network Infection

    Responsible for management of our network and current infection state.

    :param nodecount: int => Number of nodes in generated Network
    :param prob: float => Probability in generated network for edge to be created
    :param write: bool => Whether or not to save the network animation as .mp4
    :param filename: str => Path of the .npy adjacency matrix read by load()
    :param refresh: bool => Generate a new random network before loading
    :param choose_node: bool => Prompt for the starting node(s) instead of picking at random
    :returns: <NetworkInfection>
    """

    # Location gen_new_random_graph() writes a freshly generated network to.
    _GENERATED_NETWORK = './test/testnetwork.npy'

    def __init__(self, nodecount: int, prob: float, write: bool,
                 filename='./test/testnetwork.npy',
                 refresh=False, choose_node=False) -> None:
        self.networkfile = filename
        self.graph = None        # adjacency matrix, set by load()
        self.nxgraph = None      # networkx DiGraph built from the matrix, set by load()
        self.choice = choose_node  # bool until choose() replaces it with a list of seed nodes
        self.write = write
        self.infections = None   # {node: bool}, set by _infection_list()
        self.subgraphs = False   # True when the network has several weakly connected components
        # Attribute kept from the original implementation; load() reads self.networkfile.
        self.filename = self._GENERATED_NETWORK
        if refresh:
            gen_new_random_graph(nodecount, prob)
            # The generator always writes to the default location, so that is the file load()
            # has to read -- even when a different filename was passed in.
            self.networkfile = self._GENERATED_NETWORK

    def load(self) -> None:
        """
        Loads adjacency matrix network from provided .npy file.

        filename is set in class instance. Also records whether the network consists of more
        than one weakly connected component.
        """
        self.graph = np.load(self.networkfile)
        self.nxgraph = nx.DiGraph(self.graph)
        # Assign the flag outright so a reload cannot leave a stale True behind
        self.subgraphs = nx.number_weakly_connected_components(self.nxgraph) > 1

    def show(self) -> None:
        """
        Draws the current network using Matplotlib.

        NOTE: BLOCKING. If this is done, it will stop any code execution.
        """
        plt.figure()
        nx.draw(self.nxgraph, pos=nx.spring_layout(self.nxgraph))
        plt.show()

    def _components(self) -> list:
        """
        Returns one subgraph per weakly connected component of the network.

        Built from nx.weakly_connected_components + subgraph, since the older
        nx.weakly_connected_component_subgraphs helper was removed in NetworkX 2.4.
        """
        return [self.nxgraph.subgraph(nodes)
                for nodes in nx.weakly_connected_components(self.nxgraph)]

    def choose(self) -> None:
        """
        Selects a random node to initially infect for every independent subgraph.

        Then updates the list of infected nodes based on these choices. If node selection was
        requested at construction, the nodes are read from stdin instead (integers separated by
        spaces and/or commas).

        :raises ValueError: if a manually entered node is not an integer
        """
        if isinstance(self.choice, bool):  # Prevent from re-picking
            if self.choice:
                # Not really intended for use. Node ids are integers, so the raw input string
                # has to be parsed or it would never match any node.
                raw = input('Select Node(s)')
                self.choice = [int(token) for token in raw.replace(',', ' ').split()]
            else:
                self.choice = [np.random.choice(list(graph.nodes()))
                               for graph in self._components()]
        self._infection_list()

    def _infection_list(self) -> None:
        """
        Updates infection list: every node maps to True if it is a chosen node, else False.
        """
        seeds = set(self.choice)
        self.infections = {n: n in seeds for n in self.nxgraph.nodes()}

    def total_infection(self) -> list:
        """
        This part is straightforward, just simple BFS graph traversal from each chosen node.

        Edges are followed in both directions, so the whole (weakly) connected component of
        every chosen node ends up infected regardless of which way its edges point.

        :returns: list => Sorted infection state after every newly infected node
        """
        states = [dict_item_sort(self.infections.items())]
        undirected = self.nxgraph.to_undirected()
        for seed in self.choice:
            for _, reached in nx.bfs_edges(undirected, seed):  # DFS would also work here
                self.infections[reached] = True
                states.append(dict_item_sort(self.infections.items()))
        # Repeat the final state so an animation ends on a settled frame
        states.append(dict_item_sort(self.infections.items()))
        return states

    def limited_infection(self, infection_size: int, stickiness: int) -> list:
        """
        We can look at this as virus propagation. As similar as this is with the total infection
        problem, we actually want to use a completely different approach

        We want to start the infection at the most central node.
        * The core idea here is that since we want to limit our network so that it only affects
          "groups" of people, we want to initially start at the most connected person so that way
          there's less of a chance of only halfway infecting a network.
        * THIS IS MUCH BETTER THAN RANDOM CHOICE (probably?)

        Decaying Markov Chain
        * Using 1 / ((x+c)**2) where x is size of infection and c is centrality for the node
        * Combined with flat threshold
        * Breakout condition is if it's bounced around already-infected nodes `stickiness`
          times in a row

        :param infection_size: int => Target number of infected nodes; -1 means the whole graph
        :param stickiness: int => Consecutive visits to infected nodes tolerated before stopping
        :returns: list => Sorted infection state after every step of the walk
        """
        nodecount = self.graph.shape[0]
        # If no infection size, set to max size of graph and rely on decay process
        if infection_size == -1:
            infection_size = len(self.nxgraph.nodes())

        scores, node = self._graph_centrality()
        # Index centrality by node id rather than by position in `scores`, whose order follows
        # dict iteration order. Loop-invariant, so it is built once.
        centrality = self._centrality_vector(scores, nodecount)

        self.choice = [node]    # We want this central node to be choice
        self._infection_list()  # Need to refresh infection status
        states = [dict_item_sort(self.infections.items())]

        markovchain = self._get_markovchain()
        candidates = np.arange(nodecount)
        cnode = node
        size = self._infection_size()
        network_stickiness = 0
        while size < infection_size:  # Rebalances cnode weights /every/ cycle
            # Choose next node to jump to
            pnode = cnode
            cnode = np.random.choice(candidates, p=markovchain[:, cnode])

            # Check Stickiness
            if self.infections[cnode] is False:
                network_stickiness = 0
            else:
                network_stickiness += 1
            if network_stickiness >= stickiness:
                break

            # Set its status to "infected"
            self.infections[cnode] = True

            # Rebalance current choices.
            # NOTE(review): the new column gives every node a non-zero probability, not only
            # cnode's neighbours, so later jumps may land on non-adjacent nodes -- confirm
            # this is intended.
            # NOTE(review): 1 / (size + c)**2 gives nodes with HIGHER centrality a LOWER
            # weight -- confirm this matches the intended decay.
            size = self._infection_size()
            weights = 1 / (size + centrality) ** 2
            weights /= weights.sum()
            markovchain[:, cnode] = weights
            # Push any floating point residue onto the back-jump so the column sums to 1
            markovchain[pnode, cnode] += 1 - markovchain[:, cnode].sum()

            states.append(dict_item_sort(self.infections.items()))
        states.append(dict_item_sort(self.infections.items()))
        print('Final Infection Size: {}'.format(self._infection_size()))
        return states

    @staticmethod
    def _centrality_vector(scores: list, nodecount: int) -> np.ndarray:
        """
        Arranges (node, score) pairs into an array indexed by node id.

        Assumes node ids are the integers 0..nodecount-1, which is how the rest of the class
        indexes the adjacency matrix.
        """
        vector = np.zeros(nodecount)
        for node, score in scores:
            vector[node] = score
        return vector

    def _get_markovchain(self) -> np.ndarray:
        """
        Returns randomized initial markov chain for graph.

        How this works: in order to determine next position, randomly pick entry from column
        corresponding to current position.
        """
        # Markov chain is initially randomized probabilities; the identity adds a self-loop so
        # no column is empty
        markovchain = self.graph + np.eye(self.graph.shape[0])
        markovchain *= np.random.random(self.graph.shape)
        # Need to normalize (columns need to sum to 1)
        markovchain /= markovchain.sum(axis=0)
        return markovchain

    def _infection_size(self) -> int:
        """
        Returns the number of nodes currently marked as infected.
        """
        return sum(1 for state in self.infections.values() if state is True)

    def _graph_centrality(self) -> (list, int):
        """
        Finds the most central node in the graph.
        https://en.wikipedia.org/wiki/Centrality

        This uses the eigenvector centrality:
        https://networkx.github.io/documentation/latest/reference/generated/networkx.algorithms.centrality.eigenvector_centrality.html#networkx.algorithms.centrality.eigenvector_centrality

        :returns: (list, node) => (node, score) pairs and the node with the highest score
        """
        centrality_scores = list(nx.eigenvector_centrality(self.nxgraph).items())
        # We now use this central node as our choice
        central_node = max(centrality_scores, key=lambda tup: tup[1])[0]
        return centrality_scores, central_node

    def naive_limited_infection(self) -> list:
        """
        NAIVE LIMITED INFECTION

        Just does a BFS from each chosen node with a decay factor: the further (in nodes on the
        shortest path) the current edge starts from the chosen node, the more likely the
        traversal of that subgraph stops.

        :returns: list => Sorted infection state after every newly infected node
        """
        states = [dict_item_sort(self.infections.items())]
        for i, graph in enumerate(self._components()):
            choice = self.choice[i]
            for start, end in nx.bfs_edges(graph, choice):  # DFS would also work here
                # Number of nodes on the shortest path from the seed to this edge's source
                weight = len(nx.shortest_path(self.nxgraph, source=choice, target=start))
                if np.random.random() > np.exp(-weight + 1.5):
                    break
                self.infections[end] = True
                states.append(dict_item_sort(self.infections.items()))
        states.append(dict_item_sort(self.infections.items()))
        return states

    def animate_infection(self, states: list) -> None:
        """
        Animate Infection Spread

        Saves the animation to infection.mp4 (via ffmpeg) when the instance was created with
        write=True, then shows it.

        :param states: list => 2D list of network states
        """
        fig = plt.figure()
        fig.add_axes([0.1, 0.1, 0.8, 0.8])
        pos = nx.spring_layout(self.nxgraph)

        # One column of 0/1 colour values per recorded state
        colors = np.zeros((len(states[0]), len(states)))
        for frame, state in enumerate(states):
            colors[:, frame] = [0 if infected is False else 1 for _, infected in state]

        nodes = nx.draw_networkx_nodes(self.nxgraph, pos=pos, node_color=colors[:, 0])
        edges = nx.draw_networkx_edges(self.nxgraph, pos=pos)
        nx.draw_networkx_labels(self.nxgraph, pos=pos, font_color='w')

        def animate(i):
            """ Change plot state for next frame """
            frame_nodes = nx.draw_networkx_nodes(self.nxgraph, pos=pos, node_color=colors[:, i])
            return frame_nodes, edges

        def init():
            """ First animation Frame """
            return nodes, edges

        ani = animation.FuncAnimation(fig, animate, np.arange(len(states)), init_func=init,
                                      interval=50)
        if self.write:
            Writer = animation.writers['ffmpeg']
            writer = Writer(fps=10, metadata=dict(artist='Zoe Farmer'), bitrate=1800)
            ani.save('infection.mp4', writer=writer)
        plt.show()
def gen_new_random_graph(nodecount: int, prob: float,
                         filename='./test/testnetwork.npy') -> None:
    """
    Generate a new random graph using binomial generation.

    Will save new network (as a dense adjacency matrix) to file, creating the target directory
    first if it does not exist yet.

    :param nodecount: int => Number of nodes in the generated graph
    :param prob: float => Probability for each possible edge to be created
    :param filename: str => Where to save the matrix; defaults to the test network location
    """
    import os  # local import: only this function needs it

    newgraph = nx.binomial_graph(nodecount, prob)
    directory = os.path.dirname(filename)
    if directory:
        # np.save does not create missing directories
        os.makedirs(directory, exist_ok=True)
    np.save(filename, nx.adjacency_matrix(newgraph).todense())
def dict_item_sort(dlist: list) -> list:
    """
    Return the (key, value) pairs of an infection mapping ordered by key.

    Dictionaries give no ordering guarantee here, so every recorded state is put into key order
    before it is stored. Pairs with equal keys keep their relative order.
    """
    ordered = list(dlist)
    ordered.sort(key=lambda pair: pair[0])
    return ordered
def get_args(argv=None) -> argparse.Namespace:
    """
    Get command line arguments with argparse

    Stickiness defaults to 3, or to 100 when an explicit infection size is requested (so the
    walk keeps going until the requested size is reached). A stickiness given explicitly on
    the command line is always respected.

    :param argv: list => Arguments to parse; None (the default) parses sys.argv[1:]
    :returns: <argparse.Namespace>
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-r', '--refresh', action='store_true', default=False,
                        help='Refresh Graph')
    parser.add_argument('-a', '--animate', action='store_true', default=False,
                        help='Animate Infection')
    parser.add_argument('-w', '--write', action='store_true', default=False,
                        help='Save Animation')
    parser.add_argument('-l', '--limited', action='store_true', default=False,
                        help='Limited Infection or Total? -l indicates limited')
    parser.add_argument('-n', '--nodes', type=int, default=20,
                        help='How many nodes to generate')
    parser.add_argument('-p', '--prob', type=float, default=0.2,
                        help='Edge Probability')
    parser.add_argument('-s', '--size', type=int, default=-1,
                        help='How many nodes to infect')
    # None marks "not given", so an explicit -k can be told apart from the default
    parser.add_argument('-k', '--stickiness', type=int, default=None,
                        help='How sticky the Markov Process is')
    args = parser.parse_args(argv)
    if args.stickiness is None:
        args.stickiness = 3 if args.size == -1 else 100
    return args
# Run the sample workflow only when executed as a script (not on import). main() has no
# return statement, so sys.exit receives None and the process exits with status 0.
if __name__ == '__main__':
    sys.exit(main())