-
Notifications
You must be signed in to change notification settings - Fork 0
/
DirNetsPackage.py
275 lines (203 loc) · 10.4 KB
/
DirNetsPackage.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
# -*- coding: utf-8 -*-
"""
Created on Thu Jun 22 10:18:36 2017
@author: gjgut
"""
def InLists(AdjacencyMatrix):
import numpy as np
nrows, ncols = AdjacencyMatrix.shape
if nrows != ncols:
print(f"Error, not adjacencey matrix - has {nrows} rows vs {ncols} columns.")
return
Inlist = [[] for j in range(ncols)]
InWeights = [[] for j in range(ncols)]
#index by columns first to get inward connections to node
for col in range(ncols):
for row in range(nrows):
if AdjacencyMatrix[row][col] > 0.0: #if there is a connection
Inlist[col].append(row)
InWeights[col].append(AdjacencyMatrix[row][col])
InDeg = np.zeros(ncols)
for ii in range(len(Inlist)):
InDeg[ii] = np.count_nonzero(Inlist[ii])
return Inlist, InWeights, InDeg, ncols
def OutLists(AdjacencyMatrix):
import numpy as np
nrows, ncols = AdjacencyMatrix.shape
if nrows != ncols:
print(f"Error, not adjacencey matrix - has {nrows} rows vs {ncols} columns.")
return
Outlist = [[] for j in range(ncols)]
OutWeights = [[] for j in range(ncols)]
#index by rows first to get outward connections to node
for row in range(nrows):
for col in range(ncols):
if AdjacencyMatrix[row][col] > 0.0: #if there is a connection
Outlist[row].append(col)
OutWeights[row].append(AdjacencyMatrix[row][col])
OutDeg = np.zeros(ncols)
for ii in range(len(Outlist)):
OutDeg[ii] = np.count_nonzero(Outlist[ii])
return Outlist, OutWeights, OutDeg, ncols
def InList_AdjMat(InList,InWeights):
import numpy as np
nodes = len(InList)
AdjMat = np.zeros([nodes,nodes])
for col in range(nodes):
for row in range(len(InList[col])):
AdjMat[InList[col][row]][col] = InWeights[col][row]
return AdjMat
def OutList_AdjMat(OutList,OutWeights):
import numpy as np
nodes = len(OutList)
AdjMat = np.zeros([nodes,nodes])
for row in range(nodes):
for col in range(len(OutList[row])):
AdjMat[row][OutList[row][col]] = OutWeights[row][col]
return AdjMat
def GroundTrials(DirAdjMat,Trials,Beta):
'''
DirAdjMat is a directed, weighted adjacency matrix.
Trials are the number of individual simulations to perform per node.
Beta is a renormalization variable which sets the max weight in network to Beta.
When a network has many cyclical connections and high probs on transmission,
(weights close to one), it is necessary to renormalize the weights so
as to mitigate runaway transmission probability propagation. Also, this Beta
should match the Beta used in the McAlgorithm function as that function's purpose
is to approximate the results of this function. Set Beta to one if not using
renormalization.
'''
import numpy as np
import random as rd
import matplotlib.pyplot as plt
import time
OutList, OutWeights, OutDeg, Nodes = OutLists(DirAdjMat)
SimHist = np.zeros([Trials, Nodes])
start = time.time()
for infnode in range(Nodes):
print(infnode)
for trial in range(Trials):
NowState = np.zeros(Nodes)
NxtState = np.zeros(Nodes)
NowState[infnode] = 1
InfHist = np.zeros((Nodes + 1,Nodes))
cont_sim = 1
t = 0
while cont_sim != 0:
cont_sim = 0
InfHist[t] = NowState #set row t InfectionHistory to the updated NowStates
for n in range(len(NowState)): #cycle through NowStates
if NowState[n] == 1: #if infected
cont_sim = 1 #as long as one node is infected, the trial continues
NxtState[n] = 2 #set infected node to recovered in NextStates
for out_con in range(len(OutWeights[n])): # cycle through infnode's outgoing cons
if NowState[OutList[n][out_con]] == 0:
frand = rd.uniform(0,1) #roll the dice
if frand < Beta*OutWeights[n][out_con]: #if infection successful. Beta multiplies weights here
NxtState[OutList[n][out_con]] = 1 #set suscept node to infected in next state list
NowState = np.copy(NxtState) #set updated node state list as original node states for cycle
t = t + 1
SimHist[trial][infnode] = np.count_nonzero(InfHist[t-1]) - 1
#keeps track of each node's propagation of infection for every trial
#subtracts one at end to exclude the initial beginning of the infection at that node
end = time.time()
runtime = end - start
NAvgInfs = SimHist.mean(axis=0)
MeanInfs = NAvgInfs.mean()
StdDev = NAvgInfs.std()
Error = SimHist.std(axis=0)/np.sqrt(Trials)
plt.errorbar(range(Nodes),NAvgInfs,yerr=Error,fmt='-',ecolor='tomato')
plt.title(f'Average Infections Caused by Seed Node (N={Nodes},Trials={Trials},Mean={MeanInfs:.3f},\u03c3={StdDev:.3f},Runtime={runtime:.3f} seconds)', fontsize=15)
plt.xlabel('Node ID',fontsize=15)
plt.ylabel('Infections',fontsize=15)
return SimHist, NAvgInfs, runtime
def McAlgorithm(DirAdjMat,Beta,Termination,GroundAvgInfs,GroundSimHist,GroundRunTime):
'''
this algorithm operates using the inward bound connections, instead of the outward bound connections.
Beta is a renormalization variable. Multiplying by Beta makes the max weight in network Beta
this term dictates how long the sim will get propagate the smaller the term, the longer the
probabilities will be calculated
'''
import time
import numpy as np
import matplotlib.pyplot as plt
InList, InWeights, InDeg, Nodes = InLists(DirAdjMat)
AvgInfs = np.zeros(Nodes)
start = time.time()
for seed in range(Nodes):
print(seed)
prev_uninfected = np.ones(Nodes) #prob that node has remained uninfected until t-1
uninfected = np.ones(Nodes) #prob that node has remained uninfected until t. starts at one for each node
last_infected = np.zeros(Nodes) #probability that node was infected on last timestep
cur_infected = np.zeros(Nodes) #probability that node was infected on current timestep
last_infected[seed] = 1
uninfected[seed] = 0
t = 0
while sum(prev_uninfected - uninfected) > Termination:
prev_uninfected = np.copy(uninfected)
for node in range(Nodes):
prob_uninfected = 1 #set probability of specific node being uninfected to one
for con in range(len(InList[node])): #look at each incoming connection
prob_uninfected = prob_uninfected*(1-(last_infected[InList[node][con]]*(Beta*InWeights[node][con])))
#prob that node remains uninfected decreases as we go through each connection.
cur_infected[node] = (1-prob_uninfected)*uninfected[node]
#prob of current infection is prob of being infected times the prob that node hasn't been infected yet
for node in range(Nodes):
last_infected[node] = cur_infected[node]
#update our last infected list for each timestep
uninfected[node] = uninfected[node] - cur_infected[node]
#new prob of being uninfected is old prob - the prob that node is currently infected
t = t+1
AvgInfs[seed] = sum(1 - uninfected) - 1 #dont want to include seed node infection in total
end = time.time()
runtime = end - start
MeanInfs = AvgInfs.mean()
GroundMeanInfs = GroundAvgInfs.mean()
MeanPercDif = ((GroundMeanInfs - MeanInfs)/GroundMeanInfs)*100
RunTimePercDif =((GroundRunTime - runtime)/GroundRunTime)*100
GroundError = GroundSimHist.std(axis=0)/np.sqrt(len(GroundSimHist))
plt.plot(range(Nodes),AvgInfs,color='red',label='Probability Algorithm')
plt.errorbar(range(Nodes),GroundAvgInfs,yerr=GroundError,fmt='-',ecolor='green',label='Ground Truth')
plt.legend(loc='upper right')
plt.title(f'Comparison b/w Alrogithm and Ground Truth Results (N={Nodes},'
f' Mean and % Difference=({MeanInfs:.3f}, {MeanPercDif:.3f})'
f' Runtime and % Difference=({runtime:.3f}, {RunTimePercDif:.3f})')
plt.xlabel('Node ID',fontsize=15)
plt.ylabel('Infections',fontsize=15)
return AvgInfs
def KendallsTau(GroundAvgInfs,AlgoAvgInfs,PageRanks,OutDegree):
import numpy as np
from scipy.stats import stats
Ground_rank = sorted(range(len(GroundAvgInfs)), key=lambda i: GroundAvgInfs[i])[-len(GroundAvgInfs):]
Algo_rank = sorted(range(len(AlgoAvgInfs)), key=lambda i: AlgoAvgInfs[i])[-len(AlgoAvgInfs):]
Pagerank_rank = sorted(range(len(PageRanks)), key=lambda i: PageRanks[i])[-len(PageRanks):]
OutDeg_rank = sorted(range(len(OutDegree)), key=lambda i: OutDegree[i])[-len(OutDegree):]
Ground_rank.reverse()
Pagerank_rank.reverse()
Algo_rank.reverse()
OutDeg_rank.reverse()
positions = list(range(453))
dictionary = dict(zip(Ground_rank,positions))
Ground_rankings = [dictionary[i] for i in Ground_rank]
Page_rankings = [dictionary[i] for i in Pagerank_rank]
Algo_rankings = [dictionary[i] for i in Algo_rank]
OutDeg_rankings = [dictionary[i] for i in OutDeg_rank]
'''
Below is a full code of Kendall's tau just in case you want to know how it works
concord = np.zeros(453)
discord = np.zeros(453)
for ii in Page_rankings:
for jj in Page_rankings:
if jj > ii:
if Page_rankings[ii] < Page_rankings[jj]:
concord[ii] = concord[ii] + 1
elif Page_rankings[ii] > Page_rankings[jj]:
discord[ii] = discord[ii] + 1
concord_total = np.sum(concord)
discord_total = np.sum(discord)
kendall_tau = (concord_total-discord_total)/(concord_total+discord_total)
'''
AlgoTau, p_value = stats.kendalltau(Ground_rankings,Algo_rankings)
PageTau, Pp_value = stats.kendalltau(Ground_rankings,Page_rankings)
OutTau, Op_value = stats.kendalltau(Ground_rankings,OutDeg_rankings)
return AlgoTau, PageTau, OutTau