-
Notifications
You must be signed in to change notification settings - Fork 0
/
randomwalk.py
191 lines (158 loc) · 6.81 KB
/
randomwalk.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
import random
import numpy
# ---- Experiment configuration ----
# Walk lengths to evaluate, and the number of independent walks that the
# experiment drivers run for each of those lengths.
sample_sizes = [100, 300, 1000, 3000, 10000]
number_of_experiments = 100
# Income generation: start, stop and step values handed to
# random.randrange by generateIncomes.
min_income, max_income, step_income = 1500, 5000, 100
#------
def load_graph(fname):
    """Load an undirected graph from a whitespace-separated edge-list file.

    Every data line is expected to hold two integer node ids.  Lines that
    start with '#' and blank lines are skipped.  Each edge is recorded in
    both directions, so the resulting graph is undirected.

    Args:
        fname: path of the edge-list file to read.

    Returns:
        dict mapping each node id (int) to the set of its neighbor ids.

    Raises:
        ValueError: if a data line does not consist of exactly two integers.
    """
    G = {}  # dictionary: node -> set of neighbors
    # 'with' guarantees the file is closed even when a line fails to parse.
    with open(fname, 'r') as fr:
        for line in fr:
            fields = line.split()
            # Skip '#' comment lines and blank lines (a blank line would
            # otherwise break the two-value unpacking below).
            if not fields or line.startswith('#'):
                continue
            a, b = map(int, fields)
            G.setdefault(a, set()).add(b)
            G.setdefault(b, set()).add(a)
    return G
def generateIncomes(G):
    """Assign a randomly generated income to every node of the graph.

    Each income is drawn with
    random.randrange(min_income, max_income, step_income), i.e. it starts at
    min_income, advances in increments of step_income, and never reaches
    max_income itself.

    Args:
        G: graph as a dict (node -> set of neighbors); only the keys are used.

    Returns:
        dict mapping each node of G to its generated income.
    """
    # Iterate the dict directly: iterkeys() exists only on Python 2 dicts,
    # whereas plain iteration yields the keys on every Python version.
    return dict(
        (node, random.randrange(min_income, max_income, step_income))
        for node in G
    )
def calculateRealAverage(G):
    """Return the exact mean degree of the graph as a float.

    G maps every node to the collection of its neighbors; the result is the
    sum of all neighbor counts divided by the number of nodes.
    """
    degree_sum = sum(len(G[node]) for node in G)
    return float(degree_sum) / len(G)
def MetropolisHastingsRW(G, incomes, sample_size):
    """Estimate mean degree and mean income with a Metropolis-Hastings walk.

    The walk starts at a uniformly chosen node.  At every step the current
    node is recorded, a uniformly random neighbor is proposed, and the
    proposal is accepted with probability
    min(1, deg(current) / deg(neighbor)); otherwise the walk stays in place,
    so the same node can be recorded several times in a row.

    Args:
        G: graph as a dict (node -> set of neighbors).  Every visited node
            needs at least one neighbor.
        incomes: dict mapping node -> income.
        sample_size: number of steps (recorded nodes) of the walk.

    Returns:
        A list of four averages:
        [degree average over all recorded steps,
         degree average over the distinct nodes visited,
         income average over all recorded steps,
         income average over the distinct nodes visited].
    """
    # random.choice needs a sequence, hence the list() copies.  The former
    # random.sample(<dict or set>, 1)[0] is rejected by Python 3.
    node = random.choice(list(G))

    sampling = []
    node_degrees = []
    node_incomes = []

    # performing the random walk
    for _ in range(sample_size):
        degree = len(G[node])
        sampling.append(node)
        node_degrees.append(degree)
        node_incomes.append(incomes[node])

        # propose a uniformly random neighbor of the current node
        neighbor = random.choice(list(G[node]))
        neighbor_degree = len(G[neighbor])

        # Metropolis-Hastings acceptance: always move to a lower-degree
        # neighbor, otherwise move with probability degree / neighbor_degree.
        if degree > neighbor_degree:
            node = neighbor
        elif random.random() < (1.0 * degree) / neighbor_degree:
            node = neighbor
        # on rejection the walk stays on the current node

    # averages over every recorded step (repeated nodes count repeatedly)
    avg_degrees_w_duplicate = numpy.average(node_degrees)
    avg_incomes_w_duplicate = numpy.average(node_incomes)

    # keying by node id collapses repeated visits into a single entry
    degrees_wo_duplicate = dict(zip(sampling, node_degrees))
    incomes_wo_duplicate = dict(zip(sampling, node_incomes))

    # list() is required because numpy cannot average a Python 3 dict view
    avg_degrees_wo_duplicate = numpy.average(list(degrees_wo_duplicate.values()))
    avg_incomes_wo_duplicate = numpy.average(list(incomes_wo_duplicate.values()))

    return [avg_degrees_w_duplicate, avg_degrees_wo_duplicate,
            avg_incomes_w_duplicate, avg_incomes_wo_duplicate]
def exp_mhrw(G, incomes):
    """Run the Metropolis-Hastings experiment and print one row per sample size.

    For every entry of sample_sizes, number_of_experiments independent walks
    are performed with MetropolisHastingsRW.  The mean (numpy.average) and
    standard deviation (numpy.std) of the resulting estimates are printed as
    a tab-separated row together with the true averages of the graph.

    Args:
        G: graph as a dict (node -> set of neighbors).
        incomes: dict mapping node -> income.
    """
    real_avg_degrees = calculateRealAverage(G)
    # list() is required because numpy cannot average a Python 3 dict view
    real_avg_incomes = numpy.average(list(incomes.values()))

    print('#sample mean_deg_wd mean_deg_wod stdev_deg_wd stdev_deg_wod real_avg_deg mean_inc_wd mean_inc_wod stdev_inc_wd stdev_inc_wod real_avg_inc')

    for sample in sample_sizes:
        # Start from empty lists for every sample size.  Sharing them across
        # iterations would blend the estimates of all smaller sample sizes
        # into this row's mean and standard deviation.
        degrees_with_duplicate = []
        degrees_wo_duplicate = []
        incomes_with_duplicate = []
        incomes_wo_duplicate = []

        for _ in range(number_of_experiments):
            result = MetropolisHastingsRW(G, incomes, sample)
            degrees_with_duplicate.append(result[0])
            degrees_wo_duplicate.append(result[1])
            incomes_with_duplicate.append(result[2])
            incomes_wo_duplicate.append(result[3])

        # mean of the estimated averages
        mean_degrees_wd = numpy.average(degrees_with_duplicate)
        mean_degrees_wod = numpy.average(degrees_wo_duplicate)
        mean_incomes_wd = numpy.average(incomes_with_duplicate)
        mean_incomes_wod = numpy.average(incomes_wo_duplicate)

        # std dev of the estimated averages
        stdev_degrees_wd = numpy.std(degrees_with_duplicate)
        stdev_degrees_wod = numpy.std(degrees_wo_duplicate)
        stdev_incomes_wd = numpy.std(incomes_with_duplicate)
        stdev_incomes_wod = numpy.std(incomes_wo_duplicate)

        # single-argument print(...) prints the same text on Python 2 and 3
        print('{0:0.0f}\t{1:2.7f}\t{2:2.7f}\t{3:2.7f}\t{4:2.7f}\t{5:2.7f}\t{6:2.7f}\t{7:2.7f}\t{8:2.7f}\t{9:2.7f}\t{10:2.7f}\t'.format(
            sample, mean_degrees_wd, mean_degrees_wod,
            stdev_degrees_wd, stdev_degrees_wod, real_avg_degrees,
            mean_incomes_wd, mean_incomes_wod,
            stdev_incomes_wd, stdev_incomes_wod, real_avg_incomes))
def ReWeightedRW(G, incomes, sample_size):
    """Estimate mean degree and mean income with a re-weighted random walk.

    A simple random walk starts at a uniformly chosen node and moves to a
    uniformly random neighbor at every step.  The plain averages over the
    visited nodes are reported as the "biased" estimates.  The "reweighted"
    estimates weight every visited node by 1 / degree, normalized so that
    the weights sum to one.

    Args:
        G: graph as a dict (node -> set of neighbors).  Every visited node
            needs at least one neighbor.
        incomes: dict mapping node -> income.
        sample_size: number of steps (recorded nodes) of the walk.

    Returns:
        A list of four averages:
        [biased degree average, reweighted degree average,
         biased income average, reweighted income average].
    """
    # random.choice needs a sequence, hence the list() copies.  The former
    # random.sample(<dict or set>, 1)[0] is rejected by Python 3.
    node = random.choice(list(G))

    node_degrees = []
    node_incomes = []
    for _ in range(sample_size):
        node_degrees.append(len(G[node]))
        node_incomes.append(incomes[node])
        # move to a uniformly random neighbor of the current node
        node = random.choice(list(G[node]))

    # the normal random walk: biased, without correction
    biased_average_degrees = numpy.average(node_degrees)
    biased_average_incomes = numpy.average(node_incomes)

    # correct the sample with normalized inverse-degree weights
    inverse_degrees = [1.0 / degree for degree in node_degrees]
    normalization_constant = sum(inverse_degrees)
    prob = [weight / normalization_constant for weight in inverse_degrees]

    reweighted_average_degrees = sum(p * d for p, d in zip(prob, node_degrees))
    reweighted_average_incomes = sum(p * v for p, v in zip(prob, node_incomes))

    return [biased_average_degrees, reweighted_average_degrees,
            biased_average_incomes, reweighted_average_incomes]
def exp_rwrw(G, incomes):
    """Run the re-weighted random walk experiment, one printed row per sample size.

    For every entry of sample_sizes, number_of_experiments independent walks
    are performed with ReWeightedRW.  The mean (numpy.average) and standard
    deviation (numpy.std) of the biased and reweighted estimates are printed
    as a tab-separated row together with the true averages of the graph.

    Args:
        G: graph as a dict (node -> set of neighbors).
        incomes: dict mapping node -> income.
    """
    real_avg_degrees = calculateRealAverage(G)
    # list() is required because numpy cannot average a Python 3 dict view
    real_avg_incomes = numpy.average(list(incomes.values()))

    print('#sample mean_biased_deg mean_rw_deg std_biased_deg std_rw_deg real_avg_deg mean_biased_inc mean_rw_inc std_biased_inc stdev_rw_inc real_avg_inc')

    for sample in sample_sizes:
        # Start from empty lists for every sample size.  Sharing them across
        # iterations would blend the estimates of all smaller sample sizes
        # into this row's mean and standard deviation.
        biased_degrees = []
        reweighted_degrees = []
        biased_incomes = []
        reweighted_incomes = []

        for _ in range(number_of_experiments):
            result = ReWeightedRW(G, incomes, sample)
            biased_degrees.append(result[0])
            reweighted_degrees.append(result[1])
            biased_incomes.append(result[2])
            reweighted_incomes.append(result[3])

        # mean of the estimated averages
        mean_biased_degrees = numpy.average(biased_degrees)
        mean_reweighted_degrees = numpy.average(reweighted_degrees)
        mean_biased_incomes = numpy.average(biased_incomes)
        mean_reweighted_incomes = numpy.average(reweighted_incomes)

        # std dev of the estimated averages
        stdev_biased_degrees = numpy.std(biased_degrees)
        stdev_reweighted_degrees = numpy.std(reweighted_degrees)
        stdev_biased_incomes = numpy.std(biased_incomes)
        stdev_reweighted_incomes = numpy.std(reweighted_incomes)

        # single-argument print(...) prints the same text on Python 2 and 3
        print('{0:0.0f}\t{1:2.7f}\t{2:2.7f}\t{3:2.7f}\t{4:2.7f}\t{5:2.7f}\t{6:2.7f}\t{7:2.7f}\t{8:2.7f}\t{9:2.7f}\t{10:2.7f}\t'.format(
            sample, mean_biased_degrees, mean_reweighted_degrees,
            stdev_biased_degrees, stdev_reweighted_degrees, real_avg_degrees,
            mean_biased_incomes, mean_reweighted_incomes,
            stdev_biased_incomes, stdev_reweighted_incomes, real_avg_incomes))
# Script driver: load the edge list, attach random incomes to the nodes and
# run both sampling experiments on the same graph and incomes.
G = load_graph("p2p-Gnutella31.txt")
incomes = generateIncomes(G)

# Single-argument print(...) is valid on Python 2 and Python 3 alike and
# prints the same text; the bare print statement is a SyntaxError on Python 3.
print("# Metropolist-Hastings Random Walk")
exp_mhrw(G, incomes)

print("\n# ReWeighted Random Walk")
exp_rwrw(G, incomes)