forked from othercriteria/StochasticBlockmodel
/
test_block.py
executable file
·215 lines (186 loc) · 7.69 KB
/
test_block.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
#!/usr/bin/env python
# Test of "new style" network inference, finally with blockmodel
# Daniel Klein, 7/8/2012
import numpy as np
from Network import Network
from Models import StationaryLogistic, NonstationaryLogistic, Blockmodel
from Models import alpha_zero, alpha_norm, alpha_unif, alpha_gamma
from Experiment import RandomSubnetworks, Results, add_network_stats
from Experiment import minimum_disagreement, rel_mse
from Utility import logit
# Parameters
# All experiment knobs live in this dict; the rest of the script reads it.
params = { 'N': 600,                        # nodes in the full network
           'K': 2,                          # true number of latent classes
           'class_conc': 10.0,              # symmetric Dirichlet concentration for class probabilities
           'Theta_diag': 4.0,               # boost added to the diagonal of the class-interaction matrix
           'Theta_mean': 0.0,               # mean of Theta entries before the diagonal boost
           'Theta_sd': 1.0,                 # sd of Theta entries
           'B': 1,                          # number of edge covariates
           'beta_sd': 1.0,                  # sd of the true covariate coefficients
           'alpha_unif': 0.0,               # scale for uniform node effects (0 disables)
           'alpha_norm_sd': 2.0,            # sd for normal node effects (0 disables)
           'alpha_gamma_sd': 0.0,           # sd for gamma node effects (0 disables)
           'kappa_target': ('edges', 20),   # density target passed to data_model.match_kappa
           'fit_nonstationary': True,       # also fit a nonstationary variant
           'fit_conditional': True,         # also fit via the conditional method
           'fit_K': 2,                      # number of classes assumed when fitting
           'initialize_true_z': False,      # seed fitting with the true class labels?
           'cycles': 20,                    # fitting cycles per fit
           'sweeps': 2,                     # sweeps per cycle
           'verbose': False,                # verbose fitting output
           'num_reps': 5,                   # repetitions per subnetwork size
           'sub_sizes': range(25, 155, 25), # subnetwork sizes to test
           'plot_mse': True,                # plot inference-performance panels at the end
           'plot_network': True }           # plot network statistics / heatmap
# Set random seed for reproducible output
np.random.seed(136)

# Report parameters for the run
print 'Parameters:'
for field in params:
    print '%s: %s' % (field, str(params[field]))

# Initialize full network
net = Network(params['N'])

# Generate node-level propensities to extend and receive edges.
# Exactly one alpha scheme is applied, chosen by the first nonzero scale
# parameter in the order normal, uniform, gamma; otherwise node effects
# are set to zero.
if params['alpha_norm_sd'] > 0.0:
    alpha_norm(net, params['alpha_norm_sd'])
elif params['alpha_unif'] > 0.0:
    alpha_unif(net, params['alpha_unif'])
elif params['alpha_gamma_sd'] > 0.0:
    # Choosing location somewhat arbitrarily to give unit skewness
    alpha_gamma(net, 4.0, params['alpha_gamma_sd'])
else:
    alpha_zero(net)
# Generate covariates and associated coefficients
data_base_model = NonstationaryLogistic()
covariates = []
for b in range(params['B']):
    name = 'x_%d' % b
    covariates.append(name)
    # True coefficient for this covariate, drawn fresh each run
    data_base_model.beta[name] = np.random.normal(0, params['beta_sd'])

    # Covariate value for an ordered node pair: i.i.d. uniform on
    # [-sqrt(3), sqrt(3)] (unit variance), independent of the node pair.
    def f_x(i_1, i_2):
        return np.random.uniform(-np.sqrt(3), np.sqrt(3))
    net.new_edge_covariate(name).from_binary_function_ind(f_x)
# Initialize data (block)model from base model
# Class memberships: class probabilities from a symmetric Dirichlet, then
# one multinomial draw per node; z[i] is the index of the sampled class.
class_probs = np.random.dirichlet(np.repeat(params['class_conc'], params['K']))
z = np.where(np.random.multinomial(1, class_probs, params['N']) == 1)[1]
net.new_node_covariate_int('z_true')[:] = z
data_model = Blockmodel(data_base_model, params['K'], 'z_true')
# Class-interaction matrix: i.i.d. normal entries, an extra boost on the
# diagonal (favoring within-class edges), then centered to mean zero.
Theta = np.random.normal(params['Theta_mean'], params['Theta_sd'],
                         (params['K'],params['K']))
Theta += params['Theta_diag'] * np.identity(params['K'])
Theta -= np.mean(Theta)
data_model.Theta = Theta
net.generate(data_model)
if params['plot_network']:
    net.show_heatmap('z_true')
# Initialize fitting model
# Stationary blockmodel fit: coefficients start unset (None) and are
# estimated during fitting.
fit_base_model = StationaryLogistic()
for c in covariates:
    fit_base_model.beta[c] = None
fit_model = Blockmodel(fit_base_model, params['fit_K'])
if params['fit_nonstationary']:
    # Parallel nonstationary variant, fit alongside for comparison
    n_fit_base_model = NonstationaryLogistic()
    for c in covariates:
        n_fit_base_model.beta[c] = None
    n_fit_model = Blockmodel(n_fit_base_model, params['fit_K'])
# Node covariate that will hold the inferred class assignments
net.new_node_covariate_int('z')
# Set up recording of results from experiment
s_results = Results(params['sub_sizes'], params['num_reps'], 'Stationary fit')
add_network_stats(s_results)
# Record the sparsity parameter chosen for each generated subnetwork
s_results.new('Subnetwork kappa', 'm', lambda d, f: d.base_model.kappa)
def f_c(c):
    """Return a (true, estimated) pair of beta accessors bound to name c.

    Building the closures inside a helper pins down the current value of
    c for each pair, instead of all closures sharing the loop variable.
    """
    true_beta = lambda d, f: d.base_model.beta[c]
    estimated_beta = lambda d, f: f.base_model.beta[c]
    return (true_beta, estimated_beta)
# Register true/estimated coefficient statistics for every covariate
for c in covariates:
    # Need to do this hackily to avoid for-loop/lambda-binding weirdness.
    f_true, f_estimated = f_c(c)
    s_results.new('True beta_{%s}' % c, 'm', f_true)
    s_results.new('Estimated beta_{%s}' % c, 'm', f_estimated)
# Disagreement between inferred and true classes, minimized over
# relabelings of the inferred classes
s_results.new('Class mismatch', 'n',
              lambda n: minimum_disagreement(n.node_covariates['z_true'][:], \
                                             n.node_covariates['z'][:]))
def rel_mse_p_ij(n, d, f):
    """Relative MSE of fit model f's edge probabilities on network n,
    against data model d's probabilities, normalized by f's baseline."""
    truth = d.edge_probabilities(n)
    fitted = f.edge_probabilities(n)
    return rel_mse(fitted, f.baseline(n), truth)
# Record relative MSE on the probability scale
s_results.new('Rel. MSE(P)', 'nm', rel_mse_p_ij)
def rel_mse_logit_p_ij(n, d, f):
    """Relative MSE of fit model f's edge probabilities on the logit
    scale, against data model d's, normalized by f's logit baseline."""
    truth = logit(d.edge_probabilities(n))
    baseline = f.baseline_logit(n)
    fitted = logit(f.edge_probabilities(n))
    return rel_mse(fitted, baseline, truth)
# Record relative MSE on the logit scale
s_results.new('Rel. MSE(logit_P)', 'nm', rel_mse_logit_p_ij)

# Clone the recorder for each additional fitting procedure so all runs
# share the same statistic definitions
all_results = { 's': s_results }
if params['fit_nonstationary']:
    n_results = s_results.copy()
    n_results.title = 'Nonstationary fit'
    all_results['n'] = n_results
if params['fit_conditional']:
    c_results = s_results.copy()
    c_results.title = 'Conditional fit'
    all_results['c'] = c_results
def initialize(s, f):
    """Seed subnetwork s's 'z' class assignments before fitting model f.

    Either copies the true classes (oracle start) or draws uniformly
    random classes in [0, fit_K). The fitting model f is not consulted.
    """
    z_cov = s.node_covariates['z']
    if params['initialize_true_z']:
        z_cov[:] = s.node_covariates['z_true'][:]
    else:
        z_cov[:] = np.random.randint(0, params['fit_K'], s.N)
for sub_size in params['sub_sizes']:
print 'subnetwork size = %d' % sub_size
gen = RandomSubnetworks(net, sub_size)
for rep in range(params['num_reps']):
subnet = gen.sample()
data_model.match_kappa(subnet, params['kappa_target'])
subnet.generate(data_model)
initialize(subnet, fit_model)
fit_model.fit(subnet, params['cycles'], params['sweeps'],
verbose = params['verbose'])
s_results.record(sub_size, rep, subnet, data_model, fit_model)
print
if params['fit_conditional']:
initialize(subnet, fit_model)
fit_base_model.fit = fit_base_model.fit_conditional
fit_model.fit(subnet, params['cycles'], params['sweeps'])
c_results.record(sub_size, rep, subnet, data_model, fit_model)
print
if params['fit_nonstationary']:
subnet.offset_extremes()
initialize(subnet, n_fit_model)
n_fit_model.fit(subnet, params['cycles'], params['sweeps'],
verbose = params['verbose'])
n_results.record(sub_size, rep, subnet, data_model, n_fit_model)
print
# Compute beta MSEs
covariate_mses = []
for c in covariates:
    name = 'MSE(beta_{%s})' % c
    covariate_mses.append(name)
    # Derive the MSE statistic from the recorded true/estimated pairs,
    # separately for each fitting procedure
    for model in all_results:
        results = all_results[model]
        results.estimate_mse(name,
                             'True beta_{%s}' % c, 'Estimated beta_{%s}' % c)

# Print a summary table for each fitting procedure
for model in all_results:
    results = all_results[model]
    print results.title
    results.summary()
    print
# Plot inference performance, in terms of MSE(beta), MSE(P_ij), and
# inferred class disagreement; also plot kappas chosen for data models
if params['plot_mse']:
    for model in all_results:
        results = all_results[model]
        to_plot = [(['MSE(beta_i)'] + covariate_mses,
                    {'ymin': 0, 'ymax': 0.5, 'plot_mean': True}),
                   ('Rel. MSE(P)', {'ymin': 0, 'ymax': 2, 'baseline': 1}),
                   ('Rel. MSE(logit_P)', {'ymin': 0, 'ymax': 2, 'baseline': 1}),
                   ('Class mismatch', {'ymin': 0, 'ymax': 2})]
        # Drop the logit-scale panel for the nonstationary fit.
        # NOTE(review): presumably because offset_extremes makes the
        # logit baseline ill-defined there — confirm against Models.py.
        if model == 'n': to_plot.pop(2)
        results.plot(to_plot)
# Plot network statistics as well as sparsity parameter
if params['plot_network']:
    s_results.title = None
    s_results.plot([('Average out-degree', {'ymin': 0, 'plot_mean': True}),
                    ('Average in-degree', {'ymin': 0, 'plot_mean': True}),
                    (['Out-degree', 'Max out-degree', 'Min out-degree'],
                     {'ymin': 0, 'plot_mean': True}),
                    # BUGFIX: second entry was 'Max out-degree' (copy-paste
                    # slip); this panel shows the in-degree extremes
                    (['In-degree', 'Max in-degree', 'Min in-degree'],
                     {'ymin': 0, 'plot_mean': True}),
                    ('Self-loop density', {'ymin': 0, 'plot_mean': True}),
                    'Subnetwork kappa'])